Commit 3a1ff5ded9cf22e114991b5a49857b54f8e56b02
1 parent
a94a729f
Add option to preserve unreferenced objects
Showing
12 changed files
with
212 additions
and
1 deletion
ChangeLog
| 1 | +2017-07-28 Jay Berkenbilt <ejb@ql.org> | ||
| 2 | + | ||
| 3 | + * Add --preserve-unreferenced command-line option and | ||
| 4 | + setPreserveUnreferencedObjects method to QPDFWriter. This option | ||
| 5 | + causes QPDFWriter to write all objects from the input file to the | ||
| 6 | + output file regardless of whether the objects are referenced. | ||
| 7 | + Objects are written to the output file in numerical order from the | ||
| 8 | + input file. This option has no effect for linearized files. | ||
| 9 | + | ||
| 1 | 2017-07-27 Jay Berkenbilt <ejb@ql.org> | 10 | 2017-07-27 Jay Berkenbilt <ejb@ql.org> |
| 2 | 11 | ||
| 3 | * Add --precheck-streams command-line option and setStreamPrecheck | 12 | * Add --precheck-streams command-line option and setStreamPrecheck |
| 4 | - option to QPDFWriter to tell QPDFWriter to attempt decoding a | 13 | + method to QPDFWriter to tell QPDFWriter to attempt decoding a |
| 5 | stream fully before deciding whether to filter it or not. | 14 | stream fully before deciding whether to filter it or not. |
| 6 | 15 | ||
| 7 | * Recover gracefully from streams that aren't filterable because | 16 | * Recover gracefully from streams that aren't filterable because |
include/qpdf/QPDF.hh
| @@ -396,6 +396,12 @@ class QPDF | @@ -396,6 +396,12 @@ class QPDF | ||
| 396 | QPDF_DLL | 396 | QPDF_DLL |
| 397 | void showXRefTable(); | 397 | void showXRefTable(); |
| 398 | 398 | ||
| 399 | + // Returns a list of indirect objects for every object in the xref | ||
| 400 | + // table. Useful for discovering objects that are not otherwise | ||
| 401 | + // referenced. | ||
| 402 | + QPDF_DLL | ||
| 403 | + std::vector<QPDFObjectHandle> getAllObjects(); | ||
| 404 | + | ||
| 399 | // Optimization support -- see doc/optimization. Implemented in | 405 | // Optimization support -- see doc/optimization. Implemented in |
| 400 | // QPDF_optimization.cc | 406 | // QPDF_optimization.cc |
| 401 | 407 |
include/qpdf/QPDFWriter.hh
| @@ -155,6 +155,12 @@ class QPDFWriter | @@ -155,6 +155,12 @@ class QPDFWriter | ||
| 155 | QPDF_DLL | 155 | QPDF_DLL |
| 156 | void setPrecheckStreams(bool); | 156 | void setPrecheckStreams(bool); |
| 157 | 157 | ||
| 158 | + // Preserve unreferenced objects. The default behavior is to | ||
| 159 | + // discard any object that is not visited during a traversal of | ||
| 160 | + // the object structure from the trailer. | ||
| 161 | + QPDF_DLL | ||
| 162 | + void setPreserveUnreferencedObjects(bool); | ||
| 163 | + | ||
| 158 | // Set the minimum PDF version. If the PDF version of the input | 164 | // Set the minimum PDF version. If the PDF version of the input |
| 159 | // file (or previously set minimum version) is less than the | 165 | // file (or previously set minimum version) is less than the |
| 160 | // version passed to this method, the PDF version of the output | 166 | // version passed to this method, the PDF version of the output |
| @@ -427,6 +433,7 @@ class QPDFWriter | @@ -427,6 +433,7 @@ class QPDFWriter | ||
| 427 | qpdf_stream_data_e stream_data_mode; | 433 | qpdf_stream_data_e stream_data_mode; |
| 428 | bool qdf_mode; | 434 | bool qdf_mode; |
| 429 | bool precheck_streams; | 435 | bool precheck_streams; |
| 436 | + bool preserve_unreferenced_objects; | ||
| 430 | bool static_id; | 437 | bool static_id; |
| 431 | bool suppress_original_object_ids; | 438 | bool suppress_original_object_ids; |
| 432 | bool direct_stream_lengths; | 439 | bool direct_stream_lengths; |
libqpdf/QPDF.cc
| @@ -989,6 +989,22 @@ QPDF::showXRefTable() | @@ -989,6 +989,22 @@ QPDF::showXRefTable() | ||
| 989 | } | 989 | } |
| 990 | } | 990 | } |
| 991 | 991 | ||
| 992 | +std::vector<QPDFObjectHandle> | ||
| 993 | +QPDF::getAllObjects() | ||
| 994 | +{ | ||
| 995 | + std::vector<QPDFObjectHandle> result; | ||
| 996 | + for (std::map<QPDFObjGen, QPDFXRefEntry>::iterator iter = | ||
| 997 | + this->xref_table.begin(); | ||
| 998 | + iter != this->xref_table.end(); ++iter) | ||
| 999 | + { | ||
| 1000 | + | ||
| 1001 | + QPDFObjGen const& og = (*iter).first; | ||
| 1002 | + result.push_back(QPDFObjectHandle::Factory::newIndirect( | ||
| 1003 | + this, og.getObj(), og.getGen())); | ||
| 1004 | + } | ||
| 1005 | + return result; | ||
| 1006 | +} | ||
| 1007 | + | ||
| 992 | void | 1008 | void |
| 993 | QPDF::setLastObjectDescription(std::string const& description, | 1009 | QPDF::setLastObjectDescription(std::string const& description, |
| 994 | int objid, int generation) | 1010 | int objid, int generation) |
libqpdf/QPDFWriter.cc
| @@ -58,6 +58,7 @@ QPDFWriter::init() | @@ -58,6 +58,7 @@ QPDFWriter::init() | ||
| 58 | stream_data_mode = qpdf_s_compress; | 58 | stream_data_mode = qpdf_s_compress; |
| 59 | qdf_mode = false; | 59 | qdf_mode = false; |
| 60 | precheck_streams = false; | 60 | precheck_streams = false; |
| 61 | + preserve_unreferenced_objects = false; | ||
| 61 | static_id = false; | 62 | static_id = false; |
| 62 | suppress_original_object_ids = false; | 63 | suppress_original_object_ids = false; |
| 63 | direct_stream_lengths = true; | 64 | direct_stream_lengths = true; |
| @@ -184,6 +185,12 @@ QPDFWriter::setPrecheckStreams(bool val) | @@ -184,6 +185,12 @@ QPDFWriter::setPrecheckStreams(bool val) | ||
| 184 | } | 185 | } |
| 185 | 186 | ||
| 186 | void | 187 | void |
| 188 | +QPDFWriter::setPreserveUnreferencedObjects(bool val) | ||
| 189 | +{ | ||
| 190 | + this->preserve_unreferenced_objects = val; | ||
| 191 | +} | ||
| 192 | + | ||
| 193 | +void | ||
| 187 | QPDFWriter::setMinimumPDFVersion(std::string const& version) | 194 | QPDFWriter::setMinimumPDFVersion(std::string const& version) |
| 188 | { | 195 | { |
| 189 | setMinimumPDFVersion(version, 0); | 196 | setMinimumPDFVersion(version, 0); |
| @@ -3074,6 +3081,17 @@ QPDFWriter::writeStandard() | @@ -3074,6 +3081,17 @@ QPDFWriter::writeStandard() | ||
| 3074 | writeHeader(); | 3081 | writeHeader(); |
| 3075 | writeString(this->extra_header_text); | 3082 | writeString(this->extra_header_text); |
| 3076 | 3083 | ||
| 3084 | + if (this->preserve_unreferenced_objects) | ||
| 3085 | + { | ||
| 3086 | + QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard"); | ||
| 3087 | + std::vector<QPDFObjectHandle> all = this->pdf.getAllObjects(); | ||
| 3088 | + for (std::vector<QPDFObjectHandle>::iterator iter = all.begin(); | ||
| 3089 | + iter != all.end(); ++iter) | ||
| 3090 | + { | ||
| 3091 | + enqueueObject(*iter); | ||
| 3092 | + } | ||
| 3093 | + } | ||
| 3094 | + | ||
| 3077 | // Put root first on queue. | 3095 | // Put root first on queue. |
| 3078 | QPDFObjectHandle trailer = getTrimmedTrailer(); | 3096 | QPDFObjectHandle trailer = getTrimmedTrailer(); |
| 3079 | enqueueObject(trailer.getKey("/Root")); | 3097 | enqueueObject(trailer.getKey("/Root")); |
manual/qpdf-manual.xml
| @@ -839,6 +839,27 @@ outfile.pdf</option> | @@ -839,6 +839,27 @@ outfile.pdf</option> | ||
| 839 | </listitem> | 839 | </listitem> |
| 840 | </varlistentry> | 840 | </varlistentry> |
| 841 | <varlistentry> | 841 | <varlistentry> |
| 842 | + <term><option>--preserve-unreferenced</option></term> | ||
| 843 | + <listitem> | ||
| 844 | + <para> | ||
| 845 | + Tells qpdf to preserve objects that are not referenced when | ||
| 846 | + writing the file. Ordinarily any object that is not referenced | ||
| 847 | + in a traversal of the document from the trailer dictionary | ||
| 848 | + will be discarded. This may be useful in working with some | ||
| 849 | + damaged files or inspecting files with known unreferenced | ||
| 850 | + objects. | ||
| 851 | + </para> | ||
| 852 | + <para> | ||
| 853 | + This flag is ignored for linearized files. For other files, it | ||
| 854 | + causes objects in the new file to be written in order by | ||
| 855 | + object ID from the original file. This does not mean that | ||
| 856 | + object numbers will be the same since qpdf may create stream | ||
| 857 | + lengths as direct or indirect differently from the original | ||
| 858 | + file, and the original file may have gaps in its numbering. | ||
| 859 | + </para> | ||
| 860 | + </listitem> | ||
| 861 | + </varlistentry> | ||
| 862 | + <varlistentry> | ||
| 842 | <term><option>--qdf</option></term> | 863 | <term><option>--qdf</option></term> |
| 843 | <listitem> | 864 | <listitem> |
| 844 | <para> | 865 | <para> |
qpdf/qpdf.cc
| @@ -203,6 +203,7 @@ familiar with the PDF file format or who are PDF developers.\n\ | @@ -203,6 +203,7 @@ familiar with the PDF file format or who are PDF developers.\n\ | ||
| 203 | --object-streams=mode controls handing of object streams\n\ | 203 | --object-streams=mode controls handing of object streams\n\ |
| 204 | --ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\ | 204 | --ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\ |
| 205 | --precheck-streams precheck ability to decode streams\n\ | 205 | --precheck-streams precheck ability to decode streams\n\ |
| 206 | +--preserve-unreferenced preserve unreferenced objects\n\ | ||
| 206 | --qdf turns on \"QDF mode\" (below)\n\ | 207 | --qdf turns on \"QDF mode\" (below)\n\ |
| 207 | --min-version=version sets the minimum PDF version of the output file\n\ | 208 | --min-version=version sets the minimum PDF version of the output file\n\ |
| 208 | --force-version=version forces this to be the PDF version of the output file\n\ | 209 | --force-version=version forces this to be the PDF version of the output file\n\ |
| @@ -1030,6 +1031,7 @@ int main(int argc, char* argv[]) | @@ -1030,6 +1031,7 @@ int main(int argc, char* argv[]) | ||
| 1030 | bool ignore_xref_streams = false; | 1031 | bool ignore_xref_streams = false; |
| 1031 | bool qdf_mode = false; | 1032 | bool qdf_mode = false; |
| 1032 | bool precheck_streams = false; | 1033 | bool precheck_streams = false; |
| 1034 | + bool preserve_unreferenced_objects = false; | ||
| 1033 | std::string min_version; | 1035 | std::string min_version; |
| 1034 | std::string force_version; | 1036 | std::string force_version; |
| 1035 | 1037 | ||
| @@ -1219,6 +1221,10 @@ int main(int argc, char* argv[]) | @@ -1219,6 +1221,10 @@ int main(int argc, char* argv[]) | ||
| 1219 | { | 1221 | { |
| 1220 | precheck_streams = true; | 1222 | precheck_streams = true; |
| 1221 | } | 1223 | } |
| 1224 | + else if (strcmp(arg, "preserve-unreferenced") == 0) | ||
| 1225 | + { | ||
| 1226 | + preserve_unreferenced_objects = true; | ||
| 1227 | + } | ||
| 1222 | else if (strcmp(arg, "min-version") == 0) | 1228 | else if (strcmp(arg, "min-version") == 0) |
| 1223 | { | 1229 | { |
| 1224 | if (parameter == 0) | 1230 | if (parameter == 0) |
| @@ -1714,6 +1720,10 @@ int main(int argc, char* argv[]) | @@ -1714,6 +1720,10 @@ int main(int argc, char* argv[]) | ||
| 1714 | { | 1720 | { |
| 1715 | w.setPrecheckStreams(true); | 1721 | w.setPrecheckStreams(true); |
| 1716 | } | 1722 | } |
| 1723 | + if (preserve_unreferenced_objects) | ||
| 1724 | + { | ||
| 1725 | + w.setPreserveUnreferencedObjects(true); | ||
| 1726 | + } | ||
| 1717 | if (normalize_set) | 1727 | if (normalize_set) |
| 1718 | { | 1728 | { |
| 1719 | w.setContentNormalization(normalize); | 1729 | w.setContentNormalization(normalize); |
qpdf/qpdf.testcov
| @@ -280,3 +280,4 @@ QPDFObjectHandle found fake 1 | @@ -280,3 +280,4 @@ QPDFObjectHandle found fake 1 | ||
| 280 | QPDFObjectHandle no val for last key 0 | 280 | QPDFObjectHandle no val for last key 0 |
| 281 | QPDF resolve failure to null 0 | 281 | QPDF resolve failure to null 0 |
| 282 | QPDFWriter precheck stream 0 | 282 | QPDFWriter precheck stream 0 |
| 283 | +QPDFWriter preserve unreferenced standard 0 |
qpdf/qtest/qpdf.test
| @@ -743,6 +743,24 @@ $td->runtest("check output", | @@ -743,6 +743,24 @@ $td->runtest("check output", | ||
| 743 | {$td->FILE => "bad-data-precheck.pdf"}); | 743 | {$td->FILE => "bad-data-precheck.pdf"}); |
| 744 | show_ntests(); | 744 | show_ntests(); |
| 745 | # ---------- | 745 | # ---------- |
| 746 | +$td->notify("--- Preserve unreferenced objects ---"); | ||
| 747 | +$n_tests += 4; | ||
| 748 | + | ||
| 749 | +$td->runtest("drop unused objects", | ||
| 750 | + {$td->COMMAND => "qpdf --static-id unreferenced-objects.pdf a.pdf"}, | ||
| 751 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | ||
| 752 | +$td->runtest("check output", | ||
| 753 | + {$td->FILE => "a.pdf"}, | ||
| 754 | + {$td->FILE => "unreferenced-dropped.pdf"}); | ||
| 755 | +$td->runtest("keep unused objects", | ||
| 756 | + {$td->COMMAND => "qpdf --static-id --preserve-unreferenced" . | ||
| 757 | + " unreferenced-objects.pdf a.pdf"}, | ||
| 758 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | ||
| 759 | +$td->runtest("check output", | ||
| 760 | + {$td->FILE => "a.pdf"}, | ||
| 761 | + {$td->FILE => "unreferenced-preserved.pdf"}); | ||
| 762 | +show_ntests(); | ||
| 763 | +# ---------- | ||
| 746 | $td->notify("--- Copy Foreign Objects ---"); | 764 | $td->notify("--- Copy Foreign Objects ---"); |
| 747 | $n_tests += 7; | 765 | $n_tests += 7; |
| 748 | 766 |
qpdf/qtest/qpdf/unreferenced-dropped.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/unreferenced-objects.pdf
0 → 100644
| 1 | +%PDF-1.3 | ||
| 2 | +%¿÷¢þ | ||
| 3 | +%QDF-1.0 | ||
| 4 | + | ||
| 5 | +2 0 obj | ||
| 6 | +<< | ||
| 7 | + /Pages 1 0 R | ||
| 8 | + /Type /Catalog | ||
| 9 | +>> | ||
| 10 | +endobj | ||
| 11 | + | ||
| 12 | +1 0 obj | ||
| 13 | +<< | ||
| 14 | + /Count 1 | ||
| 15 | + /Kids [ | ||
| 16 | + 3 0 R | ||
| 17 | + ] | ||
| 18 | + /Type /Pages | ||
| 19 | +>> | ||
| 20 | +endobj | ||
| 21 | + | ||
| 22 | +%% Page 1 | ||
| 23 | +3 0 obj | ||
| 24 | +<< | ||
| 25 | + /Contents 4 0 R | ||
| 26 | + /MediaBox [ | ||
| 27 | + 0 | ||
| 28 | + 0 | ||
| 29 | + 612 | ||
| 30 | + 792 | ||
| 31 | + ] | ||
| 32 | + /Parent 1 0 R | ||
| 33 | + /Resources << | ||
| 34 | + /Font << | ||
| 35 | + /F1 6 0 R | ||
| 36 | + >> | ||
| 37 | + /ProcSet 9 0 R | ||
| 38 | + >> | ||
| 39 | + /Type /Page | ||
| 40 | +>> | ||
| 41 | +endobj | ||
| 42 | + | ||
| 43 | +%% Contents for page 1 | ||
| 44 | +4 0 obj | ||
| 45 | +<< | ||
| 46 | + /Length 5 0 R | ||
| 47 | +>> | ||
| 48 | +stream | ||
| 49 | +BT | ||
| 50 | + /F1 24 Tf | ||
| 51 | + 72 720 Td | ||
| 52 | + (Potato) Tj | ||
| 53 | +ET | ||
| 54 | +endstream | ||
| 55 | +endobj | ||
| 56 | + | ||
| 57 | +5 0 obj | ||
| 58 | +44 | ||
| 59 | +endobj | ||
| 60 | + | ||
| 61 | +6 0 obj | ||
| 62 | +<< | ||
| 63 | + /BaseFont /Helvetica | ||
| 64 | + /Encoding /WinAnsiEncoding | ||
| 65 | + /Name /F1 | ||
| 66 | + /Subtype /Type1 | ||
| 67 | + /Type /Font | ||
| 68 | +>> | ||
| 69 | +endobj | ||
| 70 | + | ||
| 71 | +7 0 obj | ||
| 72 | +[ 8 0 R ] | ||
| 73 | +endobj | ||
| 74 | + | ||
| 75 | +8 0 obj | ||
| 76 | +/Potato | ||
| 77 | +endobj | ||
| 78 | + | ||
| 79 | +9 0 obj | ||
| 80 | +[ | ||
| 81 | + /PDF | ||
| 82 | + /Text | ||
| 83 | +] | ||
| 84 | +endobj | ||
| 85 | + | ||
| 86 | +xref | ||
| 87 | +0 10 | ||
| 88 | +0000000000 65535 f | ||
| 89 | +0000000079 00000 n | ||
| 90 | +0000000025 00000 n | ||
| 91 | +0000000161 00000 n | ||
| 92 | +0000000376 00000 n | ||
| 93 | +0000000475 00000 n | ||
| 94 | +0000000494 00000 n | ||
| 95 | +0000000612 00000 n | ||
| 96 | +0000000638 00000 n | ||
| 97 | +0000000662 00000 n | ||
| 98 | +trailer << | ||
| 99 | + /Root 2 0 R | ||
| 100 | + /Size 10 | ||
| 101 | + /ID [<5c2381b459937c988290150df782f1fd><5c2381b459937c988290150df782f1fd>] | ||
| 102 | +>> | ||
| 103 | +startxref | ||
| 104 | +697 | ||
| 105 | +%%EOF |
qpdf/qtest/qpdf/unreferenced-preserved.pdf
0 → 100644
No preview for this file type