Commit 3a1ff5ded9cf22e114991b5a49857b54f8e56b02
1 parent
a94a729f
Add option to preserve unreferenced objects
Showing
12 changed files
with
212 additions
and
1 deletion
ChangeLog
| 1 | +2017-07-28 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * Add --preserve-unreferenced command-line option and | |
| 4 | + setPreserveUnreferencedObjects method to QPDFWriter. This option | |
| 5 | + causes QPDFWriter to write all objects from the input file to the | |
| 6 | + output file regardless of whether the objects are referenced. | |
| 7 | + Objects are written to the output file in numerical order from the | |
| 8 | + input file. This option has no effect for linearized files. | |
| 9 | + | |
| 1 | 10 | 2017-07-27 Jay Berkenbilt <ejb@ql.org> |
| 2 | 11 | |
| 3 | 12 | * Add --precheck-streams command-line option and setStreamPrecheck |
| 4 | - option to QPDFWriter to tell QPDFWriter to attempt decoding a | |
| 13 | + method to QPDFWriter to tell QPDFWriter to attempt decoding a | |
| 5 | 14 | stream fully before deciding whether to filter it or not. |
| 6 | 15 | |
| 7 | 16 | * Recover gracefully from streams that aren't filterable because | ... | ... |
include/qpdf/QPDF.hh
| ... | ... | @@ -396,6 +396,12 @@ class QPDF |
| 396 | 396 | QPDF_DLL |
| 397 | 397 | void showXRefTable(); |
| 398 | 398 | |
| 399 | + // Returns a list of indirect objects for every object in the xref | |
| 400 | + // table. Useful for discovering objects that are not otherwise | |
| 401 | + // referenced. | |
| 402 | + QPDF_DLL | |
| 403 | + std::vector<QPDFObjectHandle> getAllObjects(); | |
| 404 | + | |
| 399 | 405 | // Optimization support -- see doc/optimization. Implemented in |
| 400 | 406 | // QPDF_optimization.cc |
| 401 | 407 | ... | ... |
include/qpdf/QPDFWriter.hh
| ... | ... | @@ -155,6 +155,12 @@ class QPDFWriter |
| 155 | 155 | QPDF_DLL |
| 156 | 156 | void setPrecheckStreams(bool); |
| 157 | 157 | |
| 158 | + // Preserve unreferenced objects. The default behavior is to | |
| 159 | + // discard any object that is not visited during a traversal of | |
| 160 | + // the object structure from the trailer. | |
| 161 | + QPDF_DLL | |
| 162 | + void setPreserveUnreferencedObjects(bool); | |
| 163 | + | |
| 158 | 164 | // Set the minimum PDF version. If the PDF version of the input |
| 159 | 165 | // file (or previously set minimum version) is less than the |
| 160 | 166 | // version passed to this method, the PDF version of the output |
| ... | ... | @@ -427,6 +433,7 @@ class QPDFWriter |
| 427 | 433 | qpdf_stream_data_e stream_data_mode; |
| 428 | 434 | bool qdf_mode; |
| 429 | 435 | bool precheck_streams; |
| 436 | + bool preserve_unreferenced_objects; | |
| 430 | 437 | bool static_id; |
| 431 | 438 | bool suppress_original_object_ids; |
| 432 | 439 | bool direct_stream_lengths; | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -989,6 +989,22 @@ QPDF::showXRefTable() |
| 989 | 989 | } |
| 990 | 990 | } |
| 991 | 991 | |
| 992 | +std::vector<QPDFObjectHandle> | |
| 993 | +QPDF::getAllObjects() | |
| 994 | +{ | |
| 995 | + std::vector<QPDFObjectHandle> result; | |
| 996 | + for (std::map<QPDFObjGen, QPDFXRefEntry>::iterator iter = | |
| 997 | + this->xref_table.begin(); | |
| 998 | + iter != this->xref_table.end(); ++iter) | |
| 999 | + { | |
| 1000 | + | |
| 1001 | + QPDFObjGen const& og = (*iter).first; | |
| 1002 | + result.push_back(QPDFObjectHandle::Factory::newIndirect( | |
| 1003 | + this, og.getObj(), og.getGen())); | |
| 1004 | + } | |
| 1005 | + return result; | |
| 1006 | +} | |
| 1007 | + | |
| 992 | 1008 | void |
| 993 | 1009 | QPDF::setLastObjectDescription(std::string const& description, |
| 994 | 1010 | int objid, int generation) | ... | ... |
libqpdf/QPDFWriter.cc
| ... | ... | @@ -58,6 +58,7 @@ QPDFWriter::init() |
| 58 | 58 | stream_data_mode = qpdf_s_compress; |
| 59 | 59 | qdf_mode = false; |
| 60 | 60 | precheck_streams = false; |
| 61 | + preserve_unreferenced_objects = false; | |
| 61 | 62 | static_id = false; |
| 62 | 63 | suppress_original_object_ids = false; |
| 63 | 64 | direct_stream_lengths = true; |
| ... | ... | @@ -184,6 +185,12 @@ QPDFWriter::setPrecheckStreams(bool val) |
| 184 | 185 | } |
| 185 | 186 | |
| 186 | 187 | void |
| 188 | +QPDFWriter::setPreserveUnreferencedObjects(bool val) | |
| 189 | +{ | |
| 190 | + this->preserve_unreferenced_objects = val; | |
| 191 | +} | |
| 192 | + | |
| 193 | +void | |
| 187 | 194 | QPDFWriter::setMinimumPDFVersion(std::string const& version) |
| 188 | 195 | { |
| 189 | 196 | setMinimumPDFVersion(version, 0); |
| ... | ... | @@ -3074,6 +3081,17 @@ QPDFWriter::writeStandard() |
| 3074 | 3081 | writeHeader(); |
| 3075 | 3082 | writeString(this->extra_header_text); |
| 3076 | 3083 | |
| 3084 | + if (this->preserve_unreferenced_objects) | |
| 3085 | + { | |
| 3086 | + QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard"); | |
| 3087 | + std::vector<QPDFObjectHandle> all = this->pdf.getAllObjects(); | |
| 3088 | + for (std::vector<QPDFObjectHandle>::iterator iter = all.begin(); | |
| 3089 | + iter != all.end(); ++iter) | |
| 3090 | + { | |
| 3091 | + enqueueObject(*iter); | |
| 3092 | + } | |
| 3093 | + } | |
| 3094 | + | |
| 3077 | 3095 | // Put root first on queue. |
| 3078 | 3096 | QPDFObjectHandle trailer = getTrimmedTrailer(); |
| 3079 | 3097 | enqueueObject(trailer.getKey("/Root")); | ... | ... |
manual/qpdf-manual.xml
| ... | ... | @@ -839,6 +839,27 @@ outfile.pdf</option> |
| 839 | 839 | </listitem> |
| 840 | 840 | </varlistentry> |
| 841 | 841 | <varlistentry> |
| 842 | + <term><option>--preserve-unreferenced</option></term> | |
| 843 | + <listitem> | |
| 844 | + <para> | |
| 845 | + Tells qpdf to preserve objects that are not referenced when | |
| 846 | + writing the file. Ordinarily any object that is not referenced | |
| 847 | + in a traversal of the document from the trailer dictionary | |
| 848 | + will be discarded. This may be useful in working with some | |
| 849 | + damaged files or inspecting files with known unreferenced | |
| 850 | + objects. | |
| 851 | + </para> | |
| 852 | + <para> | |
| 853 | + This flag is ignored for linearized files. Otherwise, it has | |
| 854 | + the effect of causing objects in the new file to be written in | |
| 855 | + order by object ID from the original file. This does not mean | |
| 856 | + that object numbers will be the same, since qpdf may create | |
| 857 | + stream lengths as direct or indirect differently from the | |
| 858 | + original file, and the original file may have gaps in its numbering. | |
| 859 | + </para> | |
| 860 | + </listitem> | |
| 861 | + </varlistentry> | |
| 862 | + <varlistentry> | |
| 842 | 863 | <term><option>--qdf</option></term> |
| 843 | 864 | <listitem> |
| 844 | 865 | <para> | ... | ... |
qpdf/qpdf.cc
| ... | ... | @@ -203,6 +203,7 @@ familiar with the PDF file format or who are PDF developers.\n\ |
| 203 | 203 | --object-streams=mode controls handing of object streams\n\ |
| 204 | 204 | --ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\ |
| 205 | 205 | --precheck-streams precheck ability to decode streams\n\ |
| 206 | +--preserve-unreferenced preserve unreferenced objects\n\ | |
| 206 | 207 | --qdf turns on \"QDF mode\" (below)\n\ |
| 207 | 208 | --min-version=version sets the minimum PDF version of the output file\n\ |
| 208 | 209 | --force-version=version forces this to be the PDF version of the output file\n\ |
| ... | ... | @@ -1030,6 +1031,7 @@ int main(int argc, char* argv[]) |
| 1030 | 1031 | bool ignore_xref_streams = false; |
| 1031 | 1032 | bool qdf_mode = false; |
| 1032 | 1033 | bool precheck_streams = false; |
| 1034 | + bool preserve_unreferenced_objects = false; | |
| 1033 | 1035 | std::string min_version; |
| 1034 | 1036 | std::string force_version; |
| 1035 | 1037 | |
| ... | ... | @@ -1219,6 +1221,10 @@ int main(int argc, char* argv[]) |
| 1219 | 1221 | { |
| 1220 | 1222 | precheck_streams = true; |
| 1221 | 1223 | } |
| 1224 | + else if (strcmp(arg, "preserve-unreferenced") == 0) | |
| 1225 | + { | |
| 1226 | + preserve_unreferenced_objects = true; | |
| 1227 | + } | |
| 1222 | 1228 | else if (strcmp(arg, "min-version") == 0) |
| 1223 | 1229 | { |
| 1224 | 1230 | if (parameter == 0) |
| ... | ... | @@ -1714,6 +1720,10 @@ int main(int argc, char* argv[]) |
| 1714 | 1720 | { |
| 1715 | 1721 | w.setPrecheckStreams(true); |
| 1716 | 1722 | } |
| 1723 | + if (preserve_unreferenced_objects) | |
| 1724 | + { | |
| 1725 | + w.setPreserveUnreferencedObjects(true); | |
| 1726 | + } | |
| 1717 | 1727 | if (normalize_set) |
| 1718 | 1728 | { |
| 1719 | 1729 | w.setContentNormalization(normalize); | ... | ... |
qpdf/qpdf.testcov
qpdf/qtest/qpdf.test
| ... | ... | @@ -743,6 +743,24 @@ $td->runtest("check output", |
| 743 | 743 | {$td->FILE => "bad-data-precheck.pdf"}); |
| 744 | 744 | show_ntests(); |
| 745 | 745 | # ---------- |
| 746 | +$td->notify("--- Preserve unreferenced objects ---"); | |
| 747 | +$n_tests += 4; | |
| 748 | + | |
| 749 | +$td->runtest("drop unused objects", | |
| 750 | + {$td->COMMAND => "qpdf --static-id unreferenced-objects.pdf a.pdf"}, | |
| 751 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | |
| 752 | +$td->runtest("check output", | |
| 753 | + {$td->FILE => "a.pdf"}, | |
| 754 | + {$td->FILE => "unreferenced-dropped.pdf"}); | |
| 755 | +$td->runtest("keep unused objects", | |
| 756 | + {$td->COMMAND => "qpdf --static-id --preserve-unreferenced" . | |
| 757 | + " unreferenced-objects.pdf a.pdf"}, | |
| 758 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | |
| 759 | +$td->runtest("check output", | |
| 760 | + {$td->FILE => "a.pdf"}, | |
| 761 | + {$td->FILE => "unreferenced-preserved.pdf"}); | |
| 762 | +show_ntests(); | |
| 763 | +# ---------- | |
| 746 | 764 | $td->notify("--- Copy Foreign Objects ---"); |
| 747 | 765 | $n_tests += 7; |
| 748 | 766 | ... | ... |
qpdf/qtest/qpdf/unreferenced-dropped.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/unreferenced-objects.pdf
0 → 100644
| 1 | +%PDF-1.3 | |
| 2 | +%¿÷¢þ | |
| 3 | +%QDF-1.0 | |
| 4 | + | |
| 5 | +2 0 obj | |
| 6 | +<< | |
| 7 | + /Pages 1 0 R | |
| 8 | + /Type /Catalog | |
| 9 | +>> | |
| 10 | +endobj | |
| 11 | + | |
| 12 | +1 0 obj | |
| 13 | +<< | |
| 14 | + /Count 1 | |
| 15 | + /Kids [ | |
| 16 | + 3 0 R | |
| 17 | + ] | |
| 18 | + /Type /Pages | |
| 19 | +>> | |
| 20 | +endobj | |
| 21 | + | |
| 22 | +%% Page 1 | |
| 23 | +3 0 obj | |
| 24 | +<< | |
| 25 | + /Contents 4 0 R | |
| 26 | + /MediaBox [ | |
| 27 | + 0 | |
| 28 | + 0 | |
| 29 | + 612 | |
| 30 | + 792 | |
| 31 | + ] | |
| 32 | + /Parent 1 0 R | |
| 33 | + /Resources << | |
| 34 | + /Font << | |
| 35 | + /F1 6 0 R | |
| 36 | + >> | |
| 37 | + /ProcSet 9 0 R | |
| 38 | + >> | |
| 39 | + /Type /Page | |
| 40 | +>> | |
| 41 | +endobj | |
| 42 | + | |
| 43 | +%% Contents for page 1 | |
| 44 | +4 0 obj | |
| 45 | +<< | |
| 46 | + /Length 5 0 R | |
| 47 | +>> | |
| 48 | +stream | |
| 49 | +BT | |
| 50 | + /F1 24 Tf | |
| 51 | + 72 720 Td | |
| 52 | + (Potato) Tj | |
| 53 | +ET | |
| 54 | +endstream | |
| 55 | +endobj | |
| 56 | + | |
| 57 | +5 0 obj | |
| 58 | +44 | |
| 59 | +endobj | |
| 60 | + | |
| 61 | +6 0 obj | |
| 62 | +<< | |
| 63 | + /BaseFont /Helvetica | |
| 64 | + /Encoding /WinAnsiEncoding | |
| 65 | + /Name /F1 | |
| 66 | + /Subtype /Type1 | |
| 67 | + /Type /Font | |
| 68 | +>> | |
| 69 | +endobj | |
| 70 | + | |
| 71 | +7 0 obj | |
| 72 | +[ 8 0 R ] | |
| 73 | +endobj | |
| 74 | + | |
| 75 | +8 0 obj | |
| 76 | +/Potato | |
| 77 | +endobj | |
| 78 | + | |
| 79 | +9 0 obj | |
| 80 | +[ | |
| 81 | + /PDF | |
| 82 | + /Text | |
| 83 | +] | |
| 84 | +endobj | |
| 85 | + | |
| 86 | +xref | |
| 87 | +0 10 | |
| 88 | +0000000000 65535 f | |
| 89 | +0000000079 00000 n | |
| 90 | +0000000025 00000 n | |
| 91 | +0000000161 00000 n | |
| 92 | +0000000376 00000 n | |
| 93 | +0000000475 00000 n | |
| 94 | +0000000494 00000 n | |
| 95 | +0000000612 00000 n | |
| 96 | +0000000638 00000 n | |
| 97 | +0000000662 00000 n | |
| 98 | +trailer << | |
| 99 | + /Root 2 0 R | |
| 100 | + /Size 10 | |
| 101 | + /ID [<5c2381b459937c988290150df782f1fd><5c2381b459937c988290150df782f1fd>] | |
| 102 | +>> | |
| 103 | +startxref | |
| 104 | +697 | |
| 105 | +%%EOF | ... | ... |
qpdf/qtest/qpdf/unreferenced-preserved.pdf
0 → 100644
No preview for this file type