Commit 3a1ff5ded9cf22e114991b5a49857b54f8e56b02

Authored by Jay Berkenbilt
1 parent a94a729f

Add option to preserve unreferenced objects

ChangeLog
  1 +2017-07-28 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Add --preserve-unreferenced command-line option and
  4 + setPreserveUnreferencedObjects method to QPDFWriter. This option
  5 + causes QPDFWriter to write all objects from the input file to the
  6 + output file regardless of whether the objects are referenced.
  7 + Objects are written to the output file in numerical order from the
  8 + input file. This option has no effect for linearized files.
  9 +
1 2017-07-27 Jay Berkenbilt <ejb@ql.org> 10 2017-07-27 Jay Berkenbilt <ejb@ql.org>
2 11
3 * Add --precheck-streams command-line option and setStreamPrecheck 12 * Add --precheck-streams command-line option and setStreamPrecheck
4 - option to QPDFWriter to tell QPDFWriter to attempt decoding a 13 + method to QPDFWriter to tell QPDFWriter to attempt decoding a
5 stream fully before deciding whether to filter it or not. 14 stream fully before deciding whether to filter it or not.
6 15
7 * Recover gracefully from streams that aren't filterable because 16 * Recover gracefully from streams that aren't filterable because
include/qpdf/QPDF.hh
@@ -396,6 +396,12 @@ class QPDF @@ -396,6 +396,12 @@ class QPDF
396 QPDF_DLL 396 QPDF_DLL
397 void showXRefTable(); 397 void showXRefTable();
398 398
  399 + // Returns a list of indirect objects for every object in the xref
  400 + // table. Useful for discovering objects that are not otherwise
  401 + // referenced.
  402 + QPDF_DLL
  403 + std::vector<QPDFObjectHandle> getAllObjects();
  404 +
399 // Optimization support -- see doc/optimization. Implemented in 405 // Optimization support -- see doc/optimization. Implemented in
400 // QPDF_optimization.cc 406 // QPDF_optimization.cc
401 407
include/qpdf/QPDFWriter.hh
@@ -155,6 +155,12 @@ class QPDFWriter @@ -155,6 +155,12 @@ class QPDFWriter
155 QPDF_DLL 155 QPDF_DLL
156 void setPrecheckStreams(bool); 156 void setPrecheckStreams(bool);
157 157
  158 + // Preserve unreferenced objects. The default behavior is to
  159 + // discard any object that is not visited during a traversal of
  160 + // the object structure from the trailer.
  161 + QPDF_DLL
  162 + void setPreserveUnreferencedObjects(bool);
  163 +
158 // Set the minimum PDF version. If the PDF version of the input 164 // Set the minimum PDF version. If the PDF version of the input
159 // file (or previously set minimum version) is less than the 165 // file (or previously set minimum version) is less than the
160 // version passed to this method, the PDF version of the output 166 // version passed to this method, the PDF version of the output
@@ -427,6 +433,7 @@ class QPDFWriter @@ -427,6 +433,7 @@ class QPDFWriter
427 qpdf_stream_data_e stream_data_mode; 433 qpdf_stream_data_e stream_data_mode;
428 bool qdf_mode; 434 bool qdf_mode;
429 bool precheck_streams; 435 bool precheck_streams;
  436 + bool preserve_unreferenced_objects;
430 bool static_id; 437 bool static_id;
431 bool suppress_original_object_ids; 438 bool suppress_original_object_ids;
432 bool direct_stream_lengths; 439 bool direct_stream_lengths;
libqpdf/QPDF.cc
@@ -989,6 +989,22 @@ QPDF::showXRefTable() @@ -989,6 +989,22 @@ QPDF::showXRefTable()
989 } 989 }
990 } 990 }
991 991
  992 +std::vector<QPDFObjectHandle>
  993 +QPDF::getAllObjects()
  994 +{
  995 + std::vector<QPDFObjectHandle> result;
  996 + for (std::map<QPDFObjGen, QPDFXRefEntry>::iterator iter =
  997 + this->xref_table.begin();
  998 + iter != this->xref_table.end(); ++iter)
  999 + {
  1000 +
  1001 + QPDFObjGen const& og = (*iter).first;
  1002 + result.push_back(QPDFObjectHandle::Factory::newIndirect(
  1003 + this, og.getObj(), og.getGen()));
  1004 + }
  1005 + return result;
  1006 +}
  1007 +
992 void 1008 void
993 QPDF::setLastObjectDescription(std::string const& description, 1009 QPDF::setLastObjectDescription(std::string const& description,
994 int objid, int generation) 1010 int objid, int generation)
libqpdf/QPDFWriter.cc
@@ -58,6 +58,7 @@ QPDFWriter::init() @@ -58,6 +58,7 @@ QPDFWriter::init()
58 stream_data_mode = qpdf_s_compress; 58 stream_data_mode = qpdf_s_compress;
59 qdf_mode = false; 59 qdf_mode = false;
60 precheck_streams = false; 60 precheck_streams = false;
  61 + preserve_unreferenced_objects = false;
61 static_id = false; 62 static_id = false;
62 suppress_original_object_ids = false; 63 suppress_original_object_ids = false;
63 direct_stream_lengths = true; 64 direct_stream_lengths = true;
@@ -184,6 +185,12 @@ QPDFWriter::setPrecheckStreams(bool val) @@ -184,6 +185,12 @@ QPDFWriter::setPrecheckStreams(bool val)
184 } 185 }
185 186
186 void 187 void
  188 +QPDFWriter::setPreserveUnreferencedObjects(bool val)
  189 +{
  190 + this->preserve_unreferenced_objects = val;
  191 +}
  192 +
  193 +void
187 QPDFWriter::setMinimumPDFVersion(std::string const& version) 194 QPDFWriter::setMinimumPDFVersion(std::string const& version)
188 { 195 {
189 setMinimumPDFVersion(version, 0); 196 setMinimumPDFVersion(version, 0);
@@ -3074,6 +3081,17 @@ QPDFWriter::writeStandard() @@ -3074,6 +3081,17 @@ QPDFWriter::writeStandard()
3074 writeHeader(); 3081 writeHeader();
3075 writeString(this->extra_header_text); 3082 writeString(this->extra_header_text);
3076 3083
  3084 + if (this->preserve_unreferenced_objects)
  3085 + {
  3086 + QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard");
  3087 + std::vector<QPDFObjectHandle> all = this->pdf.getAllObjects();
  3088 + for (std::vector<QPDFObjectHandle>::iterator iter = all.begin();
  3089 + iter != all.end(); ++iter)
  3090 + {
  3091 + enqueueObject(*iter);
  3092 + }
  3093 + }
  3094 +
3077 // Put root first on queue. 3095 // Put root first on queue.
3078 QPDFObjectHandle trailer = getTrimmedTrailer(); 3096 QPDFObjectHandle trailer = getTrimmedTrailer();
3079 enqueueObject(trailer.getKey("/Root")); 3097 enqueueObject(trailer.getKey("/Root"));
manual/qpdf-manual.xml
@@ -839,6 +839,27 @@ outfile.pdf&lt;/option&gt; @@ -839,6 +839,27 @@ outfile.pdf&lt;/option&gt;
839 </listitem> 839 </listitem>
840 </varlistentry> 840 </varlistentry>
841 <varlistentry> 841 <varlistentry>
  842 + <term><option>--preserve-unreferenced</option></term>
  843 + <listitem>
  844 + <para>
  845 + Tells qpdf to preserve objects that are not referenced when
  846 + writing the file. Ordinarily any object that is not referenced
  847 + in a traversal of the document from the trailer dictionary
  848 + will be discarded. This may be useful in working with some
  849 + damaged files or inspecting files with known unreferenced
  850 + objects.
  851 + </para>
  852 + <para>
  853 + This flag is ignored for linearized files. Otherwise, it has the
  854 + effect of causing objects in the new file to be written in order by
  855 + object ID from the original file. This does not mean that
  856 + object numbers will be the same since qpdf may create stream
  857 + lengths as direct or indirect differently from the original
  858 + file, and the original file may have gaps in its numbering.
  859 + </para>
  860 + </listitem>
  861 + </varlistentry>
  862 + <varlistentry>
842 <term><option>--qdf</option></term> 863 <term><option>--qdf</option></term>
843 <listitem> 864 <listitem>
844 <para> 865 <para>
qpdf/qpdf.cc
@@ -203,6 +203,7 @@ familiar with the PDF file format or who are PDF developers.\n\ @@ -203,6 +203,7 @@ familiar with the PDF file format or who are PDF developers.\n\
203 --object-streams=mode controls handing of object streams\n\ 203 --object-streams=mode controls handing of object streams\n\
204 --ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\ 204 --ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\
205 --precheck-streams precheck ability to decode streams\n\ 205 --precheck-streams precheck ability to decode streams\n\
  206 +--preserve-unreferenced preserve unreferenced objects\n\
206 --qdf turns on \"QDF mode\" (below)\n\ 207 --qdf turns on \"QDF mode\" (below)\n\
207 --min-version=version sets the minimum PDF version of the output file\n\ 208 --min-version=version sets the minimum PDF version of the output file\n\
208 --force-version=version forces this to be the PDF version of the output file\n\ 209 --force-version=version forces this to be the PDF version of the output file\n\
@@ -1030,6 +1031,7 @@ int main(int argc, char* argv[]) @@ -1030,6 +1031,7 @@ int main(int argc, char* argv[])
1030 bool ignore_xref_streams = false; 1031 bool ignore_xref_streams = false;
1031 bool qdf_mode = false; 1032 bool qdf_mode = false;
1032 bool precheck_streams = false; 1033 bool precheck_streams = false;
  1034 + bool preserve_unreferenced_objects = false;
1033 std::string min_version; 1035 std::string min_version;
1034 std::string force_version; 1036 std::string force_version;
1035 1037
@@ -1219,6 +1221,10 @@ int main(int argc, char* argv[]) @@ -1219,6 +1221,10 @@ int main(int argc, char* argv[])
1219 { 1221 {
1220 precheck_streams = true; 1222 precheck_streams = true;
1221 } 1223 }
  1224 + else if (strcmp(arg, "preserve-unreferenced") == 0)
  1225 + {
  1226 + preserve_unreferenced_objects = true;
  1227 + }
1222 else if (strcmp(arg, "min-version") == 0) 1228 else if (strcmp(arg, "min-version") == 0)
1223 { 1229 {
1224 if (parameter == 0) 1230 if (parameter == 0)
@@ -1714,6 +1720,10 @@ int main(int argc, char* argv[]) @@ -1714,6 +1720,10 @@ int main(int argc, char* argv[])
1714 { 1720 {
1715 w.setPrecheckStreams(true); 1721 w.setPrecheckStreams(true);
1716 } 1722 }
  1723 + if (preserve_unreferenced_objects)
  1724 + {
  1725 + w.setPreserveUnreferencedObjects(true);
  1726 + }
1717 if (normalize_set) 1727 if (normalize_set)
1718 { 1728 {
1719 w.setContentNormalization(normalize); 1729 w.setContentNormalization(normalize);
qpdf/qpdf.testcov
@@ -280,3 +280,4 @@ QPDFObjectHandle found fake 1 @@ -280,3 +280,4 @@ QPDFObjectHandle found fake 1
280 QPDFObjectHandle no val for last key 0 280 QPDFObjectHandle no val for last key 0
281 QPDF resolve failure to null 0 281 QPDF resolve failure to null 0
282 QPDFWriter precheck stream 0 282 QPDFWriter precheck stream 0
  283 +QPDFWriter preserve unreferenced standard 0
qpdf/qtest/qpdf.test
@@ -743,6 +743,24 @@ $td-&gt;runtest(&quot;check output&quot;, @@ -743,6 +743,24 @@ $td-&gt;runtest(&quot;check output&quot;,
743 {$td->FILE => "bad-data-precheck.pdf"}); 743 {$td->FILE => "bad-data-precheck.pdf"});
744 show_ntests(); 744 show_ntests();
745 # ---------- 745 # ----------
  746 +$td->notify("--- Preserve unreferenced objects ---");
  747 +$n_tests += 4;
  748 +
  749 +$td->runtest("drop unused objects",
  750 + {$td->COMMAND => "qpdf --static-id unreferenced-objects.pdf a.pdf"},
  751 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  752 +$td->runtest("check output",
  753 + {$td->FILE => "a.pdf"},
  754 + {$td->FILE => "unreferenced-dropped.pdf"});
  755 +$td->runtest("keep unused objects",
  756 + {$td->COMMAND => "qpdf --static-id --preserve-unreferenced" .
  757 + " unreferenced-objects.pdf a.pdf"},
  758 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  759 +$td->runtest("check output",
  760 + {$td->FILE => "a.pdf"},
  761 + {$td->FILE => "unreferenced-preserved.pdf"});
  762 +show_ntests();
  763 +# ----------
746 $td->notify("--- Copy Foreign Objects ---"); 764 $td->notify("--- Copy Foreign Objects ---");
747 $n_tests += 7; 765 $n_tests += 7;
748 766
qpdf/qtest/qpdf/unreferenced-dropped.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/unreferenced-objects.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +2 0 obj
  6 +<<
  7 + /Pages 1 0 R
  8 + /Type /Catalog
  9 +>>
  10 +endobj
  11 +
  12 +1 0 obj
  13 +<<
  14 + /Count 1
  15 + /Kids [
  16 + 3 0 R
  17 + ]
  18 + /Type /Pages
  19 +>>
  20 +endobj
  21 +
  22 +%% Page 1
  23 +3 0 obj
  24 +<<
  25 + /Contents 4 0 R
  26 + /MediaBox [
  27 + 0
  28 + 0
  29 + 612
  30 + 792
  31 + ]
  32 + /Parent 1 0 R
  33 + /Resources <<
  34 + /Font <<
  35 + /F1 6 0 R
  36 + >>
  37 + /ProcSet 9 0 R
  38 + >>
  39 + /Type /Page
  40 +>>
  41 +endobj
  42 +
  43 +%% Contents for page 1
  44 +4 0 obj
  45 +<<
  46 + /Length 5 0 R
  47 +>>
  48 +stream
  49 +BT
  50 + /F1 24 Tf
  51 + 72 720 Td
  52 + (Potato) Tj
  53 +ET
  54 +endstream
  55 +endobj
  56 +
  57 +5 0 obj
  58 +44
  59 +endobj
  60 +
  61 +6 0 obj
  62 +<<
  63 + /BaseFont /Helvetica
  64 + /Encoding /WinAnsiEncoding
  65 + /Name /F1
  66 + /Subtype /Type1
  67 + /Type /Font
  68 +>>
  69 +endobj
  70 +
  71 +7 0 obj
  72 +[ 8 0 R ]
  73 +endobj
  74 +
  75 +8 0 obj
  76 +/Potato
  77 +endobj
  78 +
  79 +9 0 obj
  80 +[
  81 + /PDF
  82 + /Text
  83 +]
  84 +endobj
  85 +
  86 +xref
  87 +0 10
  88 +0000000000 65535 f
  89 +0000000079 00000 n
  90 +0000000025 00000 n
  91 +0000000161 00000 n
  92 +0000000376 00000 n
  93 +0000000475 00000 n
  94 +0000000494 00000 n
  95 +0000000612 00000 n
  96 +0000000638 00000 n
  97 +0000000662 00000 n
  98 +trailer <<
  99 + /Root 2 0 R
  100 + /Size 10
  101 + /ID [<5c2381b459937c988290150df782f1fd><5c2381b459937c988290150df782f1fd>]
  102 +>>
  103 +startxref
  104 +697
  105 +%%EOF
qpdf/qtest/qpdf/unreferenced-preserved.pdf 0 → 100644
No preview for this file type