Commit ddc6cf0cf6c11bb50f9f576bf547df3674142c97

Authored by Jay Berkenbilt
1 parent 9744414c

Precheck streams by default

There is no need for a --precheck-streams option. We can do the
precheck without imposing any penalty, only re-encoding the stream if
it fails the first time.
include/qpdf/QPDFWriter.hh
@@ -200,17 +200,6 @@ class QPDFWriter @@ -200,17 +200,6 @@ class QPDFWriter
200 QPDF_DLL 200 QPDF_DLL
201 void setQDFMode(bool); 201 void setQDFMode(bool);
202 202
203 - // Enable stream precheck mode. In this mode, all filterable  
204 - // streams are checked by actually attempting to decode them  
205 - // before filtering. This may add significant time to the process  
206 - // of writing the data because all streams from the input must be  
207 - // read twice, but it enables the raw stream data to be preserved  
208 - // even in cases where qpdf would run into errors decoding the  
209 - // stream after it determines that it should be able to do it.  
210 - // Examples would include compressed data with errors in it.  
211 - QPDF_DLL  
212 - void setPrecheckStreams(bool);  
213 -  
214 // Preserve unreferenced objects. The default behavior is to 203 // Preserve unreferenced objects. The default behavior is to
215 // discard any object that is not visited during a traversal of 204 // discard any object that is not visited during a traversal of
216 // the object structure from the trailer. 205 // the object structure from the trailer.
@@ -495,7 +484,6 @@ class QPDFWriter @@ -495,7 +484,6 @@ class QPDFWriter
495 qpdf_stream_decode_level_e stream_decode_level; 484 qpdf_stream_decode_level_e stream_decode_level;
496 bool stream_decode_level_set; 485 bool stream_decode_level_set;
497 bool qdf_mode; 486 bool qdf_mode;
498 - bool precheck_streams;  
499 bool preserve_unreferenced_objects; 487 bool preserve_unreferenced_objects;
500 bool newline_before_endstream; 488 bool newline_before_endstream;
501 bool static_id; 489 bool static_id;
libqpdf/QPDFWriter.cc
@@ -59,7 +59,6 @@ QPDFWriter::init() @@ -59,7 +59,6 @@ QPDFWriter::init()
59 stream_decode_level = qpdf_dl_none; 59 stream_decode_level = qpdf_dl_none;
60 stream_decode_level_set = false; 60 stream_decode_level_set = false;
61 qdf_mode = false; 61 qdf_mode = false;
62 - precheck_streams = false;  
63 preserve_unreferenced_objects = false; 62 preserve_unreferenced_objects = false;
64 newline_before_endstream = false; 63 newline_before_endstream = false;
65 static_id = false; 64 static_id = false;
@@ -216,12 +215,6 @@ QPDFWriter::setQDFMode(bool val) @@ -216,12 +215,6 @@ QPDFWriter::setQDFMode(bool val)
216 } 215 }
217 216
218 void 217 void
219 -QPDFWriter::setPrecheckStreams(bool val)  
220 -{  
221 - this->precheck_streams = val;  
222 -}  
223 -  
224 -void  
225 QPDFWriter::setPreserveUnreferencedObjects(bool val) 218 QPDFWriter::setPreserveUnreferencedObjects(bool val)
226 { 219 {
227 this->preserve_unreferenced_objects = val; 220 this->preserve_unreferenced_objects = val;
@@ -1590,34 +1583,32 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, @@ -1590,34 +1583,32 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1590 1583
1591 flags |= f_stream; 1584 flags |= f_stream;
1592 1585
1593 - if (filter && this->precheck_streams) 1586 + PointerHolder<Buffer> stream_data;
  1587 + bool filtered = false;
  1588 + for (int attempt = 1; attempt <= 2; ++attempt)
1594 { 1589 {
1595 - try 1590 + pushPipeline(new Pl_Buffer("stream data"));
  1591 + activatePipelineStack();
  1592 +
  1593 + filtered =
  1594 + object.pipeStreamData(
  1595 + this->pipeline,
  1596 + (((filter && normalize) ? qpdf_ef_normalize : 0) |
  1597 + ((filter && compress) ? qpdf_ef_compress : 0)),
  1598 + (filter
  1599 + ? (uncompress ? qpdf_dl_all : this->stream_decode_level)
  1600 + : qpdf_dl_none));
  1601 + popPipelineStack(&stream_data);
  1602 + if (filter && (! filtered))
1596 { 1603 {
1597 - QTC::TC("qpdf", "QPDFWriter precheck stream");  
1598 - Pl_Discard discard;  
1599 - filter = object.pipeStreamData(  
1600 - &discard, 0, qpdf_dl_all, true); 1604 + // Try again
  1605 + filter = false;
1601 } 1606 }
1602 - catch (std::exception&) 1607 + else
1603 { 1608 {
1604 - filter = false; 1609 + break;
1605 } 1610 }
1606 } 1611 }
1607 -  
1608 - pushPipeline(new Pl_Buffer("stream data"));  
1609 - activatePipelineStack();  
1610 -  
1611 - bool filtered =  
1612 - object.pipeStreamData(  
1613 - this->pipeline,  
1614 - (((filter && normalize) ? qpdf_ef_normalize : 0) |  
1615 - ((filter && compress) ? qpdf_ef_compress : 0)),  
1616 - (filter  
1617 - ? (uncompress ? qpdf_dl_all : this->stream_decode_level)  
1618 - : qpdf_dl_none));  
1619 - PointerHolder<Buffer> stream_data;  
1620 - popPipelineStack(&stream_data);  
1621 if (filtered) 1612 if (filtered)
1622 { 1613 {
1623 flags |= f_filtered; 1614 flags |= f_filtered;
manual/qpdf-manual.xml
@@ -997,23 +997,6 @@ outfile.pdf</option> @@ -997,23 +997,6 @@ outfile.pdf</option>
997 </listitem> 997 </listitem>
998 </varlistentry> 998 </varlistentry>
999 <varlistentry> 999 <varlistentry>
1000 - <term><option>--precheck-streams</option></term>  
1001 - <listitem>  
1002 - <para>  
1003 - Tells qpdf to precheck each stream for the ability to decode  
1004 - it. Ordinarily qpdf tries to decode streams that it thinks it  
1005 - can decode based on the filters, and if there ends up being an  
1006 - error when actually trying to do the decode, the stream data  
1007 - is truncated. This flag causes qpdf to actually read the  
1008 - stream fully before deciding whether to filter the stream.  
1009 - This option will slow qpdf down since it will have to read the  
1010 - stream twice, but it allows raw stream data to be preserved in  
1011 - cases where the decoding of the stream would fail for some  
1012 - reason. This may be useful in working with some damaged files.  
1013 - </para>  
1014 - </listitem>  
1015 - </varlistentry>  
1016 - <varlistentry>  
1017 <term><option>--preserve-unreferenced</option></term> 1000 <term><option>--preserve-unreferenced</option></term>
1018 <listitem> 1001 <listitem>
1019 <para> 1002 <para>
qpdf/qpdf.cc
@@ -87,7 +87,6 @@ struct Options @@ -87,7 +87,6 @@ struct Options
87 object_stream_mode(qpdf_o_preserve), 87 object_stream_mode(qpdf_o_preserve),
88 ignore_xref_streams(false), 88 ignore_xref_streams(false),
89 qdf_mode(false), 89 qdf_mode(false),
90 - precheck_streams(false),  
91 preserve_unreferenced_objects(false), 90 preserve_unreferenced_objects(false),
92 newline_before_endstream(false), 91 newline_before_endstream(false),
93 show_npages(false), 92 show_npages(false),
@@ -149,7 +148,6 @@ struct Options @@ -149,7 +148,6 @@ struct Options
149 qpdf_object_stream_e object_stream_mode; 148 qpdf_object_stream_e object_stream_mode;
150 bool ignore_xref_streams; 149 bool ignore_xref_streams;
151 bool qdf_mode; 150 bool qdf_mode;
152 - bool precheck_streams;  
153 bool preserve_unreferenced_objects; 151 bool preserve_unreferenced_objects;
154 bool newline_before_endstream; 152 bool newline_before_endstream;
155 std::string min_version; 153 std::string min_version;
@@ -371,7 +369,6 @@ familiar with the PDF file format or who are PDF developers.\n\ @@ -371,7 +369,6 @@ familiar with the PDF file format or who are PDF developers.\n\
371 --suppress-recovery prevents qpdf from attempting to recover damaged files\n\ 369 --suppress-recovery prevents qpdf from attempting to recover damaged files\n\
372 --object-streams=mode controls handing of object streams\n\ 370 --object-streams=mode controls handing of object streams\n\
373 --ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\ 371 --ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\
374 ---precheck-streams precheck ability to decode streams\n\  
375 --preserve-unreferenced preserve unreferenced objects\n\ 372 --preserve-unreferenced preserve unreferenced objects\n\
376 --newline-before-endstream always put a newline before endstream\n\ 373 --newline-before-endstream always put a newline before endstream\n\
377 --qdf turns on \"QDF mode\" (below)\n\ 374 --qdf turns on \"QDF mode\" (below)\n\
@@ -1467,10 +1464,6 @@ static void parse_options(int argc, char* argv[], Options& o) @@ -1467,10 +1464,6 @@ static void parse_options(int argc, char* argv[], Options& o)
1467 { 1464 {
1468 o.qdf_mode = true; 1465 o.qdf_mode = true;
1469 } 1466 }
1470 - else if (strcmp(arg, "precheck-streams") == 0)  
1471 - {  
1472 - o.precheck_streams = true;  
1473 - }  
1474 else if (strcmp(arg, "preserve-unreferenced") == 0) 1467 else if (strcmp(arg, "preserve-unreferenced") == 0)
1475 { 1468 {
1476 o.preserve_unreferenced_objects = true; 1469 o.preserve_unreferenced_objects = true;
@@ -2094,10 +2087,6 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) @@ -2094,10 +2087,6 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
2094 { 2087 {
2095 w.setQDFMode(true); 2088 w.setQDFMode(true);
2096 } 2089 }
2097 - if (o.precheck_streams)  
2098 - {  
2099 - w.setPrecheckStreams(true);  
2100 - }  
2101 if (o.preserve_unreferenced_objects) 2090 if (o.preserve_unreferenced_objects)
2102 { 2091 {
2103 w.setPreserveUnreferencedObjects(true); 2092 w.setPreserveUnreferencedObjects(true);
qpdf/qpdf.testcov
@@ -277,7 +277,6 @@ QPDFObjectHandle treat word as string 0 @@ -277,7 +277,6 @@ QPDFObjectHandle treat word as string 0
277 QPDFObjectHandle found fake 1 277 QPDFObjectHandle found fake 1
278 QPDFObjectHandle no val for last key 0 278 QPDFObjectHandle no val for last key 0
279 QPDF resolve failure to null 0 279 QPDF resolve failure to null 0
280 -QPDFWriter precheck stream 0  
281 QPDFWriter preserve unreferenced standard 0 280 QPDFWriter preserve unreferenced standard 0
282 QPDFObjectHandle non-stream in parsecontent 0 281 QPDFObjectHandle non-stream in parsecontent 0
283 QPDFObjectHandle errors in parsecontent 0 282 QPDFObjectHandle errors in parsecontent 0
qpdf/qtest/qpdf.test
@@ -918,27 +918,20 @@ $td->runtest("check output", @@ -918,27 +918,20 @@ $td->runtest("check output",
918 show_ntests(); 918 show_ntests();
919 # ---------- 919 # ----------
920 $td->notify("--- Precheck streams ---"); 920 $td->notify("--- Precheck streams ---");
921 -$n_tests += 4; 921 +$n_tests += 2;
922 922
923 -$td->runtest("bad stream without precheck", 923 +$td->runtest("bad stream",
924 {$td->COMMAND => "qpdf --static-id bad-data.pdf a.pdf"}, 924 {$td->COMMAND => "qpdf --static-id bad-data.pdf a.pdf"},
925 {$td->FILE => "bad-data.out", $td->EXIT_STATUS => 3}, 925 {$td->FILE => "bad-data.out", $td->EXIT_STATUS => 3},
926 $td->NORMALIZE_NEWLINES); 926 $td->NORMALIZE_NEWLINES);
927 $td->runtest("check output", 927 $td->runtest("check output",
928 {$td->FILE => "a.pdf"}, 928 {$td->FILE => "a.pdf"},
929 {$td->FILE => "bad-data-out.pdf"}); 929 {$td->FILE => "bad-data-out.pdf"});
930 -$td->runtest("bad stream with precheck",  
931 - {$td->COMMAND =>  
932 - "qpdf --static-id --precheck-streams bad-data.pdf a.pdf"},  
933 - {$td->STRING => "", $td->EXIT_STATUS => 0},  
934 - $td->NORMALIZE_NEWLINES);  
935 -$td->runtest("check output",  
936 - {$td->FILE => "a.pdf"},  
937 - {$td->FILE => "bad-data-precheck.pdf"}); 930 +
938 show_ntests(); 931 show_ntests();
939 # ---------- 932 # ----------
940 $td->notify("--- Decode levels ---"); 933 $td->notify("--- Decode levels ---");
941 -$n_tests += 10; 934 +$n_tests += 12;
942 935
943 # image-streams.pdf is the output of examples/pdf-create. 936 # image-streams.pdf is the output of examples/pdf-create.
944 # examples/pdf-create validates the actual image data. 937 # examples/pdf-create validates the actual image data.
@@ -962,6 +955,14 @@ $td->runtest("check finds bad jpeg data", @@ -962,6 +955,14 @@ $td->runtest("check finds bad jpeg data",
962 {$td->FILE => "bad-jpeg-check.out", 955 {$td->FILE => "bad-jpeg-check.out",
963 $td->EXIT_STATUS => 3}, 956 $td->EXIT_STATUS => 3},
964 $td->NORMALIZE_NEWLINES); 957 $td->NORMALIZE_NEWLINES);
  958 +$td->runtest("precheck detects bad jpeg data",
  959 + {$td->COMMAND => "qpdf --static-id --decode-level=all" .
  960 + " bad-jpeg.pdf a.pdf"},
  961 + {$td->FILE => "bad-jpeg.out", $td->EXIT_STATUS => 3},
  962 + $td->NORMALIZE_NEWLINES);
  963 +$td->runtest("check file",
  964 + {$td->FILE => "a.pdf"},
  965 + {$td->FILE => "bad-jpeg-out.pdf"});
965 $td->runtest("get data", 966 $td->runtest("get data",
966 {$td->COMMAND => "qpdf --show-object=6" . 967 {$td->COMMAND => "qpdf --show-object=6" .
967 " --filtered-stream-data bad-jpeg.pdf"}, 968 " --filtered-stream-data bad-jpeg.pdf"},
qpdf/qtest/qpdf/bad-data-out.pdf
No preview for this file type
qpdf/qtest/qpdf/bad-data-precheck.pdf deleted
No preview for this file type