Commit ddc6cf0cf6c11bb50f9f576bf547df3674142c97

Authored by Jay Berkenbilt
1 parent 9744414c

Precheck streams by default

There is no need for a --precheck-streams option. We can do the
precheck without imposing any penalty, only re-encoding the stream if
it fails the first time.
include/qpdf/QPDFWriter.hh
... ... @@ -200,17 +200,6 @@ class QPDFWriter
200 200 QPDF_DLL
201 201 void setQDFMode(bool);
202 202  
203   - // Enable stream precheck mode. In this mode, all filterable
204   - // streams are checked by actually attempting to decode them
205   - // before filtering. This may add significant time to the process
206   - // of writing the data because all streams from the input must be
207   - // read twice, but it enables the raw stream data to be preserved
208   - // even in cases where qpdf would run into errors decoding the
209   - // stream after it determines that it should be able to do it.
210   - // Examples would include compressed data with errors in it.
211   - QPDF_DLL
212   - void setPrecheckStreams(bool);
213   -
214 203 // Preserve unreferenced objects. The default behavior is to
215 204 // discard any object that is not visited during a traversal of
216 205 // the object structure from the trailer.
... ... @@ -495,7 +484,6 @@ class QPDFWriter
495 484 qpdf_stream_decode_level_e stream_decode_level;
496 485 bool stream_decode_level_set;
497 486 bool qdf_mode;
498   - bool precheck_streams;
499 487 bool preserve_unreferenced_objects;
500 488 bool newline_before_endstream;
501 489 bool static_id;
... ...
libqpdf/QPDFWriter.cc
... ... @@ -59,7 +59,6 @@ QPDFWriter::init()
59 59 stream_decode_level = qpdf_dl_none;
60 60 stream_decode_level_set = false;
61 61 qdf_mode = false;
62   - precheck_streams = false;
63 62 preserve_unreferenced_objects = false;
64 63 newline_before_endstream = false;
65 64 static_id = false;
... ... @@ -216,12 +215,6 @@ QPDFWriter::setQDFMode(bool val)
216 215 }
217 216  
218 217 void
219   -QPDFWriter::setPrecheckStreams(bool val)
220   -{
221   - this->precheck_streams = val;
222   -}
223   -
224   -void
225 218 QPDFWriter::setPreserveUnreferencedObjects(bool val)
226 219 {
227 220 this->preserve_unreferenced_objects = val;
... ... @@ -1590,34 +1583,32 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1590 1583  
1591 1584 flags |= f_stream;
1592 1585  
1593   - if (filter && this->precheck_streams)
  1586 + PointerHolder<Buffer> stream_data;
  1587 + bool filtered = false;
  1588 + for (int attempt = 1; attempt <= 2; ++attempt)
1594 1589 {
1595   - try
  1590 + pushPipeline(new Pl_Buffer("stream data"));
  1591 + activatePipelineStack();
  1592 +
  1593 + filtered =
  1594 + object.pipeStreamData(
  1595 + this->pipeline,
  1596 + (((filter && normalize) ? qpdf_ef_normalize : 0) |
  1597 + ((filter && compress) ? qpdf_ef_compress : 0)),
  1598 + (filter
  1599 + ? (uncompress ? qpdf_dl_all : this->stream_decode_level)
  1600 + : qpdf_dl_none));
  1601 + popPipelineStack(&stream_data);
  1602 + if (filter && (! filtered))
1596 1603 {
1597   - QTC::TC("qpdf", "QPDFWriter precheck stream");
1598   - Pl_Discard discard;
1599   - filter = object.pipeStreamData(
1600   - &discard, 0, qpdf_dl_all, true);
  1604 + // Try again
  1605 + filter = false;
1601 1606 }
1602   - catch (std::exception&)
  1607 + else
1603 1608 {
1604   - filter = false;
  1609 + break;
1605 1610 }
1606 1611 }
1607   -
1608   - pushPipeline(new Pl_Buffer("stream data"));
1609   - activatePipelineStack();
1610   -
1611   - bool filtered =
1612   - object.pipeStreamData(
1613   - this->pipeline,
1614   - (((filter && normalize) ? qpdf_ef_normalize : 0) |
1615   - ((filter && compress) ? qpdf_ef_compress : 0)),
1616   - (filter
1617   - ? (uncompress ? qpdf_dl_all : this->stream_decode_level)
1618   - : qpdf_dl_none));
1619   - PointerHolder<Buffer> stream_data;
1620   - popPipelineStack(&stream_data);
1621 1612 if (filtered)
1622 1613 {
1623 1614 flags |= f_filtered;
... ...
manual/qpdf-manual.xml
... ... @@ -997,23 +997,6 @@ outfile.pdf</option>
997 997 </listitem>
998 998 </varlistentry>
999 999 <varlistentry>
1000   - <term><option>--precheck-streams</option></term>
1001   - <listitem>
1002   - <para>
1003   - Tells qpdf to precheck each stream for the ability to decode
1004   - it. Ordinarily qpdf tries to decode streams that it thinks it
1005   - can decode based on the filters, and if there ends up being an
1006   - error when actually trying to do the decode, the stream data
1007   - is truncated. This flag causes qpdf to actually read the
1008   - stream fully before deciding whether to filter the stream.
1009   - This option will slow qpdf down since it will have to read the
1010   - stream twice, but it allows raw stream data to be preserved in
1011   - cases where the decoding of the stream would fail for some
1012   - reason. This may be useful in working with some damaged files.
1013   - </para>
1014   - </listitem>
1015   - </varlistentry>
1016   - <varlistentry>
1017 1000 <term><option>--preserve-unreferenced</option></term>
1018 1001 <listitem>
1019 1002 <para>
... ...
qpdf/qpdf.cc
... ... @@ -87,7 +87,6 @@ struct Options
87 87 object_stream_mode(qpdf_o_preserve),
88 88 ignore_xref_streams(false),
89 89 qdf_mode(false),
90   - precheck_streams(false),
91 90 preserve_unreferenced_objects(false),
92 91 newline_before_endstream(false),
93 92 show_npages(false),
... ... @@ -149,7 +148,6 @@ struct Options
149 148 qpdf_object_stream_e object_stream_mode;
150 149 bool ignore_xref_streams;
151 150 bool qdf_mode;
152   - bool precheck_streams;
153 151 bool preserve_unreferenced_objects;
154 152 bool newline_before_endstream;
155 153 std::string min_version;
... ... @@ -371,7 +369,6 @@ familiar with the PDF file format or who are PDF developers.\n\
371 369 --suppress-recovery prevents qpdf from attempting to recover damaged files\n\
372 370 --object-streams=mode controls handing of object streams\n\
373 371 --ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\
374   ---precheck-streams precheck ability to decode streams\n\
375 372 --preserve-unreferenced preserve unreferenced objects\n\
376 373 --newline-before-endstream always put a newline before endstream\n\
377 374 --qdf turns on \"QDF mode\" (below)\n\
... ... @@ -1467,10 +1464,6 @@ static void parse_options(int argc, char* argv[], Options& o)
1467 1464 {
1468 1465 o.qdf_mode = true;
1469 1466 }
1470   - else if (strcmp(arg, "precheck-streams") == 0)
1471   - {
1472   - o.precheck_streams = true;
1473   - }
1474 1467 else if (strcmp(arg, "preserve-unreferenced") == 0)
1475 1468 {
1476 1469 o.preserve_unreferenced_objects = true;
... ... @@ -2094,10 +2087,6 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
2094 2087 {
2095 2088 w.setQDFMode(true);
2096 2089 }
2097   - if (o.precheck_streams)
2098   - {
2099   - w.setPrecheckStreams(true);
2100   - }
2101 2090 if (o.preserve_unreferenced_objects)
2102 2091 {
2103 2092 w.setPreserveUnreferencedObjects(true);
... ...
qpdf/qpdf.testcov
... ... @@ -277,7 +277,6 @@ QPDFObjectHandle treat word as string 0
277 277 QPDFObjectHandle found fake 1
278 278 QPDFObjectHandle no val for last key 0
279 279 QPDF resolve failure to null 0
280   -QPDFWriter precheck stream 0
281 280 QPDFWriter preserve unreferenced standard 0
282 281 QPDFObjectHandle non-stream in parsecontent 0
283 282 QPDFObjectHandle errors in parsecontent 0
... ...
qpdf/qtest/qpdf.test
... ... @@ -918,27 +918,20 @@ $td->runtest("check output",
918 918 show_ntests();
919 919 # ----------
920 920 $td->notify("--- Precheck streams ---");
921   -$n_tests += 4;
  921 +$n_tests += 2;
922 922  
923   -$td->runtest("bad stream without precheck",
  923 +$td->runtest("bad stream",
924 924 {$td->COMMAND => "qpdf --static-id bad-data.pdf a.pdf"},
925 925 {$td->FILE => "bad-data.out", $td->EXIT_STATUS => 3},
926 926 $td->NORMALIZE_NEWLINES);
927 927 $td->runtest("check output",
928 928 {$td->FILE => "a.pdf"},
929 929 {$td->FILE => "bad-data-out.pdf"});
930   -$td->runtest("bad stream with precheck",
931   - {$td->COMMAND =>
932   - "qpdf --static-id --precheck-streams bad-data.pdf a.pdf"},
933   - {$td->STRING => "", $td->EXIT_STATUS => 0},
934   - $td->NORMALIZE_NEWLINES);
935   -$td->runtest("check output",
936   - {$td->FILE => "a.pdf"},
937   - {$td->FILE => "bad-data-precheck.pdf"});
  930 +
938 931 show_ntests();
939 932 # ----------
940 933 $td->notify("--- Decode levels ---");
941   -$n_tests += 10;
  934 +$n_tests += 12;
942 935  
943 936 # image-streams.pdf is the output of examples/pdf-create.
944 937 # examples/pdf-create validates the actual image data.
... ... @@ -962,6 +955,14 @@ $td->runtest("check finds bad jpeg data",
962 955 {$td->FILE => "bad-jpeg-check.out",
963 956 $td->EXIT_STATUS => 3},
964 957 $td->NORMALIZE_NEWLINES);
  958 +$td->runtest("precheck detects bad jpeg data",
  959 + {$td->COMMAND => "qpdf --static-id --decode-level=all" .
  960 + " bad-jpeg.pdf a.pdf"},
  961 + {$td->FILE => "bad-jpeg.out", $td->EXIT_STATUS => 3},
  962 + $td->NORMALIZE_NEWLINES);
  963 +$td->runtest("check file",
  964 + {$td->FILE => "a.pdf"},
  965 + {$td->FILE => "bad-jpeg-out.pdf"});
965 966 $td->runtest("get data",
966 967 {$td->COMMAND => "qpdf --show-object=6" .
967 968 " --filtered-stream-data bad-jpeg.pdf"},
... ...
qpdf/qtest/qpdf/bad-data-out.pdf
No preview for this file type
qpdf/qtest/qpdf/bad-data-precheck.pdf deleted
No preview for this file type