Commit ddc6cf0cf6c11bb50f9f576bf547df3674142c97
1 parent: 9744414c
Precheck streams by default
There is no need for a --precheck-streams option. We can do the precheck without imposing any penalty, only re-encoding the stream if it fails the first time.
Showing 8 changed files with 32 additions and 81 deletions
include/qpdf/QPDFWriter.hh
| ... | ... | @@ -200,17 +200,6 @@ class QPDFWriter |
| 200 | 200 | QPDF_DLL |
| 201 | 201 | void setQDFMode(bool); |
| 202 | 202 | |
| 203 | - // Enable stream precheck mode. In this mode, all filterable | |
| 204 | - // streams are checked by actually attempting to decode them | |
| 205 | - // before filtering. This may add significant time to the process | |
| 206 | - // of writing the data because all streams from the input must be | |
| 207 | - // read twice, but it enables the raw stream data to be preserved | |
| 208 | - // even in cases where qpdf would run into errors decoding the | |
| 209 | - // stream after it determines that it should be able to do it. | |
| 210 | - // Examples would include compressed data with errors in it. | |
| 211 | - QPDF_DLL | |
| 212 | - void setPrecheckStreams(bool); | |
| 213 | - | |
| 214 | 203 | // Preserve unreferenced objects. The default behavior is to |
| 215 | 204 | // discard any object that is not visited during a traversal of |
| 216 | 205 | // the object structure from the trailer. |
| ... | ... | @@ -495,7 +484,6 @@ class QPDFWriter |
| 495 | 484 | qpdf_stream_decode_level_e stream_decode_level; |
| 496 | 485 | bool stream_decode_level_set; |
| 497 | 486 | bool qdf_mode; |
| 498 | - bool precheck_streams; | |
| 499 | 487 | bool preserve_unreferenced_objects; |
| 500 | 488 | bool newline_before_endstream; |
| 501 | 489 | bool static_id; | ... | ... |
libqpdf/QPDFWriter.cc
| ... | ... | @@ -59,7 +59,6 @@ QPDFWriter::init() |
| 59 | 59 | stream_decode_level = qpdf_dl_none; |
| 60 | 60 | stream_decode_level_set = false; |
| 61 | 61 | qdf_mode = false; |
| 62 | - precheck_streams = false; | |
| 63 | 62 | preserve_unreferenced_objects = false; |
| 64 | 63 | newline_before_endstream = false; |
| 65 | 64 | static_id = false; |
| ... | ... | @@ -216,12 +215,6 @@ QPDFWriter::setQDFMode(bool val) |
| 216 | 215 | } |
| 217 | 216 | |
| 218 | 217 | void |
| 219 | -QPDFWriter::setPrecheckStreams(bool val) | |
| 220 | -{ | |
| 221 | - this->precheck_streams = val; | |
| 222 | -} | |
| 223 | - | |
| 224 | -void | |
| 225 | 218 | QPDFWriter::setPreserveUnreferencedObjects(bool val) |
| 226 | 219 | { |
| 227 | 220 | this->preserve_unreferenced_objects = val; |
| ... | ... | @@ -1590,34 +1583,32 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, |
| 1590 | 1583 | |
| 1591 | 1584 | flags |= f_stream; |
| 1592 | 1585 | |
| 1593 | - if (filter && this->precheck_streams) | |
| 1586 | + PointerHolder<Buffer> stream_data; | |
| 1587 | + bool filtered = false; | |
| 1588 | + for (int attempt = 1; attempt <= 2; ++attempt) | |
| 1594 | 1589 | { |
| 1595 | - try | |
| 1590 | + pushPipeline(new Pl_Buffer("stream data")); | |
| 1591 | + activatePipelineStack(); | |
| 1592 | + | |
| 1593 | + filtered = | |
| 1594 | + object.pipeStreamData( | |
| 1595 | + this->pipeline, | |
| 1596 | + (((filter && normalize) ? qpdf_ef_normalize : 0) | | |
| 1597 | + ((filter && compress) ? qpdf_ef_compress : 0)), | |
| 1598 | + (filter | |
| 1599 | + ? (uncompress ? qpdf_dl_all : this->stream_decode_level) | |
| 1600 | + : qpdf_dl_none)); | |
| 1601 | + popPipelineStack(&stream_data); | |
| 1602 | + if (filter && (! filtered)) | |
| 1596 | 1603 | { |
| 1597 | - QTC::TC("qpdf", "QPDFWriter precheck stream"); | |
| 1598 | - Pl_Discard discard; | |
| 1599 | - filter = object.pipeStreamData( | |
| 1600 | - &discard, 0, qpdf_dl_all, true); | |
| 1604 | + // Try again | |
| 1605 | + filter = false; | |
| 1601 | 1606 | } |
| 1602 | - catch (std::exception&) | |
| 1607 | + else | |
| 1603 | 1608 | { |
| 1604 | - filter = false; | |
| 1609 | + break; | |
| 1605 | 1610 | } |
| 1606 | 1611 | } |
| 1607 | - | |
| 1608 | - pushPipeline(new Pl_Buffer("stream data")); | |
| 1609 | - activatePipelineStack(); | |
| 1610 | - | |
| 1611 | - bool filtered = | |
| 1612 | - object.pipeStreamData( | |
| 1613 | - this->pipeline, | |
| 1614 | - (((filter && normalize) ? qpdf_ef_normalize : 0) | | |
| 1615 | - ((filter && compress) ? qpdf_ef_compress : 0)), | |
| 1616 | - (filter | |
| 1617 | - ? (uncompress ? qpdf_dl_all : this->stream_decode_level) | |
| 1618 | - : qpdf_dl_none)); | |
| 1619 | - PointerHolder<Buffer> stream_data; | |
| 1620 | - popPipelineStack(&stream_data); | |
| 1621 | 1612 | if (filtered) |
| 1622 | 1613 | { |
| 1623 | 1614 | flags |= f_filtered; | ... | ... |
manual/qpdf-manual.xml
| ... | ... | @@ -997,23 +997,6 @@ outfile.pdf</option> |
| 997 | 997 | </listitem> |
| 998 | 998 | </varlistentry> |
| 999 | 999 | <varlistentry> |
| 1000 | - <term><option>--precheck-streams</option></term> | |
| 1001 | - <listitem> | |
| 1002 | - <para> | |
| 1003 | - Tells qpdf to precheck each stream for the ability to decode | |
| 1004 | - it. Ordinarily qpdf tries to decode streams that it thinks it | |
| 1005 | - can decode based on the filters, and if there ends up being an | |
| 1006 | - error when actually trying to do the decode, the stream data | |
| 1007 | - is truncated. This flag causes qpdf to actually read the | |
| 1008 | - stream fully before deciding whether to filter the stream. | |
| 1009 | - This option will slow qpdf down since it will have to read the | |
| 1010 | - stream twice, but it allows raw stream data to be preserved in | |
| 1011 | - cases where the decoding of the stream would fail for some | |
| 1012 | - reason. This may be useful in working with some damaged files. | |
| 1013 | - </para> | |
| 1014 | - </listitem> | |
| 1015 | - </varlistentry> | |
| 1016 | - <varlistentry> | |
| 1017 | 1000 | <term><option>--preserve-unreferenced</option></term> |
| 1018 | 1001 | <listitem> |
| 1019 | 1002 | <para> | ... | ... |
qpdf/qpdf.cc
| ... | ... | @@ -87,7 +87,6 @@ struct Options |
| 87 | 87 | object_stream_mode(qpdf_o_preserve), |
| 88 | 88 | ignore_xref_streams(false), |
| 89 | 89 | qdf_mode(false), |
| 90 | - precheck_streams(false), | |
| 91 | 90 | preserve_unreferenced_objects(false), |
| 92 | 91 | newline_before_endstream(false), |
| 93 | 92 | show_npages(false), |
| ... | ... | @@ -149,7 +148,6 @@ struct Options |
| 149 | 148 | qpdf_object_stream_e object_stream_mode; |
| 150 | 149 | bool ignore_xref_streams; |
| 151 | 150 | bool qdf_mode; |
| 152 | - bool precheck_streams; | |
| 153 | 151 | bool preserve_unreferenced_objects; |
| 154 | 152 | bool newline_before_endstream; |
| 155 | 153 | std::string min_version; |
| ... | ... | @@ -371,7 +369,6 @@ familiar with the PDF file format or who are PDF developers.\n\ |
| 371 | 369 | --suppress-recovery prevents qpdf from attempting to recover damaged files\n\ |
| 372 | 370 | --object-streams=mode controls handing of object streams\n\ |
| 373 | 371 | --ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\ |
| 374 | ---precheck-streams precheck ability to decode streams\n\ | |
| 375 | 372 | --preserve-unreferenced preserve unreferenced objects\n\ |
| 376 | 373 | --newline-before-endstream always put a newline before endstream\n\ |
| 377 | 374 | --qdf turns on \"QDF mode\" (below)\n\ |
| ... | ... | @@ -1467,10 +1464,6 @@ static void parse_options(int argc, char* argv[], Options& o) |
| 1467 | 1464 | { |
| 1468 | 1465 | o.qdf_mode = true; |
| 1469 | 1466 | } |
| 1470 | - else if (strcmp(arg, "precheck-streams") == 0) | |
| 1471 | - { | |
| 1472 | - o.precheck_streams = true; | |
| 1473 | - } | |
| 1474 | 1467 | else if (strcmp(arg, "preserve-unreferenced") == 0) |
| 1475 | 1468 | { |
| 1476 | 1469 | o.preserve_unreferenced_objects = true; |
| ... | ... | @@ -2094,10 +2087,6 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) |
| 2094 | 2087 | { |
| 2095 | 2088 | w.setQDFMode(true); |
| 2096 | 2089 | } |
| 2097 | - if (o.precheck_streams) | |
| 2098 | - { | |
| 2099 | - w.setPrecheckStreams(true); | |
| 2100 | - } | |
| 2101 | 2090 | if (o.preserve_unreferenced_objects) |
| 2102 | 2091 | { |
| 2103 | 2092 | w.setPreserveUnreferencedObjects(true); | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -277,7 +277,6 @@ QPDFObjectHandle treat word as string 0 |
| 277 | 277 | QPDFObjectHandle found fake 1 |
| 278 | 278 | QPDFObjectHandle no val for last key 0 |
| 279 | 279 | QPDF resolve failure to null 0 |
| 280 | -QPDFWriter precheck stream 0 | |
| 281 | 280 | QPDFWriter preserve unreferenced standard 0 |
| 282 | 281 | QPDFObjectHandle non-stream in parsecontent 0 |
| 283 | 282 | QPDFObjectHandle errors in parsecontent 0 | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -918,27 +918,20 @@ $td->runtest("check output", |
| 918 | 918 | show_ntests(); |
| 919 | 919 | # ---------- |
| 920 | 920 | $td->notify("--- Precheck streams ---"); |
| 921 | -$n_tests += 4; | |
| 921 | +$n_tests += 2; | |
| 922 | 922 | |
| 923 | -$td->runtest("bad stream without precheck", | |
| 923 | +$td->runtest("bad stream", | |
| 924 | 924 | {$td->COMMAND => "qpdf --static-id bad-data.pdf a.pdf"}, |
| 925 | 925 | {$td->FILE => "bad-data.out", $td->EXIT_STATUS => 3}, |
| 926 | 926 | $td->NORMALIZE_NEWLINES); |
| 927 | 927 | $td->runtest("check output", |
| 928 | 928 | {$td->FILE => "a.pdf"}, |
| 929 | 929 | {$td->FILE => "bad-data-out.pdf"}); |
| 930 | -$td->runtest("bad stream with precheck", | |
| 931 | - {$td->COMMAND => | |
| 932 | - "qpdf --static-id --precheck-streams bad-data.pdf a.pdf"}, | |
| 933 | - {$td->STRING => "", $td->EXIT_STATUS => 0}, | |
| 934 | - $td->NORMALIZE_NEWLINES); | |
| 935 | -$td->runtest("check output", | |
| 936 | - {$td->FILE => "a.pdf"}, | |
| 937 | - {$td->FILE => "bad-data-precheck.pdf"}); | |
| 930 | + | |
| 938 | 931 | show_ntests(); |
| 939 | 932 | # ---------- |
| 940 | 933 | $td->notify("--- Decode levels ---"); |
| 941 | -$n_tests += 10; | |
| 934 | +$n_tests += 12; | |
| 942 | 935 | |
| 943 | 936 | # image-streams.pdf is the output of examples/pdf-create. |
| 944 | 937 | # examples/pdf-create validates the actual image data. |
| ... | ... | @@ -962,6 +955,14 @@ $td->runtest("check finds bad jpeg data", |
| 962 | 955 | {$td->FILE => "bad-jpeg-check.out", |
| 963 | 956 | $td->EXIT_STATUS => 3}, |
| 964 | 957 | $td->NORMALIZE_NEWLINES); |
| 958 | +$td->runtest("precheck detects bad jpeg data", | |
| 959 | + {$td->COMMAND => "qpdf --static-id --decode-level=all" . | |
| 960 | + " bad-jpeg.pdf a.pdf"}, | |
| 961 | + {$td->FILE => "bad-jpeg.out", $td->EXIT_STATUS => 3}, | |
| 962 | + $td->NORMALIZE_NEWLINES); | |
| 963 | +$td->runtest("check file", | |
| 964 | + {$td->FILE => "a.pdf"}, | |
| 965 | + {$td->FILE => "bad-jpeg-out.pdf"}); | |
| 965 | 966 | $td->runtest("get data", |
| 966 | 967 | {$td->COMMAND => "qpdf --show-object=6" . |
| 967 | 968 | " --filtered-stream-data bad-jpeg.pdf"}, | ... | ... |
qpdf/qtest/qpdf/bad-data-out.pdf
No preview for this file type
qpdf/qtest/qpdf/bad-data-precheck.pdf deleted
No preview for this file type