Commit aa583f293a01ddb25866702fc94766d83b795722

Authored by m-holger
Committed by GitHub
2 parents d9e230bf e7dcbe93

Merge pull request #1340 from m-holger/i1286

Change QPDFWriter stream_decode_level default to qpdf_dl_generalized (fixes #1286)
ChangeLog
... ... @@ -9,6 +9,11 @@
9 9 * Add C API function qpdf_oh_free_buffer to release memory allocated
10 10 by stream data functions.
11 11  
  12 +2024-09-19 M Holger <m.holger@qpdf.org>
  13 +
  14 + * Bug fix: QPDFWriter stream DecodeLevel incorrectly defaulted to
  15 + none instead of generalied. Fixes #1286.
  16 +
12 17 2024-08-25 M Holger <m.holger@qpdf.org>
13 18  
14 19 * Add new command-line arguments --remove-metadata and --remove-info
... ...
libqpdf/QPDFWriter.cc
... ... @@ -26,6 +26,8 @@
26 26 #include <cstdlib>
27 27 #include <stdexcept>
28 28  
  29 +using namespace std::literals;
  30 +
29 31 QPDFWriter::ProgressReporter::~ProgressReporter() // NOLINT (modernize-use-equals-default)
30 32 {
31 33 // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
... ... @@ -1249,7 +1251,7 @@ QPDFWriter::willFilterStream(
1249 1251 if (stream_dict.isDictionaryOfType("/Metadata")) {
1250 1252 is_metadata = true;
1251 1253 }
1252   - bool filter = (stream.isDataModified() || m->compress_streams || m->stream_decode_level);
  1254 + bool filter = stream.isDataModified() || m->compress_streams || m->stream_decode_level;
1253 1255 bool filter_on_write = stream.getFilterOnWrite();
1254 1256 if (!filter_on_write) {
1255 1257 QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false");
... ... @@ -1261,15 +1263,15 @@ QPDFWriter::willFilterStream(
1261 1263 // CPU cycles uncompressing and recompressing stuff. This can be overridden with
1262 1264 // setRecompressFlate(true).
1263 1265 QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
1264   - if ((!m->recompress_flate) && (!stream.isDataModified()) && filter_obj.isName() &&
1265   - ((filter_obj.getName() == "/FlateDecode") || (filter_obj.getName() == "/Fl"))) {
  1266 + if (!m->recompress_flate && !stream.isDataModified() && filter_obj.isName() &&
  1267 + (filter_obj.getName() == "/FlateDecode" || filter_obj.getName() == "/Fl")) {
1266 1268 QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
1267 1269 filter = false;
1268 1270 }
1269 1271 }
1270 1272 bool normalize = false;
1271 1273 bool uncompress = false;
1272   - if (filter_on_write && is_metadata && ((!m->encrypted) || (m->encrypt_metadata == false))) {
  1274 + if (filter_on_write && is_metadata && (!m->encrypted || !m->encrypt_metadata)) {
1273 1275 QTC::TC("qpdf", "QPDFWriter not compressing metadata");
1274 1276 filter = true;
1275 1277 compress_stream = false;
... ... @@ -1283,29 +1285,37 @@ QPDFWriter::willFilterStream(
1283 1285 }
1284 1286  
1285 1287 bool filtered = false;
1286   - for (int attempt = 1; attempt <= 2; ++attempt) {
  1288 + for (bool first_attempt: {true, false}) {
1287 1289 pushPipeline(new Pl_Buffer("stream data"));
1288 1290 PipelinePopper pp_stream_data(this, stream_data);
1289 1291 activatePipelineStack(pp_stream_data);
1290 1292 try {
1291 1293 filtered = stream.pipeStreamData(
1292 1294 m->pipeline,
1293   - (((filter && normalize) ? qpdf_ef_normalize : 0) |
1294   - ((filter && compress_stream) ? qpdf_ef_compress : 0)),
1295   - (filter ? (uncompress ? qpdf_dl_all : m->stream_decode_level) : qpdf_dl_none),
  1295 + !filter ? 0
  1296 + : ((normalize ? qpdf_ef_normalize : 0) |
  1297 + (compress_stream ? qpdf_ef_compress : 0)),
  1298 + !filter ? qpdf_dl_none : (uncompress ? qpdf_dl_all : m->stream_decode_level),
1296 1299 false,
1297   - (attempt == 1));
  1300 + first_attempt);
  1301 + if (filter && !filtered) {
  1302 + // Try again
  1303 + filter = false;
  1304 + stream.setFilterOnWrite(false);
  1305 + } else {
  1306 + break;
  1307 + }
1298 1308 } catch (std::runtime_error& e) {
  1309 + if (filter && first_attempt) {
  1310 + stream.warnIfPossible("error while getting stream data: "s + e.what());
  1311 + stream.warnIfPossible("qpdf will attempt to write the damaged stream unchanged");
  1312 + filter = false;
  1313 + stream.setFilterOnWrite(false);
  1314 + continue;
  1315 + }
1299 1316 throw std::runtime_error(
1300 1317 "error while getting stream data for " + stream.unparse() + ": " + e.what());
1301 1318 }
1302   - if (filter && !filtered) {
1303   - // Try again
1304   - filter = false;
1305   - stream.setFilterOnWrite(false);
1306   - } else {
1307   - break;
1308   - }
1309 1319 }
1310 1320 if (!filtered) {
1311 1321 compress_stream = false;
... ... @@ -1866,7 +1876,7 @@ QPDFWriter::generateID()
1866 1876 if (m->deterministic_id) {
1867 1877 if (m->deterministic_id_data.empty()) {
1868 1878 QTC::TC("qpdf", "QPDFWriter deterministic with no data");
1869   - throw std::logic_error("INTERNAL ERROR: QPDFWriter::generateID has no data for "
  1879 + throw std::runtime_error("INTERNAL ERROR: QPDFWriter::generateID has no data for "
1870 1880 "deterministic ID. This may happen if deterministic ID and "
1871 1881 "file encryption are requested together.");
1872 1882 }
... ... @@ -2116,7 +2126,7 @@ QPDFWriter::doWriteSetup()
2116 2126 if (m->encrypted) {
2117 2127 // Encryption has been explicitly set
2118 2128 m->preserve_encryption = false;
2119   - } else if (m->normalize_content || m->stream_decode_level || m->pclm || m->qdf_mode) {
  2129 + } else if (m->normalize_content || !m->compress_streams || m->pclm || m->qdf_mode) {
2120 2130 // Encryption makes looking at contents pretty useless. If the user explicitly encrypted
2121 2131 // though, we still obey that.
2122 2132 m->preserve_encryption = false;
... ...
libqpdf/qpdf/QPDFWriter_private.hh
... ... @@ -65,7 +65,7 @@ class QPDFWriter::Members
65 65 bool normalize_content{false};
66 66 bool compress_streams{true};
67 67 bool compress_streams_set{false};
68   - qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_none};
  68 + qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_generalized};
69 69 bool stream_decode_level_set{false};
70 70 bool recompress_flate{false};
71 71 bool qdf_mode{false};
... ...
qpdf/qtest/encryption.test
... ... @@ -723,7 +723,7 @@ $td-&gt;runtest(&quot;check file&#39;s validity&quot;,
723 723 $td->runtest("handle missing/invalid Length",
724 724 {$td->COMMAND => "qpdf --check bad-encryption-length.pdf"},
725 725 {$td->FILE => "bad-encryption-length.out",
726   - $td->EXIT_STATUS => 0},
  726 + $td->EXIT_STATUS => 3},
727 727 $td->NORMALIZE_NEWLINES);
728 728  
729 729 cleanup();
... ...
qpdf/qtest/inline-images.test
... ... @@ -56,7 +56,7 @@ $td-&gt;runtest(&quot;externalize damaged image&quot;,
56 56 "qpdf --externalize-inline-images" .
57 57 " --compress-streams=n --static-id" .
58 58 " damaged-inline-image.pdf a.pdf"},
59   - {$td->STRING => "", $td->EXIT_STATUS => 0},
  59 + {$td->FILE => "damaged-inline-image.out", $td->EXIT_STATUS => 3},
60 60 $td->NORMALIZE_NEWLINES);
61 61 $td->runtest("check output",
62 62 {$td->FILE => "a.pdf"},
... ...
qpdf/qtest/qpdf/bad-encryption-length.out
... ... @@ -14,5 +14,5 @@ modify annotations: allowed
14 14 modify other: allowed
15 15 modify anything: allowed
16 16 File is not linearized
17   -No syntax or stream encoding errors found; the file may still contain
18   -errors that qpdf cannot detect
  17 +WARNING: bad-encryption-length.pdf, object 7 0 at offset 531 -> dictionary key /Length: operation for integer attempted on object of type null: returning 0
  18 +qpdf: operation succeeded with warnings
... ...
qpdf/qtest/qpdf/damaged-inline-image.out 0 → 100644
  1 +WARNING: damaged-inline-image.pdf, stream object 8 0: error while getting stream data: stream inflate: inflate: data: invalid distance too far back
  2 +WARNING: damaged-inline-image.pdf, stream object 8 0: qpdf will attempt to write the damaged stream unchanged
  3 +qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/qpdf/fuzz-16214.out
... ... @@ -6,6 +6,11 @@ WARNING: fuzz-16214.pdf (xref stream, offset 116): Cross-reference stream data h
6 6 WARNING: fuzz-16214.pdf: reported number of objects (6) is not one plus the highest object number (35)
7 7 WARNING: fuzz-16214.pdf (object 14 0, offset 652): expected dictionary key but found non-name object; inserting key /QPDFFake1
8 8 WARNING: fuzz-16214.pdf (object 14 0, offset 734): expected endobj
  9 +WARNING: fuzz-16214.pdf (object 15 0, offset 869): unknown token while reading object; treating as string
  10 +WARNING: fuzz-16214.pdf (object 15 0, offset 745): expected dictionary key but found non-name object; inserting key /QPDFFake1
  11 +WARNING: fuzz-16214.pdf (object 15 0, offset 894): expected endobj
  12 +WARNING: fuzz-16214.pdf, object 15 0 at offset 745: kid 0 (from 0) MediaBox is undefined; setting to letter / ANSI A
  13 +WARNING: fuzz-16214.pdf, object 15 0 at offset 745: /Type key should be /Page but is not; overriding
9 14 WARNING: fuzz-16214.pdf: file is damaged
10 15 WARNING: fuzz-16214.pdf (object 1 0, offset 7189): expected n n obj
11 16 WARNING: fuzz-16214.pdf: Attempting to reconstruct cross-reference table
... ...
qpdf/qtest/qpdf/invalid-id-xref.out
... ... @@ -15,4 +15,6 @@ modify annotations: allowed
15 15 modify other: not allowed
16 16 modify anything: not allowed
17 17 File is not linearized
  18 +WARNING: invalid-id-xref.pdf, trailer at offset 910 -> dictionary key /ID: operation for array attempted on object of type null: returning null
  19 +WARNING: invalid-id-xref.pdf, trailer at offset 910 -> dictionary key /ID -> null returned from invalid array access: operation for string attempted on object of type null: returning empty string
18 20 qpdf: operation succeeded with warnings
... ...
qpdf/qtest/qpdf/job-json-input-file-password.pdf
1 1 Binary files a/qpdf/qtest/qpdf/job-json-input-file-password.pdf and b/qpdf/qtest/qpdf/job-json-input-file-password.pdf differ
... ...
qpdf/qtest/qpdf/qjson-bad-data.out
  1 +WARNING: qjson-bad-data.json, obj:4 0 R at offset 0: error while getting stream data: base64-decode: base64 decode: invalid input
  2 +WARNING: qjson-bad-data.json, obj:4 0 R at offset 0: qpdf will attempt to write the damaged stream unchanged
1 3 qpdf: error while getting stream data for 4 0 R: base64-decode: base64 decode: invalid input
... ...
qpdf/qtest/qpdf/qjson-bad-datafile.out
  1 +WARNING: qjson-bad-datafile.json, obj:4 0 R at offset 0: error while getting stream data: open file does not exist: No such file or directory
  2 +WARNING: qjson-bad-datafile.json, obj:4 0 R at offset 0: qpdf will attempt to write the damaged stream unchanged
1 3 qpdf: error while getting stream data for 4 0 R: open file does not exist: No such file or directory
... ...