Commit 30ac51bc78f7d723e4fc94ff8af5527e88b56f23
1 parent
16c19e94
Exclude unreferenced objects in object streams (fixes #520)
Showing
9 changed files
with
1059 additions
and
1848 deletions
ChangeLog
TODO
| ... | ... | @@ -511,9 +511,6 @@ I find it useful to make reference to them in this list. |
| 511 | 511 | implemented, update the docs on crypto providers, which mention |
| 512 | 512 | that this may happen in the future. |
| 513 | 513 | |
| 514 | - * See if we can avoid preserving unreferenced objects in object | |
| 515 | - streams even when preserving the object streams. | |
| 516 | - | |
| 517 | 514 | * Provide APIs for embedded files. See *attachments*.pdf in test |
| 518 | 515 | suite. The private method findAttachmentStreams finds at least |
| 519 | 516 | cases for modern versions of Adobe Reader (>= 1.7, maybe earlier). | ... | ... |
libqpdf/QPDFWriter.cc
| ... | ... | @@ -2363,17 +2363,36 @@ QPDFWriter::preserveObjectStreams() |
| 2363 | 2363 | { |
| 2364 | 2364 | // Our object_to_object_stream map has to map ObjGen -> ObjGen |
| 2365 | 2365 | // since we may be generating object streams out of old objects |
| 2366 | - // that have generation numbers greater than zero. However in an | |
| 2366 | + // that have generation numbers greater than zero. However, in an | |
| 2367 | 2367 | // existing PDF, all object stream objects and all objects in them |
| 2368 | 2368 | // must have generation 0 because the PDF spec does not provide |
| 2369 | - // any way to do otherwise. | |
| 2369 | + // any way to do otherwise. This code filters out objects that are | |
| 2370 | + // not allowed to be in object streams. In addition to removing | |
| 2371 | + // objects that were erroneously included in object streams in the | |
| 2372 | + // source PDF, it also prevents unreferenced objects from being | |
| 2373 | + // included. | |
| 2374 | + std::set<QPDFObjGen> eligible; | |
| 2375 | + if (! this->m->preserve_unreferenced_objects) | |
| 2376 | + { | |
| 2377 | + std::vector<QPDFObjGen> eligible_v = | |
| 2378 | + QPDF::Writer::getCompressibleObjGens(this->m->pdf); | |
| 2379 | + eligible = std::set<QPDFObjGen>(eligible_v.begin(), eligible_v.end()); | |
| 2380 | + } | |
| 2381 | + QTC::TC("qpdf", "QPDFWriter preserve object streams", | |
| 2382 | + this->m->preserve_unreferenced_objects ? 0 : 1); | |
| 2370 | 2383 | std::map<int, int> omap; |
| 2371 | 2384 | QPDF::Writer::getObjectStreamData(this->m->pdf, omap); |
| 2372 | - for (std::map<int, int>::iterator iter = omap.begin(); | |
| 2373 | - iter != omap.end(); ++iter) | |
| 2385 | + for (auto iter: omap) | |
| 2374 | 2386 | { |
| 2375 | - this->m->object_to_object_stream[QPDFObjGen((*iter).first, 0)] = | |
| 2376 | - (*iter).second; | |
| 2387 | + QPDFObjGen og(iter.first, 0); | |
| 2388 | + if (eligible.count(og) || this->m->preserve_unreferenced_objects) | |
| 2389 | + { | |
| 2390 | + this->m->object_to_object_stream[og] = iter.second; | |
| 2391 | + } | |
| 2392 | + else | |
| 2393 | + { | |
| 2394 | + QTC::TC("qpdf", "QPDFWriter exclude from object stream"); | |
| 2395 | + } | |
| 2377 | 2396 | } |
| 2378 | 2397 | } |
| 2379 | 2398 | ... | ... |
manual/qpdf-manual.xml
| ... | ... | @@ -5061,6 +5061,27 @@ print "\n"; |
| 5061 | 5061 | </varlistentry> |
| 5062 | 5062 | --> |
| 5063 | 5063 | <varlistentry> |
| 5064 | + <term>10.3.2: May 8, 2021</term> | |
| 5065 | + <listitem> | |
| 5066 | + <itemizedlist> | |
| 5067 | + <listitem> | |
| 5068 | + <para> | |
| 5069 | + Bug Fixes | |
| 5070 | + </para> | |
| 5071 | + <itemizedlist> | |
| 5072 | + <listitem> | |
| 5073 | + <para> | |
| 5074 | + When generating a file while preserving object streams, | |
| 5075 | + unreferenced objects are correctly removed unless | |
| 5076 | + <option>--preserve-unreferenced</option> is specified. | |
| 5077 | + </para> | |
| 5078 | + </listitem> | |
| 5079 | + </itemizedlist> | |
| 5080 | + </listitem> | |
| 5081 | + </itemizedlist> | |
| 5082 | + </listitem> | |
| 5083 | + </varlistentry> | |
| 5084 | + <varlistentry> | |
| 5064 | 5085 | <term>10.3.1: March 11, 2021</term> |
| 5065 | 5086 | <listitem> |
| 5066 | 5087 | <itemizedlist> | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -592,3 +592,5 @@ QPDFAcroFormDocumentHelper AP parse error 0 |
| 592 | 592 | qpdf copy fields not this file 0 |
| 593 | 593 | qpdf copy fields non-first from orig 0 |
| 594 | 594 | QPDF resolve duplicated page in insert 0 |
| 595 | +QPDFWriter preserve object streams 1 | |
| 596 | +QPDFWriter exclude from object stream 0 | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -986,7 +986,7 @@ my @bug_tests = ( |
| 986 | 986 | ["106", "zlib data error", 3], |
| 987 | 987 | ["141a", "/W entry size 0", 2], |
| 988 | 988 | ["141b", "/W entry size 0", 2], |
| 989 | - ["143", "self-referential ostream", 3], | |
| 989 | + ["143", "self-referential ostream", 3, "--preserve-unreferenced"], | |
| 990 | 990 | ["146", "very deeply nested array", 2], |
| 991 | 991 | ["147", "previously caused memory error", 2], |
| 992 | 992 | ["148", "free memory on bad flate", 2], |
| ... | ... | @@ -996,14 +996,18 @@ my @bug_tests = ( |
| 996 | 996 | ["263", "empty xref stream", 2], |
| 997 | 997 | ["335a", "ozz-fuzz-12152", 2], |
| 998 | 998 | ["335b", "ozz-fuzz-14845", 2], |
| 999 | - ["fuzz-16214", "stream in object stream", 3], | |
| 999 | + ["fuzz-16214", "stream in object stream", 3, "--preserve-unreferenced"], | |
| 1000 | 1000 | # When adding to this list, consider adding to SEED_CORPUS_FILES |
| 1001 | 1001 | # in fuzz/build.mk and updating the count in fuzz/qtest/fuzz.test. |
| 1002 | 1002 | ); |
| 1003 | 1003 | $n_tests += scalar(@bug_tests); |
| 1004 | 1004 | foreach my $d (@bug_tests) |
| 1005 | 1005 | { |
| 1006 | - my ($n, $description, $exit_status) = @$d; | |
| 1006 | + my ($n, $description, $exit_status, $xargs) = @$d; | |
| 1007 | + if (! defined $xargs) | |
| 1008 | + { | |
| 1009 | + $xargs = ""; | |
| 1010 | + } | |
| 1007 | 1011 | if (-f "issue-$n.obfuscated") |
| 1008 | 1012 | { |
| 1009 | 1013 | # Some of the PDF files in the test suite trigger anti-virus |
| ... | ... | @@ -1025,7 +1029,7 @@ foreach my $d (@bug_tests) |
| 1025 | 1029 | { |
| 1026 | 1030 | my $base = (-f "issue-$n.pdf") ? "issue-$n" : "$n"; |
| 1027 | 1031 | $td->runtest($description, |
| 1028 | - {$td->COMMAND => "qpdf $base.pdf a.pdf"}, | |
| 1032 | + {$td->COMMAND => "qpdf $xargs $base.pdf a.pdf"}, | |
| 1029 | 1033 | {$td->FILE => "$base.out", |
| 1030 | 1034 | $td->EXIT_STATUS => $exit_status}, |
| 1031 | 1035 | $td->NORMALIZE_NEWLINES); |
| ... | ... | @@ -1304,7 +1308,8 @@ $n_tests += 2; |
| 1304 | 1308 | # that in turn contains an indirect scalar (bug 2974522). |
| 1305 | 1309 | $td->runtest("unreferenced indirect scalar", |
| 1306 | 1310 | {$td->COMMAND => |
| 1307 | - "qpdf --qdf --static-id --object-streams=preserve" . | |
| 1311 | + "qpdf --qdf --static-id --preserve-unreferenced" . | |
| 1312 | + " --object-streams=preserve" . | |
| 1308 | 1313 | " unreferenced-indirect-scalar.pdf a.qdf"}, |
| 1309 | 1314 | {$td->STRING => "", |
| 1310 | 1315 | $td->EXIT_STATUS => 0}, | ... | ... |
qpdf/qtest/qpdf/nontrivial-crypt-filter-decrypted.pdf
No preview for this file type
qpdf/qtest/qpdf/sample-form-out.pdf
No preview for this file type
qpdf/qtest/qpdf/unreferenced-indirect-scalar.out
No preview for this file type