Commit 30ac51bc78f7d723e4fc94ff8af5527e88b56f23

Authored by Jay Berkenbilt
1 parent 16c19e94

Exclude unreferenced objects in object streams (fixes #520)

ChangeLog
  1 +2021-05-08 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Fix 11-year-old bug of leaving unreferenced objects in preserved
  4 + object streams. Fixes #520.
  5 +
1 6 2021-04-17 Jay Berkenbilt <ejb@ql.org>
2 7  
3 8 * Portability fix: use tm_gmtoff rather than global timezone
... ...
... ... @@ -511,9 +511,6 @@ I find it useful to make reference to them in this list.
511 511 implemented, update the docs on crypto providers, which mention
512 512 that this may happen in the future.
513 513  
514   - * See if we can avoid preserving unreferenced objects in object
515   - streams even when preserving the object streams.
516   -
517 514 * Provide APIs for embedded files. See *attachments*.pdf in test
518 515 suite. The private method findAttachmentStreams finds at least
519 516 cases for modern versions of Adobe Reader (>= 1.7, maybe earlier).
... ...
libqpdf/QPDFWriter.cc
... ... @@ -2363,17 +2363,36 @@ QPDFWriter::preserveObjectStreams()
2363 2363 {
2364 2364 // Our object_to_object_stream map has to map ObjGen -> ObjGen
2365 2365 // since we may be generating object streams out of old objects
2366   - // that have generation numbers greater than zero. However in an
  2366 + // that have generation numbers greater than zero. However in an
2367 2367 // existing PDF, all object stream objects and all objects in them
2368 2368 // must have generation 0 because the PDF spec does not provide
2369   - // any way to do otherwise.
  2369 + // any way to do otherwise. Unless unreferenced objects are being
  2370 + // preserved, this code filters out objects that are not allowed
  2371 + // to be in object streams. In addition to removing objects that
  2372 + // were erroneously included in object streams in the source PDF,
  2373 + // it also prevents unreferenced objects from being included.
  2374 + std::set<QPDFObjGen> eligible;
  2375 + if (! this->m->preserve_unreferenced_objects)
  2376 + {
  2377 + std::vector<QPDFObjGen> eligible_v =
  2378 + QPDF::Writer::getCompressibleObjGens(this->m->pdf);
  2379 + eligible = std::set<QPDFObjGen>(eligible_v.begin(), eligible_v.end());
  2380 + }
  2381 + QTC::TC("qpdf", "QPDFWriter preserve object streams",
  2382 + this->m->preserve_unreferenced_objects ? 0 : 1);
2370 2383 std::map<int, int> omap;
2371 2384 QPDF::Writer::getObjectStreamData(this->m->pdf, omap);
2372   - for (std::map<int, int>::iterator iter = omap.begin();
2373   - iter != omap.end(); ++iter)
  2385 + for (auto iter: omap)
2374 2386 {
2375   - this->m->object_to_object_stream[QPDFObjGen((*iter).first, 0)] =
2376   - (*iter).second;
  2387 + QPDFObjGen og(iter.first, 0);
  2388 + if (eligible.count(og) || this->m->preserve_unreferenced_objects)
  2389 + {
  2390 + this->m->object_to_object_stream[og] = iter.second;
  2391 + }
  2392 + else
  2393 + {
  2394 + QTC::TC("qpdf", "QPDFWriter exclude from object stream");
  2395 + }
2377 2396 }
2378 2397 }
2379 2398  
... ...
manual/qpdf-manual.xml
... ... @@ -5061,6 +5061,27 @@ print &quot;\n&quot;;
5061 5061 </varlistentry>
5062 5062 -->
5063 5063 <varlistentry>
  5064 + <term>10.3.2: May 8, 2021</term>
  5065 + <listitem>
  5066 + <itemizedlist>
  5067 + <listitem>
  5068 + <para>
  5069 + Bug Fixes
  5070 + </para>
  5071 + <itemizedlist>
  5072 + <listitem>
  5073 + <para>
  5074 + When generating a file while preserving object streams,
  5075 + unreferenced objects are now correctly removed unless
  5076 + <option>--preserve-unreferenced</option> is specified.
  5077 + </para>
  5078 + </listitem>
  5079 + </itemizedlist>
  5080 + </listitem>
  5081 + </itemizedlist>
  5082 + </listitem>
  5083 + </varlistentry>
  5084 + <varlistentry>
5064 5085 <term>10.3.1: March 11, 2021</term>
5065 5086 <listitem>
5066 5087 <itemizedlist>
... ...
qpdf/qpdf.testcov
... ... @@ -592,3 +592,5 @@ QPDFAcroFormDocumentHelper AP parse error 0
592 592 qpdf copy fields not this file 0
593 593 qpdf copy fields non-first from orig 0
594 594 QPDF resolve duplicated page in insert 0
  595 +QPDFWriter preserve object streams 1
  596 +QPDFWriter exclude from object stream 0
... ...
qpdf/qtest/qpdf.test
... ... @@ -986,7 +986,7 @@ my @bug_tests = (
986 986 ["106", "zlib data error", 3],
987 987 ["141a", "/W entry size 0", 2],
988 988 ["141b", "/W entry size 0", 2],
989   - ["143", "self-referential ostream", 3],
  989 + ["143", "self-referential ostream", 3, "--preserve-unreferenced"],
990 990 ["146", "very deeply nested array", 2],
991 991 ["147", "previously caused memory error", 2],
992 992 ["148", "free memory on bad flate", 2],
... ... @@ -996,14 +996,18 @@ my @bug_tests = (
996 996 ["263", "empty xref stream", 2],
997 997 ["335a", "ozz-fuzz-12152", 2],
998 998 ["335b", "ozz-fuzz-14845", 2],
999   - ["fuzz-16214", "stream in object stream", 3],
  999 + ["fuzz-16214", "stream in object stream", 3, "--preserve-unreferenced"],
1000 1000 # When adding to this list, consider adding to SEED_CORPUS_FILES
1001 1001 # in fuzz/build.mk and updating the count in fuzz/qtest/fuzz.test.
1002 1002 );
1003 1003 $n_tests += scalar(@bug_tests);
1004 1004 foreach my $d (@bug_tests)
1005 1005 {
1006   - my ($n, $description, $exit_status) = @$d;
  1006 + my ($n, $description, $exit_status, $xargs) = @$d;
  1007 + if (! defined $xargs)
  1008 + {
  1009 + $xargs = "";
  1010 + }
1007 1011 if (-f "issue-$n.obfuscated")
1008 1012 {
1009 1013 # Some of the PDF files in the test suite trigger anti-virus
... ... @@ -1025,7 +1029,7 @@ foreach my $d (@bug_tests)
1025 1029 {
1026 1030 my $base = (-f "issue-$n.pdf") ? "issue-$n" : "$n";
1027 1031 $td->runtest($description,
1028   - {$td->COMMAND => "qpdf $base.pdf a.pdf"},
  1032 + {$td->COMMAND => "qpdf $xargs $base.pdf a.pdf"},
1029 1033 {$td->FILE => "$base.out",
1030 1034 $td->EXIT_STATUS => $exit_status},
1031 1035 $td->NORMALIZE_NEWLINES);
... ... @@ -1304,7 +1308,8 @@ $n_tests += 2;
1304 1308 # that in turn contains an indirect scalar (bug 2974522).
1305 1309 $td->runtest("unreferenced indirect scalar",
1306 1310 {$td->COMMAND =>
1307   - "qpdf --qdf --static-id --object-streams=preserve" .
  1311 + "qpdf --qdf --static-id --preserve-unreferenced" .
  1312 + " --object-streams=preserve" .
1308 1313 " unreferenced-indirect-scalar.pdf a.qdf"},
1309 1314 {$td->STRING => "",
1310 1315 $td->EXIT_STATUS => 0},
... ...
qpdf/qtest/qpdf/nontrivial-crypt-filter-decrypted.pdf
No preview for this file type
qpdf/qtest/qpdf/sample-form-out.pdf
No preview for this file type
qpdf/qtest/qpdf/unreferenced-indirect-scalar.out
No preview for this file type