Commit 30ac51bc78f7d723e4fc94ff8af5527e88b56f23

Authored by Jay Berkenbilt
1 parent 16c19e94

Exclude unreferenced objects in object streams (fixes #520)

ChangeLog
  1 +2021-05-08 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Fix 11-year-old bug in which unreferenced objects were left in
  4 + preserved object streams. Fixes #520.
  5 +
1 2021-04-17 Jay Berkenbilt <ejb@ql.org> 6 2021-04-17 Jay Berkenbilt <ejb@ql.org>
2 7
3 * Portability fix: use tm_gmtoff rather than global timezone 8 * Portability fix: use tm_gmtoff rather than global timezone
@@ -511,9 +511,6 @@ I find it useful to make reference to them in this list. @@ -511,9 +511,6 @@ I find it useful to make reference to them in this list.
511 implemented, update the docs on crypto providers, which mention 511 implemented, update the docs on crypto providers, which mention
512 that this may happen in the future. 512 that this may happen in the future.
513 513
514 - * See if we can avoid preserving unreferenced objects in object  
515 - streams even when preserving the object streams.  
516 -  
517 * Provide APIs for embedded files. See *attachments*.pdf in test 514 * Provide APIs for embedded files. See *attachments*.pdf in test
518 suite. The private method findAttachmentStreams finds at least 515 suite. The private method findAttachmentStreams finds at least
519 cases for modern versions of Adobe Reader (>= 1.7, maybe earlier). 516 cases for modern versions of Adobe Reader (>= 1.7, maybe earlier).
libqpdf/QPDFWriter.cc
@@ -2363,17 +2363,36 @@ QPDFWriter::preserveObjectStreams() @@ -2363,17 +2363,36 @@ QPDFWriter::preserveObjectStreams()
2363 { 2363 {
2364 // Our object_to_object_stream map has to map ObjGen -> ObjGen 2364 // Our object_to_object_stream map has to map ObjGen -> ObjGen
2365 // since we may be generating object streams out of old objects 2365 // since we may be generating object streams out of old objects
2366 - // that have generation numbers greater than zero. However in an 2366 + // that have generation numbers greater than zero. However in an
2367 // existing PDF, all object stream objects and all objects in them 2367 // existing PDF, all object stream objects and all objects in them
2368 // must have generation 0 because the PDF spec does not provide 2368 // must have generation 0 because the PDF spec does not provide
2369 - // any way to do otherwise. 2369 + // any way to do otherwise. This code filters out objects that are
  2370 + // not allowed to be in object streams. In addition to removing
  2371 + // objects that were erroneously included in object streams in the
  2372 + // source PDF, it also prevents unreferenced objects from being
  2373 + // included.
  2374 + std::set<QPDFObjGen> eligible;
  2375 + if (! this->m->preserve_unreferenced_objects)
  2376 + {
  2377 + std::vector<QPDFObjGen> eligible_v =
  2378 + QPDF::Writer::getCompressibleObjGens(this->m->pdf);
  2379 + eligible = std::set<QPDFObjGen>(eligible_v.begin(), eligible_v.end());
  2380 + }
  2381 + QTC::TC("qpdf", "QPDFWriter preserve object streams",
  2382 + this->m->preserve_unreferenced_objects ? 0 : 1);
2370 std::map<int, int> omap; 2383 std::map<int, int> omap;
2371 QPDF::Writer::getObjectStreamData(this->m->pdf, omap); 2384 QPDF::Writer::getObjectStreamData(this->m->pdf, omap);
2372 - for (std::map<int, int>::iterator iter = omap.begin();  
2373 - iter != omap.end(); ++iter) 2385 + for (auto iter: omap)
2374 { 2386 {
2375 - this->m->object_to_object_stream[QPDFObjGen((*iter).first, 0)] =  
2376 - (*iter).second; 2387 + QPDFObjGen og(iter.first, 0);
  2388 + if (eligible.count(og) || this->m->preserve_unreferenced_objects)
  2389 + {
  2390 + this->m->object_to_object_stream[og] = iter.second;
  2391 + }
  2392 + else
  2393 + {
  2394 + QTC::TC("qpdf", "QPDFWriter exclude from object stream");
  2395 + }
2377 } 2396 }
2378 } 2397 }
2379 2398
manual/qpdf-manual.xml
@@ -5061,6 +5061,27 @@ print &quot;\n&quot;; @@ -5061,6 +5061,27 @@ print &quot;\n&quot;;
5061 </varlistentry> 5061 </varlistentry>
5062 --> 5062 -->
5063 <varlistentry> 5063 <varlistentry>
  5064 + <term>10.3.2: May 8, 2021</term>
  5065 + <listitem>
  5066 + <itemizedlist>
  5067 + <listitem>
  5068 + <para>
  5069 + Bug Fixes
  5070 + </para>
  5071 + <itemizedlist>
  5072 + <listitem>
  5073 + <para>
  5074 + When generating a file while preserving object streams,
  5075 + unreferenced objects are correctly removed unless
  5076 + <option>--preserve-unreferenced</option> is specified.
  5077 + </para>
  5078 + </listitem>
  5079 + </itemizedlist>
  5080 + </listitem>
  5081 + </itemizedlist>
  5082 + </listitem>
  5083 + </varlistentry>
  5084 + <varlistentry>
5064 <term>10.3.1: March 11, 2021</term> 5085 <term>10.3.1: March 11, 2021</term>
5065 <listitem> 5086 <listitem>
5066 <itemizedlist> 5087 <itemizedlist>
qpdf/qpdf.testcov
@@ -592,3 +592,5 @@ QPDFAcroFormDocumentHelper AP parse error 0 @@ -592,3 +592,5 @@ QPDFAcroFormDocumentHelper AP parse error 0
592 qpdf copy fields not this file 0 592 qpdf copy fields not this file 0
593 qpdf copy fields non-first from orig 0 593 qpdf copy fields non-first from orig 0
594 QPDF resolve duplicated page in insert 0 594 QPDF resolve duplicated page in insert 0
  595 +QPDFWriter preserve object streams 1
  596 +QPDFWriter exclude from object stream 0
qpdf/qtest/qpdf.test
@@ -986,7 +986,7 @@ my @bug_tests = ( @@ -986,7 +986,7 @@ my @bug_tests = (
986 ["106", "zlib data error", 3], 986 ["106", "zlib data error", 3],
987 ["141a", "/W entry size 0", 2], 987 ["141a", "/W entry size 0", 2],
988 ["141b", "/W entry size 0", 2], 988 ["141b", "/W entry size 0", 2],
989 - ["143", "self-referential ostream", 3], 989 + ["143", "self-referential ostream", 3, "--preserve-unreferenced"],
990 ["146", "very deeply nested array", 2], 990 ["146", "very deeply nested array", 2],
991 ["147", "previously caused memory error", 2], 991 ["147", "previously caused memory error", 2],
992 ["148", "free memory on bad flate", 2], 992 ["148", "free memory on bad flate", 2],
@@ -996,14 +996,18 @@ my @bug_tests = ( @@ -996,14 +996,18 @@ my @bug_tests = (
996 ["263", "empty xref stream", 2], 996 ["263", "empty xref stream", 2],
997 ["335a", "ozz-fuzz-12152", 2], 997 ["335a", "ozz-fuzz-12152", 2],
998 ["335b", "ozz-fuzz-14845", 2], 998 ["335b", "ozz-fuzz-14845", 2],
999 - ["fuzz-16214", "stream in object stream", 3], 999 + ["fuzz-16214", "stream in object stream", 3, "--preserve-unreferenced"],
1000 # When adding to this list, consider adding to SEED_CORPUS_FILES 1000 # When adding to this list, consider adding to SEED_CORPUS_FILES
1001 # in fuzz/build.mk and updating the count in fuzz/qtest/fuzz.test. 1001 # in fuzz/build.mk and updating the count in fuzz/qtest/fuzz.test.
1002 ); 1002 );
1003 $n_tests += scalar(@bug_tests); 1003 $n_tests += scalar(@bug_tests);
1004 foreach my $d (@bug_tests) 1004 foreach my $d (@bug_tests)
1005 { 1005 {
1006 - my ($n, $description, $exit_status) = @$d; 1006 + my ($n, $description, $exit_status, $xargs) = @$d;
  1007 + if (! defined $xargs)
  1008 + {
  1009 + $xargs = "";
  1010 + }
1007 if (-f "issue-$n.obfuscated") 1011 if (-f "issue-$n.obfuscated")
1008 { 1012 {
1009 # Some of the PDF files in the test suite trigger anti-virus 1013 # Some of the PDF files in the test suite trigger anti-virus
@@ -1025,7 +1029,7 @@ foreach my $d (@bug_tests) @@ -1025,7 +1029,7 @@ foreach my $d (@bug_tests)
1025 { 1029 {
1026 my $base = (-f "issue-$n.pdf") ? "issue-$n" : "$n"; 1030 my $base = (-f "issue-$n.pdf") ? "issue-$n" : "$n";
1027 $td->runtest($description, 1031 $td->runtest($description,
1028 - {$td->COMMAND => "qpdf $base.pdf a.pdf"}, 1032 + {$td->COMMAND => "qpdf $xargs $base.pdf a.pdf"},
1029 {$td->FILE => "$base.out", 1033 {$td->FILE => "$base.out",
1030 $td->EXIT_STATUS => $exit_status}, 1034 $td->EXIT_STATUS => $exit_status},
1031 $td->NORMALIZE_NEWLINES); 1035 $td->NORMALIZE_NEWLINES);
@@ -1304,7 +1308,8 @@ $n_tests += 2; @@ -1304,7 +1308,8 @@ $n_tests += 2;
1304 # that in turn contains an indirect scalar (bug 2974522). 1308 # that in turn contains an indirect scalar (bug 2974522).
1305 $td->runtest("unreferenced indirect scalar", 1309 $td->runtest("unreferenced indirect scalar",
1306 {$td->COMMAND => 1310 {$td->COMMAND =>
1307 - "qpdf --qdf --static-id --object-streams=preserve" . 1311 + "qpdf --qdf --static-id --preserve-unreferenced" .
  1312 + " --object-streams=preserve" .
1308 " unreferenced-indirect-scalar.pdf a.qdf"}, 1313 " unreferenced-indirect-scalar.pdf a.qdf"},
1309 {$td->STRING => "", 1314 {$td->STRING => "",
1310 $td->EXIT_STATUS => 0}, 1315 $td->EXIT_STATUS => 0},
qpdf/qtest/qpdf/nontrivial-crypt-filter-decrypted.pdf
No preview for this file type
qpdf/qtest/qpdf/sample-form-out.pdf
No preview for this file type
qpdf/qtest/qpdf/unreferenced-indirect-scalar.out
No preview for this file type