Commit 07f6c635a95d8c20040896646394c1e5a8a64784

Authored by Jay Berkenbilt
1 parent 1d96af8b

Bug fix: treat old generations of reused objects as null

ChangeLog
  1 +2024-01-07 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Bug fix: treat references to older generations of objects as
  4 + null.
  5 +
1 6 2024-01-06 Jay Berkenbilt <ejb@ql.org>
2 7  
3 8 * When recovering a file's xref table, attempt to find xref
... ...
include/qpdf/QPDF.hh
... ... @@ -1038,6 +1038,7 @@ class QPDF
1038 1038 QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj);
1039 1039 bool isCached(QPDFObjGen const& og);
1040 1040 bool isUnresolved(QPDFObjGen const& og);
  1041 + void removeObject(QPDFObjGen const& og);
1041 1042 void updateCache(
1042 1043 QPDFObjGen const& og,
1043 1044 std::shared_ptr<QPDFObject> const& object,
... ...
libqpdf/QPDF.cc
... ... @@ -706,6 +706,19 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
706 706 // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we
707 707 // never depend on its being set.
708 708 m->deleted_objects.clear();
  709 +
  710 + // Make sure we keep only the highest generation for any object.
  711 + QPDFObjGen::set to_delete;
  712 + QPDFObjGen last_og;
  713 + for (auto const& og: m->xref_table) {
  714 + if (og.first.getObj() == last_og.getObj()) {
  715 + to_delete.emplace(last_og);
  716 + }
  717 + last_og = og.first;
  718 + }
  719 + for (auto const& og: to_delete) {
  720 + removeObject(og);
  721 + }
709 722 }
710 723  
711 724 bool
... ... @@ -1979,6 +1992,18 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh)
1979 1992 }
1980 1993  
1981 1994 void
  1995 +QPDF::removeObject(QPDFObjGen const& og)
  1996 +{
  1997 + auto null = QPDFObjectHandle::newNull();
  1998 + m->xref_table.erase(og);
  1999 + if (isCached(og)) {
  2000 + // Take care of any object handles that may be floating around.
  2001 + replaceObject(og, null);
  2002 + }
  2003 + m->obj_cache.erase(og);
  2004 +}
  2005 +
  2006 +void
1982 2007 QPDF::replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement)
1983 2008 {
1984 2009 QTC::TC("qpdf", "QPDF replaceReserved");
... ...
qpdf/qtest/incremental.test
... ... @@ -14,12 +14,8 @@ cleanup();
14 14  
15 15 my $td = new TestDriver('incremental');
16 16  
17   -my $n_tests = 6;
  17 +my $n_tests = 9;
18 18  
19   -# Since the beginning but discovered at the time of releasing 11.8.0:
20   -# qpdf doesn't delete earlier generations of an object when they are
21   -# reused. See also EXPECT_FAILURE in object-stream.test and
22   -# linearization.test.
23 19 $td->runtest("handle delete and reuse",
24 20 {$td->COMMAND => "qpdf --qdf --static-id incremental-1.pdf a.pdf"},
25 21 {$td->STRING => "", $td->EXIT_STATUS => 0},
... ... @@ -27,11 +23,11 @@ $td->runtest("handle delete and reuse",
27 23 $td->runtest("check output",
28 24 {$td->FILE => "a.pdf"},
29 25 {$td->FILE => "incremental-1-out.qdf"},
30   - $td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE);
  26 + $td->NORMALIZE_NEWLINES);
31 27 $td->runtest("check xref",
32 28 {$td->COMMAND => "qpdf --show-xref incremental-1.pdf"},
33 29 {$td->FILE => "incremental-1-xref.out", $td->EXIT_STATUS => 0},
34   - $td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE);
  30 + $td->NORMALIZE_NEWLINES);
35 31 $td->runtest("handle delete and reuse",
36 32 {$td->COMMAND => "qpdf --qdf --static-id incremental-2.pdf a.pdf"},
37 33 {$td->STRING => "", $td->EXIT_STATUS => 0},
... ... @@ -40,11 +36,23 @@ $td->runtest("check output",
40 36 {$td->FILE => "a.pdf"},
41 37 # intentionally comparing incremental-2 with incremental-1-out
42 38 {$td->FILE => "incremental-1-out.qdf"},
43   - $td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE);
  39 + $td->NORMALIZE_NEWLINES);
44 40 $td->runtest("check xref",
45 41 {$td->COMMAND => "qpdf --show-xref incremental-1.pdf"},
46 42 {$td->FILE => "incremental-2-xref.out", $td->EXIT_STATUS => 0},
47   - $td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE);
  43 + $td->NORMALIZE_NEWLINES);
  44 +$td->runtest("handle delete and don't reuse",
  45 + {$td->COMMAND => "qpdf --qdf --static-id incremental-3.pdf a.pdf"},
  46 + {$td->STRING => "", $td->EXIT_STATUS => 0},
  47 + $td->NORMALIZE_NEWLINES);
  48 +$td->runtest("check output",
  49 + {$td->FILE => "a.pdf"},
  50 + {$td->FILE => "incremental-3-out.qdf"},
  51 + $td->NORMALIZE_NEWLINES);
  52 +$td->runtest("check xref",
  53 + {$td->COMMAND => "qpdf --show-xref incremental-3.pdf"},
  54 + {$td->FILE => "incremental-3-xref.out", $td->EXIT_STATUS => 0},
  55 + $td->NORMALIZE_NEWLINES);
48 56  
49 57 cleanup();
50 58 $td->report($n_tests);
... ...
qpdf/qtest/linearization.test
... ... @@ -84,11 +84,6 @@ foreach my $base (@to_linearize)
84 84 {
85 85 foreach my $omode (qw(disable preserve generate))
86 86 {
87   - my $xflags = 0;
88   - if ($base eq 'gen1')
89   - {
90   - $xflags = $td->EXPECT_FAILURE;
91   - }
92 87 my $oarg = "-object-streams=$omode";
93 88 my $sdarg = "";
94 89 if (($base eq 'lin-special') || ($base eq 'object-stream'))
... ... @@ -100,13 +95,12 @@ foreach my $base (@to_linearize)
100 95 {$td->COMMAND =>
101 96 "qpdf -linearize $oarg $sdarg" .
102 97 " --static-id $base.pdf a.pdf"},
103   - {$td->STRING => "", $td->EXIT_STATUS => 0},
104   - $xflags);
  98 + {$td->STRING => "", $td->EXIT_STATUS => 0});
105 99 $td->runtest("check linearization",
106 100 {$td->COMMAND => "qpdf --check-linearization a.pdf"},
107 101 {$td->STRING => "a.pdf: no linearization errors\n",
108 102 $td->EXIT_STATUS => 0},
109   - $td->NORMALIZE_NEWLINES | $xflags);
  103 + $td->NORMALIZE_NEWLINES);
110 104 # Relinearizing twice should produce identical results. We
111 105 # have to do it twice because, if objects changed ordering
112 106 # during the original linearization, the hint tables won't
... ... @@ -117,17 +111,14 @@ foreach my $base (@to_linearize)
117 111 $td->runtest("relinearize $base 1",
118 112 {$td->COMMAND =>
119 113 "qpdf -linearize $sdarg --static-id a.pdf b.pdf"},
120   - {$td->STRING => "", $td->EXIT_STATUS => 0},
121   - $xflags);
  114 + {$td->STRING => "", $td->EXIT_STATUS => 0});
122 115 $td->runtest("relinearize $base 2",
123 116 {$td->COMMAND =>
124 117 "qpdf -linearize $sdarg --static-id b.pdf c.pdf"},
125   - {$td->STRING => "", $td->EXIT_STATUS => 0},
126   - $xflags);
  118 + {$td->STRING => "", $td->EXIT_STATUS => 0});
127 119 $td->runtest("compare files ($omode)",
128 120 {$td->FILE => "b.pdf"},
129   - {$td->FILE => "c.pdf"},
130   - $xflags);
  121 + {$td->FILE => "c.pdf"});
131 122 if (($base eq 'lin-special') || ($base eq 'object-stream'))
132 123 {
133 124 $td->runtest("check $base ($omode)",
... ...
qpdf/qtest/object-stream.test
... ... @@ -82,13 +82,9 @@ $td->runtest("generate object streams for gen > 0",
82 82 {$td->COMMAND => "qpdf --qdf --static-id" .
83 83 " --object-streams=generate gen1.pdf a.pdf"},
84 84 {$td->STRING => "", $td->EXIT_STATUS => 0});
85   -# qpdf 11.8.0 -- it was discovered that qpdf was incorrectly handling
86   -# references to older generations of reused objects in incrementally
87   -# updated files.
88 85 $td->runtest("check file",
89 86 {$td->FILE => "a.pdf"},
90   - {$td->FILE => "gen1.qdf"},
91   - $td->EXPECT_FAILURE);
  87 + {$td->FILE => "gen1.qdf"});
92 88  
93 89 $td->runtest("generate object streams for gen > 0",
94 90 {$td->COMMAND => "qpdf --qdf --static-id" .
... ...
qpdf/qtest/qpdf/incremental-2-xref.out
1 1 1/0: uncompressed; offset = 9
2 2 2/0: uncompressed; offset = 63
3 3 3/0: uncompressed; offset = 1069
4   -4/0: uncompressed; offset = 307
  4 +4/1: uncompressed; offset = 948
5 5 5/0: uncompressed; offset = 403
6 6 6/0: uncompressed; offset = 438
7 7 7/0: uncompressed; offset = 974
... ...
qpdf/qtest/qpdf/incremental-3-out.qdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +%% Original object ID: 1 0
  6 +1 0 obj
  7 +<<
  8 + /Pages 2 0 R
  9 + /Type /Catalog
  10 +>>
  11 +endobj
  12 +
  13 +%% Original object ID: 2 0
  14 +2 0 obj
  15 +<<
  16 + /Count 1
  17 + /Kids [
  18 + 3 0 R
  19 + ]
  20 + /Type /Pages
  21 +>>
  22 +endobj
  23 +
  24 +%% Page 1
  25 +%% Original object ID: 3 0
  26 +3 0 obj
  27 +<<
  28 + /MediaBox [
  29 + 0
  30 + 0
  31 + 612
  32 + 792
  33 + ]
  34 + /Parent 2 0 R
  35 + /Resources <<
  36 + /Font <<
  37 + /F1 4 0 R
  38 + >>
  39 + /ProcSet 5 0 R
  40 + >>
  41 + /Type /Page
  42 +>>
  43 +endobj
  44 +
  45 +%% Original object ID: 6 0
  46 +4 0 obj
  47 +<<
  48 + /BaseFont /Helvetica
  49 + /Encoding /WinAnsiEncoding
  50 + /Name /F1
  51 + /Subtype /Type1
  52 + /Type /Font
  53 +>>
  54 +endobj
  55 +
  56 +%% Original object ID: 5 0
  57 +5 0 obj
  58 +[
  59 + /PDF
  60 + /Text
  61 +]
  62 +endobj
  63 +
  64 +xref
  65 +0 6
  66 +0000000000 65535 f
  67 +0000000052 00000 n
  68 +0000000133 00000 n
  69 +0000000242 00000 n
  70 +0000000443 00000 n
  71 +0000000588 00000 n
  72 +trailer <<
  73 + /Root 1 0 R
  74 + /Size 6
  75 + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
  76 +>>
  77 +startxref
  78 +623
  79 +%%EOF
... ...
qpdf/qtest/qpdf/incremental-3-xref.out 0 → 100644
  1 +1/0: uncompressed; offset = 9
  2 +2/0: uncompressed; offset = 63
  3 +3/0: uncompressed; offset = 135
  4 +5/0: uncompressed; offset = 403
  5 +6/0: uncompressed; offset = 438
... ...
qpdf/qtest/qpdf/incremental-3.pdf 0 → 100644
  1 +%PDF-1.3
  2 +1 0 obj
  3 +<<
  4 + /Type /Catalog
  5 + /Pages 2 0 R
  6 +>>
  7 +endobj
  8 +
  9 +2 0 obj
  10 +<<
  11 + /Type /Pages
  12 + /Kids [
  13 + 3 0 R
  14 + ]
  15 + /Count 1
  16 +>>
  17 +endobj
  18 +
  19 +3 0 obj
  20 +<<
  21 + /Type /Page
  22 + /Parent 2 0 R
  23 + /MediaBox [0 0 612 792]
  24 + /Contents 4 0 R
  25 + /Resources <<
  26 + /ProcSet 5 0 R
  27 + /Font <<
  28 + /F1 6 0 R
  29 + >>
  30 + >>
  31 +>>
  32 +endobj
  33 +
  34 +4 0 obj
  35 +<<
  36 + /Length 44
  37 +>>
  38 +stream
  39 +BT
  40 + /F1 24 Tf
  41 + 72 720 Td
  42 + (Potato) Tj
  43 +ET
  44 +endstream
  45 +endobj
  46 +
  47 +5 0 obj
  48 +[
  49 + /PDF
  50 + /Text
  51 +]
  52 +endobj
  53 +
  54 +6 0 obj
  55 +<<
  56 + /Type /Font
  57 + /Subtype /Type1
  58 + /Name /F1
  59 + /BaseFont /Helvetica
  60 + /Encoding /WinAnsiEncoding
  61 +>>
  62 +endobj
  63 +
  64 +xref
  65 +0 7
  66 +0000000000 65535 f
  67 +0000000009 00000 n
  68 +0000000063 00000 n
  69 +0000000135 00000 n
  70 +0000000307 00000 n
  71 +0000000403 00000 n
  72 +0000000438 00000 n
  73 +trailer <<
  74 + /Size 7
  75 + /Root 1 0 R
  76 +>>
  77 +startxref
  78 +556
  79 +%%EOF
  80 +
  81 +% Delete object 4 and increment generation
  82 +xref
  83 +0 1
  84 +0000000004 65535 f
  85 +4 1
  86 +0000000000 00001 f
  87 +trailer <<
  88 + /Size 7
  89 + /Root 1 0 R
  90 + /Prev 556
  91 + /Gone 4 0 R
  92 +>>
  93 +startxref
  94 +807
  95 +%%EOF
... ...