Commit 8ba968bd86a34ee09fb9e463381be10286fefabb

Authored by Jay Berkenbilt
Committed by GitHub
2 parents 1d96af8b 8715d6a6

Merge pull request #1112 from m-holger/gcog

Tweaks to QPDF::read_xref, removeObject and getCompressibleObjGens
ChangeLog
  1 +2024-01-07 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Bug fix: treat references to older generations of objects as
  4 + null.
  5 +
1 2024-01-06 Jay Berkenbilt <ejb@ql.org> 6 2024-01-06 Jay Berkenbilt <ejb@ql.org>
2 7
3 * When recovering a file's xref table, attempt to find xref 8 * When recovering a file's xref table, attempt to find xref
include/qpdf/QPDF.hh
@@ -1038,6 +1038,7 @@ class QPDF @@ -1038,6 +1038,7 @@ class QPDF
1038 QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj); 1038 QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj);
1039 bool isCached(QPDFObjGen const& og); 1039 bool isCached(QPDFObjGen const& og);
1040 bool isUnresolved(QPDFObjGen const& og); 1040 bool isUnresolved(QPDFObjGen const& og);
  1041 + void removeObject(QPDFObjGen og);
1041 void updateCache( 1042 void updateCache(
1042 QPDFObjGen const& og, 1043 QPDFObjGen const& og,
1043 std::shared_ptr<QPDFObject> const& object, 1044 std::shared_ptr<QPDFObject> const& object,
libqpdf/QPDF.cc
@@ -706,6 +706,14 @@ QPDF::read_xref(qpdf_offset_t xref_offset) @@ -706,6 +706,14 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
706 // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we 706 // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we
707 // never depend on its being set. 707 // never depend on its being set.
708 m->deleted_objects.clear(); 708 m->deleted_objects.clear();
  709 +
  710 + // Make sure we keep only the highest generation for any object.
  711 + QPDFObjGen last_og{-1, 0};
  712 + for (auto const& [og, _xref]: m->xref_table) {
  713 + if (og.getObj() == last_og.getObj())
  714 + removeObject(last_og);
  715 + last_og = og;
  716 + }
709 } 717 }
710 718
711 bool 719 bool
@@ -1979,6 +1987,18 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh) @@ -1979,6 +1987,18 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh)
1979 } 1987 }
1980 1988
1981 void 1989 void
  1990 +QPDF::removeObject(QPDFObjGen og)
  1991 +{
  1992 + m->xref_table.erase(og);
  1993 + if (auto cached = m->obj_cache.find(og); cached != m->obj_cache.end()) {
  1994 + // Take care of any object handles that may be floating around.
  1995 + cached->second.object->assign(QPDF_Null::create());
  1996 + cached->second.object->setObjGen(nullptr, QPDFObjGen());
  1997 + m->obj_cache.erase(cached);
  1998 + }
  1999 +}
  2000 +
  2001 +void
1982 QPDF::replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement) 2002 QPDF::replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement)
1983 { 2003 {
1984 QTC::TC("qpdf", "QPDF replaceReserved"); 2004 QTC::TC("qpdf", "QPDF replaceReserved");
qpdf/qtest/incremental.test
@@ -14,12 +14,8 @@ cleanup(); @@ -14,12 +14,8 @@ cleanup();
14 14
15 my $td = new TestDriver('incremental'); 15 my $td = new TestDriver('incremental');
16 16
17 -my $n_tests = 6; 17 +my $n_tests = 9;
18 18
19 -# Since the beginning but discovered at the time of releasing 11.8.0:  
20 -# qpdf doesn't delete earlier generations of an object when they are  
21 -# reused. See also EXPECT_FAILURE in object-stream.test and  
22 -# linearization.test.  
23 $td->runtest("handle delete and reuse", 19 $td->runtest("handle delete and reuse",
24 {$td->COMMAND => "qpdf --qdf --static-id incremental-1.pdf a.pdf"}, 20 {$td->COMMAND => "qpdf --qdf --static-id incremental-1.pdf a.pdf"},
25 {$td->STRING => "", $td->EXIT_STATUS => 0}, 21 {$td->STRING => "", $td->EXIT_STATUS => 0},
@@ -27,11 +23,11 @@ $td->runtest("handle delete and reuse", @@ -27,11 +23,11 @@ $td->runtest("handle delete and reuse",
27 $td->runtest("check output", 23 $td->runtest("check output",
28 {$td->FILE => "a.pdf"}, 24 {$td->FILE => "a.pdf"},
29 {$td->FILE => "incremental-1-out.qdf"}, 25 {$td->FILE => "incremental-1-out.qdf"},
30 - $td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE); 26 + $td->NORMALIZE_NEWLINES);
31 $td->runtest("check xref", 27 $td->runtest("check xref",
32 {$td->COMMAND => "qpdf --show-xref incremental-1.pdf"}, 28 {$td->COMMAND => "qpdf --show-xref incremental-1.pdf"},
33 {$td->FILE => "incremental-1-xref.out", $td->EXIT_STATUS => 0}, 29 {$td->FILE => "incremental-1-xref.out", $td->EXIT_STATUS => 0},
34 - $td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE); 30 + $td->NORMALIZE_NEWLINES);
35 $td->runtest("handle delete and reuse", 31 $td->runtest("handle delete and reuse",
36 {$td->COMMAND => "qpdf --qdf --static-id incremental-2.pdf a.pdf"}, 32 {$td->COMMAND => "qpdf --qdf --static-id incremental-2.pdf a.pdf"},
37 {$td->STRING => "", $td->EXIT_STATUS => 0}, 33 {$td->STRING => "", $td->EXIT_STATUS => 0},
@@ -40,11 +36,23 @@ $td->runtest("check output", @@ -40,11 +36,23 @@ $td->runtest("check output",
40 {$td->FILE => "a.pdf"}, 36 {$td->FILE => "a.pdf"},
41 # intentionally comparing incremental-2 with incremental-1-out 37 # intentionally comparing incremental-2 with incremental-1-out
42 {$td->FILE => "incremental-1-out.qdf"}, 38 {$td->FILE => "incremental-1-out.qdf"},
43 - $td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE); 39 + $td->NORMALIZE_NEWLINES);
44 $td->runtest("check xref", 40 $td->runtest("check xref",
45 {$td->COMMAND => "qpdf --show-xref incremental-1.pdf"}, 41 {$td->COMMAND => "qpdf --show-xref incremental-1.pdf"},
46 {$td->FILE => "incremental-2-xref.out", $td->EXIT_STATUS => 0}, 42 {$td->FILE => "incremental-2-xref.out", $td->EXIT_STATUS => 0},
47 - $td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE); 43 + $td->NORMALIZE_NEWLINES);
  44 +$td->runtest("handle delete and don't reuse",
  45 + {$td->COMMAND => "qpdf --qdf --static-id incremental-3.pdf a.pdf"},
  46 + {$td->STRING => "", $td->EXIT_STATUS => 0},
  47 + $td->NORMALIZE_NEWLINES);
  48 +$td->runtest("check output",
  49 + {$td->FILE => "a.pdf"},
  50 + {$td->FILE => "incremental-3-out.qdf"},
  51 + $td->NORMALIZE_NEWLINES);
  52 +$td->runtest("check xref",
  53 + {$td->COMMAND => "qpdf --show-xref incremental-3.pdf"},
  54 + {$td->FILE => "incremental-3-xref.out", $td->EXIT_STATUS => 0},
  55 + $td->NORMALIZE_NEWLINES);
48 56
49 cleanup(); 57 cleanup();
50 $td->report($n_tests); 58 $td->report($n_tests);
qpdf/qtest/linearization.test
@@ -84,11 +84,6 @@ foreach my $base (@to_linearize) @@ -84,11 +84,6 @@ foreach my $base (@to_linearize)
84 { 84 {
85 foreach my $omode (qw(disable preserve generate)) 85 foreach my $omode (qw(disable preserve generate))
86 { 86 {
87 - my $xflags = 0;  
88 - if ($base eq 'gen1')  
89 - {  
90 - $xflags = $td->EXPECT_FAILURE;  
91 - }  
92 my $oarg = "-object-streams=$omode"; 87 my $oarg = "-object-streams=$omode";
93 my $sdarg = ""; 88 my $sdarg = "";
94 if (($base eq 'lin-special') || ($base eq 'object-stream')) 89 if (($base eq 'lin-special') || ($base eq 'object-stream'))
@@ -100,13 +95,12 @@ foreach my $base (@to_linearize) @@ -100,13 +95,12 @@ foreach my $base (@to_linearize)
100 {$td->COMMAND => 95 {$td->COMMAND =>
101 "qpdf -linearize $oarg $sdarg" . 96 "qpdf -linearize $oarg $sdarg" .
102 " --static-id $base.pdf a.pdf"}, 97 " --static-id $base.pdf a.pdf"},
103 - {$td->STRING => "", $td->EXIT_STATUS => 0},  
104 - $xflags); 98 + {$td->STRING => "", $td->EXIT_STATUS => 0});
105 $td->runtest("check linearization", 99 $td->runtest("check linearization",
106 {$td->COMMAND => "qpdf --check-linearization a.pdf"}, 100 {$td->COMMAND => "qpdf --check-linearization a.pdf"},
107 {$td->STRING => "a.pdf: no linearization errors\n", 101 {$td->STRING => "a.pdf: no linearization errors\n",
108 $td->EXIT_STATUS => 0}, 102 $td->EXIT_STATUS => 0},
109 - $td->NORMALIZE_NEWLINES | $xflags); 103 + $td->NORMALIZE_NEWLINES);
110 # Relinearizing twice should produce identical results. We 104 # Relinearizing twice should produce identical results. We
111 # have to do it twice because, if objects changed ordering 105 # have to do it twice because, if objects changed ordering
112 # during the original linearization, the hint tables won't 106 # during the original linearization, the hint tables won't
@@ -117,17 +111,14 @@ foreach my $base (@to_linearize) @@ -117,17 +111,14 @@ foreach my $base (@to_linearize)
117 $td->runtest("relinearize $base 1", 111 $td->runtest("relinearize $base 1",
118 {$td->COMMAND => 112 {$td->COMMAND =>
119 "qpdf -linearize $sdarg --static-id a.pdf b.pdf"}, 113 "qpdf -linearize $sdarg --static-id a.pdf b.pdf"},
120 - {$td->STRING => "", $td->EXIT_STATUS => 0},  
121 - $xflags); 114 + {$td->STRING => "", $td->EXIT_STATUS => 0});
122 $td->runtest("relinearize $base 2", 115 $td->runtest("relinearize $base 2",
123 {$td->COMMAND => 116 {$td->COMMAND =>
124 "qpdf -linearize $sdarg --static-id b.pdf c.pdf"}, 117 "qpdf -linearize $sdarg --static-id b.pdf c.pdf"},
125 - {$td->STRING => "", $td->EXIT_STATUS => 0},  
126 - $xflags); 118 + {$td->STRING => "", $td->EXIT_STATUS => 0});
127 $td->runtest("compare files ($omode)", 119 $td->runtest("compare files ($omode)",
128 {$td->FILE => "b.pdf"}, 120 {$td->FILE => "b.pdf"},
129 - {$td->FILE => "c.pdf"},  
130 - $xflags); 121 + {$td->FILE => "c.pdf"});
131 if (($base eq 'lin-special') || ($base eq 'object-stream')) 122 if (($base eq 'lin-special') || ($base eq 'object-stream'))
132 { 123 {
133 $td->runtest("check $base ($omode)", 124 $td->runtest("check $base ($omode)",
qpdf/qtest/object-stream.test
@@ -82,13 +82,9 @@ $td->runtest("generate object streams for gen > 0", @@ -82,13 +82,9 @@ $td->runtest("generate object streams for gen > 0",
82 {$td->COMMAND => "qpdf --qdf --static-id" . 82 {$td->COMMAND => "qpdf --qdf --static-id" .
83 " --object-streams=generate gen1.pdf a.pdf"}, 83 " --object-streams=generate gen1.pdf a.pdf"},
84 {$td->STRING => "", $td->EXIT_STATUS => 0}); 84 {$td->STRING => "", $td->EXIT_STATUS => 0});
85 -# qpdf 11.8.0 -- it was discovered that qpdf was incorrectly handling  
86 -# references to older generations of reused objects in incrementally  
87 -# updated files.  
88 $td->runtest("check file", 85 $td->runtest("check file",
89 {$td->FILE => "a.pdf"}, 86 {$td->FILE => "a.pdf"},
90 - {$td->FILE => "gen1.qdf"},  
91 - $td->EXPECT_FAILURE); 87 + {$td->FILE => "gen1.qdf"});
92 88
93 $td->runtest("generate object streams for gen > 0", 89 $td->runtest("generate object streams for gen > 0",
94 {$td->COMMAND => "qpdf --qdf --static-id" . 90 {$td->COMMAND => "qpdf --qdf --static-id" .
qpdf/qtest/qpdf/incremental-2-xref.out
1 1/0: uncompressed; offset = 9 1 1/0: uncompressed; offset = 9
2 2/0: uncompressed; offset = 63 2 2/0: uncompressed; offset = 63
3 3/0: uncompressed; offset = 1069 3 3/0: uncompressed; offset = 1069
4 -4/0: uncompressed; offset = 307 4 +4/1: uncompressed; offset = 948
5 5/0: uncompressed; offset = 403 5 5/0: uncompressed; offset = 403
6 6/0: uncompressed; offset = 438 6 6/0: uncompressed; offset = 438
7 7/0: uncompressed; offset = 974 7 7/0: uncompressed; offset = 974
qpdf/qtest/qpdf/incremental-3-out.qdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +%% Original object ID: 1 0
  6 +1 0 obj
  7 +<<
  8 + /Pages 2 0 R
  9 + /Type /Catalog
  10 +>>
  11 +endobj
  12 +
  13 +%% Original object ID: 2 0
  14 +2 0 obj
  15 +<<
  16 + /Count 1
  17 + /Kids [
  18 + 3 0 R
  19 + ]
  20 + /Type /Pages
  21 +>>
  22 +endobj
  23 +
  24 +%% Page 1
  25 +%% Original object ID: 3 0
  26 +3 0 obj
  27 +<<
  28 + /MediaBox [
  29 + 0
  30 + 0
  31 + 612
  32 + 792
  33 + ]
  34 + /Parent 2 0 R
  35 + /Resources <<
  36 + /Font <<
  37 + /F1 4 0 R
  38 + >>
  39 + /ProcSet 5 0 R
  40 + >>
  41 + /Type /Page
  42 +>>
  43 +endobj
  44 +
  45 +%% Original object ID: 6 0
  46 +4 0 obj
  47 +<<
  48 + /BaseFont /Helvetica
  49 + /Encoding /WinAnsiEncoding
  50 + /Name /F1
  51 + /Subtype /Type1
  52 + /Type /Font
  53 +>>
  54 +endobj
  55 +
  56 +%% Original object ID: 5 0
  57 +5 0 obj
  58 +[
  59 + /PDF
  60 + /Text
  61 +]
  62 +endobj
  63 +
  64 +xref
  65 +0 6
  66 +0000000000 65535 f
  67 +0000000052 00000 n
  68 +0000000133 00000 n
  69 +0000000242 00000 n
  70 +0000000443 00000 n
  71 +0000000588 00000 n
  72 +trailer <<
  73 + /Root 1 0 R
  74 + /Size 6
  75 + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
  76 +>>
  77 +startxref
  78 +623
  79 +%%EOF
qpdf/qtest/qpdf/incremental-3-xref.out 0 → 100644
  1 +1/0: uncompressed; offset = 9
  2 +2/0: uncompressed; offset = 63
  3 +3/0: uncompressed; offset = 135
  4 +5/0: uncompressed; offset = 403
  5 +6/0: uncompressed; offset = 438
qpdf/qtest/qpdf/incremental-3.pdf 0 → 100644
  1 +%PDF-1.3
  2 +1 0 obj
  3 +<<
  4 + /Type /Catalog
  5 + /Pages 2 0 R
  6 +>>
  7 +endobj
  8 +
  9 +2 0 obj
  10 +<<
  11 + /Type /Pages
  12 + /Kids [
  13 + 3 0 R
  14 + ]
  15 + /Count 1
  16 +>>
  17 +endobj
  18 +
  19 +3 0 obj
  20 +<<
  21 + /Type /Page
  22 + /Parent 2 0 R
  23 + /MediaBox [0 0 612 792]
  24 + /Contents 4 0 R
  25 + /Resources <<
  26 + /ProcSet 5 0 R
  27 + /Font <<
  28 + /F1 6 0 R
  29 + >>
  30 + >>
  31 +>>
  32 +endobj
  33 +
  34 +4 0 obj
  35 +<<
  36 + /Length 44
  37 +>>
  38 +stream
  39 +BT
  40 + /F1 24 Tf
  41 + 72 720 Td
  42 + (Potato) Tj
  43 +ET
  44 +endstream
  45 +endobj
  46 +
  47 +5 0 obj
  48 +[
  49 + /PDF
  50 + /Text
  51 +]
  52 +endobj
  53 +
  54 +6 0 obj
  55 +<<
  56 + /Type /Font
  57 + /Subtype /Type1
  58 + /Name /F1
  59 + /BaseFont /Helvetica
  60 + /Encoding /WinAnsiEncoding
  61 +>>
  62 +endobj
  63 +
  64 +xref
  65 +0 7
  66 +0000000000 65535 f
  67 +0000000009 00000 n
  68 +0000000063 00000 n
  69 +0000000135 00000 n
  70 +0000000307 00000 n
  71 +0000000403 00000 n
  72 +0000000438 00000 n
  73 +trailer <<
  74 + /Size 7
  75 + /Root 1 0 R
  76 +>>
  77 +startxref
  78 +556
  79 +%%EOF
  80 +
  81 +% Delete object 4 and increment generation
  82 +xref
  83 +0 1
  84 +0000000004 65535 f
  85 +4 1
  86 +0000000000 00001 f
  87 +trailer <<
  88 + /Size 7
  89 + /Root 1 0 R
  90 + /Prev 556
  91 + /Gone 4 0 R
  92 +>>
  93 +startxref
  94 +807
  95 +%%EOF