Commit e9980efec87a7a678a1a00cfaf8fc60263c54d24

Authored by Jay Berkenbilt
Committed by Jay Berkenbilt
1 parent d79a823d

Correctly handle reuse of xref stream (fixes #809)

ChangeLog
  1 +2022-11-19 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Bug fix: handle special case of an earlier xref stream object's
  4 + object number being reused by an update made by appending the
  5 + file. Fixes #809.
  6 +
1 2022-10-08 Jay Berkenbilt <ejb@ql.org> 7 2022-10-08 Jay Berkenbilt <ejb@ql.org>
2 8
3 * Fix major performance bug with the openssl crypto provider when 9 * Fix major performance bug with the openssl crypto provider when
include/qpdf/QPDF.hh
@@ -1209,7 +1209,8 @@ class QPDF @@ -1209,7 +1209,8 @@ class QPDF
1209 qpdf_offset_t offset, 1209 qpdf_offset_t offset,
1210 std::string const& description, 1210 std::string const& description,
1211 QPDFObjGen const& exp_og, 1211 QPDFObjGen const& exp_og,
1212 - QPDFObjGen& og); 1212 + QPDFObjGen& og,
  1213 + bool skip_cache_if_in_xref);
1213 void resolve(QPDFObjGen const& og); 1214 void resolve(QPDFObjGen const& og);
1214 void resolveObjectsInStream(int obj_stream_number); 1215 void resolveObjectsInStream(int obj_stream_number);
1215 void stopOnError(std::string const& message); 1216 void stopOnError(std::string const& message);
libqpdf/QPDF.cc
@@ -999,7 +999,12 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) @@ -999,7 +999,12 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset)
999 QPDFObjectHandle xref_obj; 999 QPDFObjectHandle xref_obj;
1000 try { 1000 try {
1001 xref_obj = readObjectAtOffset( 1001 xref_obj = readObjectAtOffset(
1002 - false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og); 1002 + false,
  1003 + xref_offset,
  1004 + "xref stream",
  1005 + QPDFObjGen(0, 0),
  1006 + x_og,
  1007 + true);
1003 } catch (QPDFExc&) { 1008 } catch (QPDFExc&) {
1004 // ignore -- report error below 1009 // ignore -- report error below
1005 } 1010 }
@@ -1638,7 +1643,8 @@ QPDF::readObjectAtOffset( @@ -1638,7 +1643,8 @@ QPDF::readObjectAtOffset(
1638 qpdf_offset_t offset, 1643 qpdf_offset_t offset,
1639 std::string const& description, 1644 std::string const& description,
1640 QPDFObjGen const& exp_og, 1645 QPDFObjGen const& exp_og,
1641 - QPDFObjGen& og) 1646 + QPDFObjGen& og,
  1647 + bool skip_cache_if_in_xref)
1642 { 1648 {
1643 bool check_og = true; 1649 bool check_og = true;
1644 if (exp_og.getObj() == 0) { 1650 if (exp_og.getObj() == 0) {
@@ -1718,7 +1724,7 @@ QPDF::readObjectAtOffset( @@ -1718,7 +1724,7 @@ QPDF::readObjectAtOffset(
1718 qpdf_offset_t new_offset = 1724 qpdf_offset_t new_offset =
1719 this->m->xref_table[exp_og].getOffset(); 1725 this->m->xref_table[exp_og].getOffset();
1720 QPDFObjectHandle result = readObjectAtOffset( 1726 QPDFObjectHandle result = readObjectAtOffset(
1721 - false, new_offset, description, exp_og, og); 1727 + false, new_offset, description, exp_og, og, false);
1722 QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset"); 1728 QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
1723 return result; 1729 return result;
1724 } else { 1730 } else {
@@ -1770,11 +1776,50 @@ QPDF::readObjectAtOffset( @@ -1770,11 +1776,50 @@ QPDF::readObjectAtOffset(
1770 } 1776 }
1771 } 1777 }
1772 qpdf_offset_t end_after_space = this->m->file->tell(); 1778 qpdf_offset_t end_after_space = this->m->file->tell();
1773 - updateCache(  
1774 - og,  
1775 - QPDFObjectHandle::ObjAccessor::getObject(oh),  
1776 - end_before_space,  
1777 - end_after_space); 1779 + if (skip_cache_if_in_xref && this->m->xref_table.count(og)) {
  1780 + // Ordinarily, an object gets read here when resolved
  1781 + // through xref table or stream. In the special case of
  1782 + // the xref stream and linearization hint tables, the
  1783 + // offset comes from another source. For the specific case
  1784 + // of xref streams, the xref stream is read and loaded
  1785 + // into the object cache very early in parsing.
  1786 + // Ordinarily, when a file is updated by appending, items
  1787 + // inserted into the xref table in later updates take
  1788 + // precedence over earlier items. In the special case of
  1789 + // reusing the object number previously used as the xref
  1790 + // stream, we have the following order of events:
  1791 + //
  1792 + // * reused object gets loaded into the xref table
  1793 + // * old object is read here while reading xref streams
  1794 + // * original xref entry is ignored (since already in xref table)
  1795 + //
  1796 + // It is the second step that causes a problem. Even
  1797 + // though the xref table is correct in this case, the old
  1798 + // object is already in the cache and so effectively
  1799 + // prevails over the reused object. To work around this
  1800 + // issue, we have a special case for the xref stream (via
  1801 + // the skip_cache_if_in_xref): if the object is already in
  1802 + // the xref stream, don't cache what we read here.
  1803 + //
  1804 + // It is likely that the same bug may exist for
  1805 + // linearization hint tables, but the existing code uses
  1806 + // end_before_space and end_after_space from the cache, so
  1807 + // fixing that would require more significant rework. The
  1808 + // chances of a linearization hint stream being reused
  1809 + // seems smaller because the xref stream is probably the
  1810 + // highest object in the file and the linearization hint
  1811 + // stream would be some random place in the middle, so I'm
  1812 + // leaving that bug unfixed for now. If the bug were to be
  1813 + // fixed, we could use !check_og in place of
  1814 + // skip_cache_if_in_xref.
  1815 + QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
  1816 + } else {
  1817 + updateCache(
  1818 + og,
  1819 + QPDFObjectHandle::ObjAccessor::getObject(oh),
  1820 + end_before_space,
  1821 + end_after_space);
  1822 + }
1778 } 1823 }
1779 1824
1780 return oh; 1825 return oh;
@@ -1809,7 +1854,7 @@ QPDF::resolve(QPDFObjGen const&amp; og) @@ -1809,7 +1854,7 @@ QPDF::resolve(QPDFObjGen const&amp; og)
1809 // Object stored in cache by readObjectAtOffset 1854 // Object stored in cache by readObjectAtOffset
1810 QPDFObjGen a_og; 1855 QPDFObjGen a_og;
1811 QPDFObjectHandle oh = 1856 QPDFObjectHandle oh =
1812 - readObjectAtOffset(true, offset, "", og, a_og); 1857 + readObjectAtOffset(true, offset, "", og, a_og, false);
1813 } 1858 }
1814 break; 1859 break;
1815 1860
libqpdf/QPDF_linearization.cc
@@ -303,7 +303,12 @@ QPDF::readHintStream(Pipeline&amp; pl, qpdf_offset_t offset, size_t length) @@ -303,7 +303,12 @@ QPDF::readHintStream(Pipeline&amp; pl, qpdf_offset_t offset, size_t length)
303 { 303 {
304 QPDFObjGen og; 304 QPDFObjGen og;
305 QPDFObjectHandle H = readObjectAtOffset( 305 QPDFObjectHandle H = readObjectAtOffset(
306 - false, offset, "linearization hint stream", QPDFObjGen(0, 0), og); 306 + false,
  307 + offset,
  308 + "linearization hint stream",
  309 + QPDFObjGen(0, 0),
  310 + og,
  311 + false);
307 ObjCache& oc = this->m->obj_cache[og]; 312 ObjCache& oc = this->m->obj_cache[og];
308 qpdf_offset_t min_end_offset = oc.end_before_space; 313 qpdf_offset_t min_end_offset = oc.end_before_space;
309 qpdf_offset_t max_end_offset = oc.end_after_space; 314 qpdf_offset_t max_end_offset = oc.end_after_space;
qpdf/qpdf.testcov
@@ -678,3 +678,4 @@ QPDF_json bad calledgetallpages 0 @@ -678,3 +678,4 @@ QPDF_json bad calledgetallpages 0
678 QPDF_json bad pushedinheritedpageresources 0 678 QPDF_json bad pushedinheritedpageresources 0
679 QPDFPageObjectHelper copied fallback 0 679 QPDFPageObjectHelper copied fallback 0
680 QPDFPageObjectHelper used fallback without copying 0 680 QPDFPageObjectHelper used fallback without copying 0
  681 +QPDF skipping cache for known unchecked object 0
qpdf/qtest/qpdf/reuse-xref-stream-obj9.out 0 โ†’ 100644
  1 +<< /BaseFont /Times-Italics /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >>
qpdf/qtest/qpdf/reuse-xref-stream-xref.out 0 โ†’ 100644
  1 +1/0: uncompressed; offset = 25
  2 +2/0: compressed; stream = 1, index = 0
  3 +3/0: compressed; stream = 1, index = 1
  4 +4/0: compressed; stream = 1, index = 2
  5 +5/0: compressed; stream = 1, index = 3
  6 +6/0: uncompressed; offset = 1054
  7 +7/0: uncompressed; offset = 692
  8 +8/0: uncompressed; offset = 791
  9 +9/0: uncompressed; offset = 1087
  10 +10/0: uncompressed; offset = 1196
qpdf/qtest/qpdf/reuse-xref-stream.pdf 0 โ†’ 100644
No preview for this file type
qpdf/qtest/xref-streams.test
@@ -14,7 +14,7 @@ cleanup(); @@ -14,7 +14,7 @@ cleanup();
14 14
15 my $td = new TestDriver('xref-streams'); 15 my $td = new TestDriver('xref-streams');
16 16
17 -my $n_tests = 3; 17 +my $n_tests = 5;
18 18
19 # Handle xref stream with more entries than reported (bug 2872265) 19 # Handle xref stream with more entries than reported (bug 2872265)
20 $td->runtest("xref with short size", 20 $td->runtest("xref with short size",
@@ -32,6 +32,17 @@ $td-&gt;runtest(&quot;show new xref stream&quot;, @@ -32,6 +32,17 @@ $td-&gt;runtest(&quot;show new xref stream&quot;,
32 {$td->FILE => "xref-with-short-size-new.out", 32 {$td->FILE => "xref-with-short-size-new.out",
33 $td->EXIT_STATUS => 0}, 33 $td->EXIT_STATUS => 0},
34 $td->NORMALIZE_NEWLINES); 34 $td->NORMALIZE_NEWLINES);
  35 +# Handle appended file that reuses the xref stream object number
  36 +$td->runtest("override xref stream - xref",
  37 + {$td->COMMAND => "qpdf --show-xref reuse-xref-stream.pdf"},
  38 + {$td->FILE => "reuse-xref-stream-xref.out",
  39 + $td->EXIT_STATUS => 0},
  40 + $td->NORMALIZE_NEWLINES);
  41 +$td->runtest("override xref stream - object",
  42 + {$td->COMMAND => "qpdf --show-object=9 reuse-xref-stream.pdf"},
  43 + {$td->FILE => "reuse-xref-stream-obj9.out",
  44 + $td->EXIT_STATUS => 0},
  45 + $td->NORMALIZE_NEWLINES);
35 46
36 cleanup(); 47 cleanup();
37 $td->report($n_tests); 48 $td->report($n_tests);