Commit e9980efec87a7a678a1a00cfaf8fc60263c54d24
Committed by
Jay Berkenbilt
1 parent
d79a823d
Correctly handle reuse of xref stream (fixes #809)
Showing
9 changed files
with
92 additions
and
12 deletions
ChangeLog
| 1 | +2022-11-19 Jay Berkenbilt <ejb@ql.org> | ||
| 2 | + | ||
| 3 | + * Bug fix: handle special case of an earlier xref stream object's | ||
| 4 | + object number being reused by an update made by appending the | ||
| 5 | + file. Fixes #809. | ||
| 6 | + | ||
| 1 | 2022-10-08 Jay Berkenbilt <ejb@ql.org> | 7 | 2022-10-08 Jay Berkenbilt <ejb@ql.org> |
| 2 | 8 | ||
| 3 | * Fix major performance bug with the openssl crypto provider when | 9 | * Fix major performance bug with the openssl crypto provider when |
include/qpdf/QPDF.hh
| @@ -1209,7 +1209,8 @@ class QPDF | @@ -1209,7 +1209,8 @@ class QPDF | ||
| 1209 | qpdf_offset_t offset, | 1209 | qpdf_offset_t offset, |
| 1210 | std::string const& description, | 1210 | std::string const& description, |
| 1211 | QPDFObjGen const& exp_og, | 1211 | QPDFObjGen const& exp_og, |
| 1212 | - QPDFObjGen& og); | 1212 | + QPDFObjGen& og, |
| 1213 | + bool skip_cache_if_in_xref); | ||
| 1213 | void resolve(QPDFObjGen const& og); | 1214 | void resolve(QPDFObjGen const& og); |
| 1214 | void resolveObjectsInStream(int obj_stream_number); | 1215 | void resolveObjectsInStream(int obj_stream_number); |
| 1215 | void stopOnError(std::string const& message); | 1216 | void stopOnError(std::string const& message); |
libqpdf/QPDF.cc
| @@ -999,7 +999,12 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) | @@ -999,7 +999,12 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) | ||
| 999 | QPDFObjectHandle xref_obj; | 999 | QPDFObjectHandle xref_obj; |
| 1000 | try { | 1000 | try { |
| 1001 | xref_obj = readObjectAtOffset( | 1001 | xref_obj = readObjectAtOffset( |
| 1002 | - false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og); | 1002 | + false, |
| 1003 | + xref_offset, | ||
| 1004 | + "xref stream", | ||
| 1005 | + QPDFObjGen(0, 0), | ||
| 1006 | + x_og, | ||
| 1007 | + true); | ||
| 1003 | } catch (QPDFExc&) { | 1008 | } catch (QPDFExc&) { |
| 1004 | // ignore -- report error below | 1009 | // ignore -- report error below |
| 1005 | } | 1010 | } |
| @@ -1638,7 +1643,8 @@ QPDF::readObjectAtOffset( | @@ -1638,7 +1643,8 @@ QPDF::readObjectAtOffset( | ||
| 1638 | qpdf_offset_t offset, | 1643 | qpdf_offset_t offset, |
| 1639 | std::string const& description, | 1644 | std::string const& description, |
| 1640 | QPDFObjGen const& exp_og, | 1645 | QPDFObjGen const& exp_og, |
| 1641 | - QPDFObjGen& og) | 1646 | + QPDFObjGen& og, |
| 1647 | + bool skip_cache_if_in_xref) | ||
| 1642 | { | 1648 | { |
| 1643 | bool check_og = true; | 1649 | bool check_og = true; |
| 1644 | if (exp_og.getObj() == 0) { | 1650 | if (exp_og.getObj() == 0) { |
| @@ -1718,7 +1724,7 @@ QPDF::readObjectAtOffset( | @@ -1718,7 +1724,7 @@ QPDF::readObjectAtOffset( | ||
| 1718 | qpdf_offset_t new_offset = | 1724 | qpdf_offset_t new_offset = |
| 1719 | this->m->xref_table[exp_og].getOffset(); | 1725 | this->m->xref_table[exp_og].getOffset(); |
| 1720 | QPDFObjectHandle result = readObjectAtOffset( | 1726 | QPDFObjectHandle result = readObjectAtOffset( |
| 1721 | - false, new_offset, description, exp_og, og); | 1727 | + false, new_offset, description, exp_og, og, false); |
| 1722 | QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset"); | 1728 | QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset"); |
| 1723 | return result; | 1729 | return result; |
| 1724 | } else { | 1730 | } else { |
| @@ -1770,11 +1776,50 @@ QPDF::readObjectAtOffset( | @@ -1770,11 +1776,50 @@ QPDF::readObjectAtOffset( | ||
| 1770 | } | 1776 | } |
| 1771 | } | 1777 | } |
| 1772 | qpdf_offset_t end_after_space = this->m->file->tell(); | 1778 | qpdf_offset_t end_after_space = this->m->file->tell(); |
| 1773 | - updateCache( | ||
| 1774 | - og, | ||
| 1775 | - QPDFObjectHandle::ObjAccessor::getObject(oh), | ||
| 1776 | - end_before_space, | ||
| 1777 | - end_after_space); | 1779 | + if (skip_cache_if_in_xref && this->m->xref_table.count(og)) { |
| 1780 | + // Ordinarily, an object gets read here when resolved | ||
| 1781 | + // through xref table or stream. In the special case of | ||
| 1782 | + // the xref stream and linearization hint tables, the | ||
| 1783 | + // offset comes from another source. For the specific case | ||
| 1784 | + // of xref streams, the xref stream is read and loaded | ||
| 1785 | + // into the object cache very early in parsing. | ||
| 1786 | + // Ordinarily, when a file is updated by appending, items | ||
| 1787 | + // inserted into the xref table in later updates take | ||
| 1788 | + // precedence over earlier items. In the special case of | ||
| 1789 | + // reusing the object number previously used as the xref | ||
| 1790 | + // stream, we have the following order of events: | ||
| 1791 | + // | ||
| 1792 | + // * reused object gets loaded into the xref table | ||
| 1793 | + // * old object is read here while reading xref streams | ||
| 1794 | + // * original xref entry is ignored (since already in xref table) | ||
| 1795 | + // | ||
| 1796 | + // It is the second step that causes a problem. Even | ||
| 1797 | + // though the xref table is correct in this case, the old | ||
| 1798 | + // object is already in the cache and so effectively | ||
| 1799 | + // prevails over the reused object. To work around this | ||
| 1800 | + // issue, we have a special case for the xref stream (via | ||
| 1801 | + // the skip_cache_if_in_xref): if the object is already in | ||
| 1802 | + // the xref stream, don't cache what we read here. | ||
| 1803 | + // | ||
| 1804 | + // It is likely that the same bug may exist for | ||
| 1805 | + // linearization hint tables, but the existing code uses | ||
| 1806 | + // end_before_space and end_after_space from the cache, so | ||
| 1807 | + // fixing that would require more significant rework. The | ||
| 1808 | + // chances of a linearization hint stream being reused | ||
| 1809 | + // seems smaller because the xref stream is probably the | ||
| 1810 | + // highest object in the file and the linearization hint | ||
| 1811 | + // stream would be some random place in the middle, so I'm | ||
| 1812 | + // leaving that bug unfixed for now. If the bug were to be | ||
| 1813 | + // fixed, we could use !check_og in place of | ||
| 1814 | + // skip_cache_if_in_xref. | ||
| 1815 | + QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); | ||
| 1816 | + } else { | ||
| 1817 | + updateCache( | ||
| 1818 | + og, | ||
| 1819 | + QPDFObjectHandle::ObjAccessor::getObject(oh), | ||
| 1820 | + end_before_space, | ||
| 1821 | + end_after_space); | ||
| 1822 | + } | ||
| 1778 | } | 1823 | } |
| 1779 | 1824 | ||
| 1780 | return oh; | 1825 | return oh; |
| @@ -1809,7 +1854,7 @@ QPDF::resolve(QPDFObjGen const& og) | @@ -1809,7 +1854,7 @@ QPDF::resolve(QPDFObjGen const& og) | ||
| 1809 | // Object stored in cache by readObjectAtOffset | 1854 | // Object stored in cache by readObjectAtOffset |
| 1810 | QPDFObjGen a_og; | 1855 | QPDFObjGen a_og; |
| 1811 | QPDFObjectHandle oh = | 1856 | QPDFObjectHandle oh = |
| 1812 | - readObjectAtOffset(true, offset, "", og, a_og); | 1857 | + readObjectAtOffset(true, offset, "", og, a_og, false); |
| 1813 | } | 1858 | } |
| 1814 | break; | 1859 | break; |
| 1815 | 1860 |
libqpdf/QPDF_linearization.cc
| @@ -303,7 +303,12 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) | @@ -303,7 +303,12 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) | ||
| 303 | { | 303 | { |
| 304 | QPDFObjGen og; | 304 | QPDFObjGen og; |
| 305 | QPDFObjectHandle H = readObjectAtOffset( | 305 | QPDFObjectHandle H = readObjectAtOffset( |
| 306 | - false, offset, "linearization hint stream", QPDFObjGen(0, 0), og); | 306 | + false, |
| 307 | + offset, | ||
| 308 | + "linearization hint stream", | ||
| 309 | + QPDFObjGen(0, 0), | ||
| 310 | + og, | ||
| 311 | + false); | ||
| 307 | ObjCache& oc = this->m->obj_cache[og]; | 312 | ObjCache& oc = this->m->obj_cache[og]; |
| 308 | qpdf_offset_t min_end_offset = oc.end_before_space; | 313 | qpdf_offset_t min_end_offset = oc.end_before_space; |
| 309 | qpdf_offset_t max_end_offset = oc.end_after_space; | 314 | qpdf_offset_t max_end_offset = oc.end_after_space; |
qpdf/qpdf.testcov
| @@ -678,3 +678,4 @@ QPDF_json bad calledgetallpages 0 | @@ -678,3 +678,4 @@ QPDF_json bad calledgetallpages 0 | ||
| 678 | QPDF_json bad pushedinheritedpageresources 0 | 678 | QPDF_json bad pushedinheritedpageresources 0 |
| 679 | QPDFPageObjectHelper copied fallback 0 | 679 | QPDFPageObjectHelper copied fallback 0 |
| 680 | QPDFPageObjectHelper used fallback without copying 0 | 680 | QPDFPageObjectHelper used fallback without copying 0 |
| 681 | +QPDF skipping cache for known unchecked object 0 |
qpdf/qtest/qpdf/reuse-xref-stream-obj9.out
0 โ 100644
| 1 | +<< /BaseFont /Times-Italics /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >> |
qpdf/qtest/qpdf/reuse-xref-stream-xref.out
0 โ 100644
| 1 | +1/0: uncompressed; offset = 25 | ||
| 2 | +2/0: compressed; stream = 1, index = 0 | ||
| 3 | +3/0: compressed; stream = 1, index = 1 | ||
| 4 | +4/0: compressed; stream = 1, index = 2 | ||
| 5 | +5/0: compressed; stream = 1, index = 3 | ||
| 6 | +6/0: uncompressed; offset = 1054 | ||
| 7 | +7/0: uncompressed; offset = 692 | ||
| 8 | +8/0: uncompressed; offset = 791 | ||
| 9 | +9/0: uncompressed; offset = 1087 | ||
| 10 | +10/0: uncompressed; offset = 1196 |
qpdf/qtest/qpdf/reuse-xref-stream.pdf
0 โ 100644
No preview for this file type
qpdf/qtest/xref-streams.test
| @@ -14,7 +14,7 @@ cleanup(); | @@ -14,7 +14,7 @@ cleanup(); | ||
| 14 | 14 | ||
| 15 | my $td = new TestDriver('xref-streams'); | 15 | my $td = new TestDriver('xref-streams'); |
| 16 | 16 | ||
| 17 | -my $n_tests = 3; | 17 | +my $n_tests = 5; |
| 18 | 18 | ||
| 19 | # Handle xref stream with more entries than reported (bug 2872265) | 19 | # Handle xref stream with more entries than reported (bug 2872265) |
| 20 | $td->runtest("xref with short size", | 20 | $td->runtest("xref with short size", |
| @@ -32,6 +32,17 @@ $td->runtest("show new xref stream", | @@ -32,6 +32,17 @@ $td->runtest("show new xref stream", | ||
| 32 | {$td->FILE => "xref-with-short-size-new.out", | 32 | {$td->FILE => "xref-with-short-size-new.out", |
| 33 | $td->EXIT_STATUS => 0}, | 33 | $td->EXIT_STATUS => 0}, |
| 34 | $td->NORMALIZE_NEWLINES); | 34 | $td->NORMALIZE_NEWLINES); |
| 35 | +# Handle appended file that reuses the xref stream object number | ||
| 36 | +$td->runtest("override xref stream - xref", | ||
| 37 | + {$td->COMMAND => "qpdf --show-xref reuse-xref-stream.pdf"}, | ||
| 38 | + {$td->FILE => "reuse-xref-stream-xref.out", | ||
| 39 | + $td->EXIT_STATUS => 0}, | ||
| 40 | + $td->NORMALIZE_NEWLINES); | ||
| 41 | +$td->runtest("override xref stream - object", | ||
| 42 | + {$td->COMMAND => "qpdf --show-object=9 reuse-xref-stream.pdf"}, | ||
| 43 | + {$td->FILE => "reuse-xref-stream-obj9.out", | ||
| 44 | + $td->EXIT_STATUS => 0}, | ||
| 45 | + $td->NORMALIZE_NEWLINES); | ||
| 35 | 46 | ||
| 36 | cleanup(); | 47 | cleanup(); |
| 37 | $td->report($n_tests); | 48 | $td->report($n_tests); |