Commit e9980efec87a7a678a1a00cfaf8fc60263c54d24
Committed by
Jay Berkenbilt
1 parent
d79a823d
Correctly handle reuse of xref stream (fixes #809)
Showing
9 changed files
with
92 additions
and
12 deletions
ChangeLog
| 1 | +2022-11-19 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * Bug fix: handle special case of an earlier xref stream object's | |
| 4 | + object number being reused by an update made by appending the | |
| 5 | + file. Fixes #809. | |
| 6 | + | |
| 1 | 7 | 2022-10-08 Jay Berkenbilt <ejb@ql.org> |
| 2 | 8 | |
| 3 | 9 | * Fix major performance bug with the openssl crypto provider when | ... | ... |
include/qpdf/QPDF.hh
| ... | ... | @@ -1209,7 +1209,8 @@ class QPDF |
| 1209 | 1209 | qpdf_offset_t offset, |
| 1210 | 1210 | std::string const& description, |
| 1211 | 1211 | QPDFObjGen const& exp_og, |
| 1212 | - QPDFObjGen& og); | |
| 1212 | + QPDFObjGen& og, | |
| 1213 | + bool skip_cache_if_in_xref); | |
| 1213 | 1214 | void resolve(QPDFObjGen const& og); |
| 1214 | 1215 | void resolveObjectsInStream(int obj_stream_number); |
| 1215 | 1216 | void stopOnError(std::string const& message); | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -999,7 +999,12 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) |
| 999 | 999 | QPDFObjectHandle xref_obj; |
| 1000 | 1000 | try { |
| 1001 | 1001 | xref_obj = readObjectAtOffset( |
| 1002 | - false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og); | |
| 1002 | + false, | |
| 1003 | + xref_offset, | |
| 1004 | + "xref stream", | |
| 1005 | + QPDFObjGen(0, 0), | |
| 1006 | + x_og, | |
| 1007 | + true); | |
| 1003 | 1008 | } catch (QPDFExc&) { |
| 1004 | 1009 | // ignore -- report error below |
| 1005 | 1010 | } |
| ... | ... | @@ -1638,7 +1643,8 @@ QPDF::readObjectAtOffset( |
| 1638 | 1643 | qpdf_offset_t offset, |
| 1639 | 1644 | std::string const& description, |
| 1640 | 1645 | QPDFObjGen const& exp_og, |
| 1641 | - QPDFObjGen& og) | |
| 1646 | + QPDFObjGen& og, | |
| 1647 | + bool skip_cache_if_in_xref) | |
| 1642 | 1648 | { |
| 1643 | 1649 | bool check_og = true; |
| 1644 | 1650 | if (exp_og.getObj() == 0) { |
| ... | ... | @@ -1718,7 +1724,7 @@ QPDF::readObjectAtOffset( |
| 1718 | 1724 | qpdf_offset_t new_offset = |
| 1719 | 1725 | this->m->xref_table[exp_og].getOffset(); |
| 1720 | 1726 | QPDFObjectHandle result = readObjectAtOffset( |
| 1721 | - false, new_offset, description, exp_og, og); | |
| 1727 | + false, new_offset, description, exp_og, og, false); | |
| 1722 | 1728 | QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset"); |
| 1723 | 1729 | return result; |
| 1724 | 1730 | } else { |
| ... | ... | @@ -1770,11 +1776,50 @@ QPDF::readObjectAtOffset( |
| 1770 | 1776 | } |
| 1771 | 1777 | } |
| 1772 | 1778 | qpdf_offset_t end_after_space = this->m->file->tell(); |
| 1773 | - updateCache( | |
| 1774 | - og, | |
| 1775 | - QPDFObjectHandle::ObjAccessor::getObject(oh), | |
| 1776 | - end_before_space, | |
| 1777 | - end_after_space); | |
| 1779 | + if (skip_cache_if_in_xref && this->m->xref_table.count(og)) { | |
| 1780 | + // Ordinarily, an object gets read here when resolved | |
| 1781 | + // through xref table or stream. In the special case of | |
| 1782 | + // the xref stream and linearization hint tables, the | |
| 1783 | + // offset comes from another source. For the specific case | |
| 1784 | + // of xref streams, the xref stream is read and loaded | |
| 1785 | + // into the object cache very early in parsing. | |
| 1786 | + // Ordinarily, when a file is updated by appending, items | |
| 1787 | + // inserted into the xref table in later updates take | |
| 1788 | + // precedence over earlier items. In the special case of | |
| 1789 | + // reusing the object number previously used as the xref | |
| 1790 | + // stream, we have the following order of events: | |
| 1791 | + // | |
| 1792 | + // * reused object gets loaded into the xref table | |
| 1793 | + // * old object is read here while reading xref streams | |
| 1794 | + // * original xref entry is ignored (since already in xref table) | |
| 1795 | + // | |
| 1796 | + // It is the second step that causes a problem. Even | |
| 1797 | + // though the xref table is correct in this case, the old | |
| 1798 | + // object is already in the cache and so effectively | |
| 1799 | + // prevails over the reused object. To work around this | |
| 1800 | + // issue, we have a special case for the xref stream (via | |
| 1801 | + // the skip_cache_if_in_xref): if the object is already in | |
| 1802 | + // the xref stream, don't cache what we read here. | |
| 1803 | + // | |
| 1804 | + // It is likely that the same bug may exist for | |
| 1805 | + // linearization hint tables, but the existing code uses | |
| 1806 | + // end_before_space and end_after_space from the cache, so | |
| 1807 | + // fixing that would require more significant rework. The | |
| 1808 | + // chances of a linearization hint stream being reused | |
| 1809 | + // seems smaller because the xref stream is probably the | |
| 1810 | + // highest object in the file and the linearization hint | |
| 1811 | + // stream would be some random place in the middle, so I'm | |
| 1812 | + // leaving that bug unfixed for now. If the bug were to be | |
| 1813 | + // fixed, we could use !check_og in place of | |
| 1814 | + // skip_cache_if_in_xref. | |
| 1815 | + QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); | |
| 1816 | + } else { | |
| 1817 | + updateCache( | |
| 1818 | + og, | |
| 1819 | + QPDFObjectHandle::ObjAccessor::getObject(oh), | |
| 1820 | + end_before_space, | |
| 1821 | + end_after_space); | |
| 1822 | + } | |
| 1778 | 1823 | } |
| 1779 | 1824 | |
| 1780 | 1825 | return oh; |
| ... | ... | @@ -1809,7 +1854,7 @@ QPDF::resolve(QPDFObjGen const& og) |
| 1809 | 1854 | // Object stored in cache by readObjectAtOffset |
| 1810 | 1855 | QPDFObjGen a_og; |
| 1811 | 1856 | QPDFObjectHandle oh = |
| 1812 | - readObjectAtOffset(true, offset, "", og, a_og); | |
| 1857 | + readObjectAtOffset(true, offset, "", og, a_og, false); | |
| 1813 | 1858 | } |
| 1814 | 1859 | break; |
| 1815 | 1860 | ... | ... |
libqpdf/QPDF_linearization.cc
| ... | ... | @@ -303,7 +303,12 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) |
| 303 | 303 | { |
| 304 | 304 | QPDFObjGen og; |
| 305 | 305 | QPDFObjectHandle H = readObjectAtOffset( |
| 306 | - false, offset, "linearization hint stream", QPDFObjGen(0, 0), og); | |
| 306 | + false, | |
| 307 | + offset, | |
| 308 | + "linearization hint stream", | |
| 309 | + QPDFObjGen(0, 0), | |
| 310 | + og, | |
| 311 | + false); | |
| 307 | 312 | ObjCache& oc = this->m->obj_cache[og]; |
| 308 | 313 | qpdf_offset_t min_end_offset = oc.end_before_space; |
| 309 | 314 | qpdf_offset_t max_end_offset = oc.end_after_space; | ... | ... |
qpdf/qpdf.testcov
qpdf/qtest/qpdf/reuse-xref-stream-obj9.out
0 โ 100644
| 1 | +<< /BaseFont /Times-Italics /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >> | ... | ... |
qpdf/qtest/qpdf/reuse-xref-stream-xref.out
0 โ 100644
| 1 | +1/0: uncompressed; offset = 25 | |
| 2 | +2/0: compressed; stream = 1, index = 0 | |
| 3 | +3/0: compressed; stream = 1, index = 1 | |
| 4 | +4/0: compressed; stream = 1, index = 2 | |
| 5 | +5/0: compressed; stream = 1, index = 3 | |
| 6 | +6/0: uncompressed; offset = 1054 | |
| 7 | +7/0: uncompressed; offset = 692 | |
| 8 | +8/0: uncompressed; offset = 791 | |
| 9 | +9/0: uncompressed; offset = 1087 | |
| 10 | +10/0: uncompressed; offset = 1196 | ... | ... |
qpdf/qtest/qpdf/reuse-xref-stream.pdf
0 โ 100644
No preview for this file type
qpdf/qtest/xref-streams.test
| ... | ... | @@ -14,7 +14,7 @@ cleanup(); |
| 14 | 14 | |
| 15 | 15 | my $td = new TestDriver('xref-streams'); |
| 16 | 16 | |
| 17 | -my $n_tests = 3; | |
| 17 | +my $n_tests = 5; | |
| 18 | 18 | |
| 19 | 19 | # Handle xref stream with more entries than reported (bug 2872265) |
| 20 | 20 | $td->runtest("xref with short size", |
| ... | ... | @@ -32,6 +32,17 @@ $td->runtest("show new xref stream", |
| 32 | 32 | {$td->FILE => "xref-with-short-size-new.out", |
| 33 | 33 | $td->EXIT_STATUS => 0}, |
| 34 | 34 | $td->NORMALIZE_NEWLINES); |
| 35 | +# Handle appended file that reuses the xref stream object number | |
| 36 | +$td->runtest("override xref stream - xref", | |
| 37 | + {$td->COMMAND => "qpdf --show-xref reuse-xref-stream.pdf"}, | |
| 38 | + {$td->FILE => "reuse-xref-stream-xref.out", | |
| 39 | + $td->EXIT_STATUS => 0}, | |
| 40 | + $td->NORMALIZE_NEWLINES); | |
| 41 | +$td->runtest("override xref stream - object", | |
| 42 | + {$td->COMMAND => "qpdf --show-object=9 reuse-xref-stream.pdf"}, | |
| 43 | + {$td->FILE => "reuse-xref-stream-obj9.out", | |
| 44 | + $td->EXIT_STATUS => 0}, | |
| 45 | + $td->NORMALIZE_NEWLINES); | |
| 35 | 46 | |
| 36 | 47 | cleanup(); |
| 37 | 48 | $td->report($n_tests); | ... | ... |