Commit 65e7e3db36ac731fb56d3c675c3bacfcd21dec61
Committed by
GitHub
Merge pull request #1400 from m-holger/pr1266
Fix parsing of object streams containing objects with no white-space between them
Showing
7 changed files
with
134 additions
and
12 deletions
include/qpdf/QPDF.hh
| ... | ... | @@ -45,6 +45,11 @@ |
| 45 | 45 | #include <qpdf/QPDFWriter.hh> |
| 46 | 46 | #include <qpdf/QPDFXRefEntry.hh> |
| 47 | 47 | |
| 48 | +namespace qpdf::is | |
| 49 | +{ | |
| 50 | + class OffsetBuffer; | |
| 51 | +} | |
| 52 | + | |
| 48 | 53 | class QPDF_Stream; |
| 49 | 54 | class BitStream; |
| 50 | 55 | class BitWriter; |
| ... | ... | @@ -785,7 +790,7 @@ class QPDF |
| 785 | 790 | QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); |
| 786 | 791 | void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); |
| 787 | 792 | void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); |
| 788 | - QPDFObjectHandle readObjectInStream(BufferInputSource& input, int stream_id, int obj_id); | |
| 793 | + QPDFObjectHandle readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id); | |
| 789 | 794 | size_t recoverStreamLength( |
| 790 | 795 | std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset); |
| 791 | 796 | QPDFTokenizer::Token readToken(InputSource&, size_t max_len = 0); | ... | ... |
libqpdf/QPDFParser.cc
| ... | ... | @@ -12,6 +12,7 @@ |
| 12 | 12 | #include <memory> |
| 13 | 13 | |
| 14 | 14 | using namespace std::literals; |
| 15 | +using namespace qpdf; | |
| 15 | 16 | |
| 16 | 17 | using ObjectPtr = std::shared_ptr<QPDFObject>; |
| 17 | 18 | |
| ... | ... | @@ -87,7 +88,7 @@ QPDFParser::parse( |
| 87 | 88 | |
| 88 | 89 | std::pair<QPDFObjectHandle, bool> |
| 89 | 90 | QPDFParser::parse( |
| 90 | - BufferInputSource& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) | |
| 91 | + is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) | |
| 91 | 92 | { |
| 92 | 93 | bool empty{false}; |
| 93 | 94 | auto result = QPDFParser( | ... | ... |
libqpdf/QPDF_objects.cc
| ... | ... | @@ -1288,7 +1288,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset |
| 1288 | 1288 | } |
| 1289 | 1289 | |
| 1290 | 1290 | QPDFObjectHandle |
| 1291 | -QPDF::readObjectInStream(BufferInputSource& input, int stream_id, int obj_id) | |
| 1291 | +QPDF::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id) | |
| 1292 | 1292 | { |
| 1293 | 1293 | auto [object, empty] = QPDFParser::parse(input, stream_id, obj_id, m->tokenizer, *this); |
| 1294 | 1294 | if (empty) { |
| ... | ... | @@ -1645,12 +1645,26 @@ QPDF::resolveObjectsInStream(int obj_stream_number) |
| 1645 | 1645 | "object stream " + std::to_string(obj_stream_number) + " has incorrect keys"); |
| 1646 | 1646 | } |
| 1647 | 1647 | |
| 1648 | - std::vector<std::pair<int, long long>> offsets; | |
| 1648 | + // id, offset, size | |
| 1649 | + std::vector<std::tuple<int, qpdf_offset_t, size_t>> offsets; | |
| 1649 | 1650 | |
| 1650 | 1651 | auto bp = obj_stream.getStreamData(qpdf_dl_specialized); |
| 1652 | + | |
| 1651 | 1653 | BufferInputSource input("", bp.get()); |
| 1652 | 1654 | |
| 1655 | + const auto b_size = bp->getSize(); | |
| 1656 | + const auto end_offset = static_cast<qpdf_offset_t>(b_size); | |
| 1657 | + auto b_start = bp->getBuffer(); | |
| 1658 | + | |
| 1659 | + if (first >= end_offset) { | |
| 1660 | + throw damagedPDF( | |
| 1661 | + "object " + std::to_string(obj_stream_number) + " 0", | |
| 1662 | + "object stream " + std::to_string(obj_stream_number) + " has invalid /First entry"); | |
| 1663 | + } | |
| 1664 | + | |
| 1665 | + int id = 0; | |
| 1653 | 1666 | long long last_offset = -1; |
| 1667 | + bool is_first = true; | |
| 1654 | 1668 | for (unsigned int i = 0; i < n; ++i) { |
| 1655 | 1669 | auto tnum = readToken(input); |
| 1656 | 1670 | auto toffset = readToken(input); |
| ... | ... | @@ -1682,26 +1696,45 @@ QPDF::resolveObjectsInStream(int obj_stream_number) |
| 1682 | 1696 | std::to_string(last_offset) + ")")); |
| 1683 | 1697 | continue; |
| 1684 | 1698 | } |
| 1685 | - last_offset = offset; | |
| 1686 | 1699 | |
| 1687 | 1700 | if (num > m->xref_table_max_id) { |
| 1688 | 1701 | continue; |
| 1689 | 1702 | } |
| 1690 | 1703 | |
| 1691 | - offsets.emplace_back(num, offset + first); | |
| 1704 | + if (first + offset >= end_offset) { | |
| 1705 | + warn(damaged(num, offset, "offset is too large")); | |
| 1706 | + continue; | |
| 1707 | + } | |
| 1708 | + | |
| 1709 | + if (is_first) { | |
| 1710 | + is_first = false; | |
| 1711 | + } else { | |
| 1712 | + offsets.emplace_back( | |
| 1713 | + id, last_offset + first, static_cast<size_t>(offset - last_offset)); | |
| 1714 | + } | |
| 1715 | + | |
| 1716 | + last_offset = offset; | |
| 1717 | + id = num; | |
| 1718 | + } | |
| 1719 | + | |
| 1720 | + if (!is_first) { | |
| 1721 | + // We found at least one valid entry. | |
| 1722 | + offsets.emplace_back( | |
| 1723 | + id, last_offset + first, b_size - static_cast<size_t>(last_offset + first)); | |
| 1692 | 1724 | } |
| 1693 | 1725 | |
| 1694 | 1726 | // To avoid having to read the object stream multiple times, store all objects that would be |
| 1695 | 1727 | // found here in the cache. Remember that some objects stored here might have been overridden |
| 1696 | 1728 | // by new objects appended to the file, so it is necessary to recheck the xref table and only |
| 1697 | 1729 | // cache what would actually be resolved here. |
| 1698 | - for (auto const& [id, offset]: offsets) { | |
| 1699 | - QPDFObjGen og(id, 0); | |
| 1730 | + for (auto const& [obj_id, obj_offset, obj_size]: offsets) { | |
| 1731 | + QPDFObjGen og(obj_id, 0); | |
| 1700 | 1732 | auto entry = m->xref_table.find(og); |
| 1701 | 1733 | if (entry != m->xref_table.end() && entry->second.getType() == 2 && |
| 1702 | 1734 | entry->second.getObjStreamNumber() == obj_stream_number) { |
| 1703 | - input.seek(offset, SEEK_SET); | |
| 1704 | - QPDFObjectHandle oh = readObjectInStream(input, obj_stream_number, id); | |
| 1735 | + Buffer obj_buffer{b_start + obj_offset, obj_size}; | |
| 1736 | + is::OffsetBuffer in("", &obj_buffer, obj_offset); | |
| 1737 | + auto oh = readObjectInStream(in, obj_stream_number, obj_id); | |
| 1705 | 1738 | updateCache(og, oh.getObj(), end_before_space, end_after_space); |
| 1706 | 1739 | } else { |
| 1707 | 1740 | QTC::TC("qpdf", "QPDF not caching overridden objstm object"); | ... | ... |
libqpdf/qpdf/InputSource_private.hh
| 1 | 1 | #ifndef QPDF_INPUTSOURCE_PRIVATE_HH |
| 2 | 2 | #define QPDF_INPUTSOURCE_PRIVATE_HH |
| 3 | 3 | |
| 4 | +#include <qpdf/BufferInputSource.hh> | |
| 4 | 5 | #include <qpdf/InputSource.hh> |
| 5 | 6 | |
| 7 | +#include <limits> | |
| 8 | +#include <sstream> | |
| 9 | +#include <stdexcept> | |
| 10 | + | |
| 11 | +namespace qpdf::is | |
| 12 | +{ | |
| 13 | + class OffsetBuffer final: public InputSource | |
| 14 | + { | |
| 15 | + public: | |
| 16 | + OffsetBuffer(std::string const& description, Buffer* buf, qpdf_offset_t global_offset) : | |
| 17 | + proxied(description, buf), | |
| 18 | + global_offset(global_offset) | |
| 19 | + { | |
| 20 | + if (global_offset < 0) { | |
| 21 | + throw std::logic_error("is::OffsetBuffer constructed with negative offset"); | |
| 22 | + } | |
| 23 | + last_offset = global_offset; | |
| 24 | + } | |
| 25 | + | |
| 26 | + ~OffsetBuffer() final = default; | |
| 27 | + | |
| 28 | + qpdf_offset_t | |
| 29 | + findAndSkipNextEOL() final | |
| 30 | + { | |
| 31 | + return proxied.findAndSkipNextEOL() + global_offset; | |
| 32 | + } | |
| 33 | + | |
| 34 | + std::string const& | |
| 35 | + getName() const final | |
| 36 | + { | |
| 37 | + return proxied.getName(); | |
| 38 | + } | |
| 39 | + | |
| 40 | + qpdf_offset_t | |
| 41 | + tell() final | |
| 42 | + { | |
| 43 | + return proxied.tell() + global_offset; | |
| 44 | + } | |
| 45 | + | |
| 46 | + void | |
| 47 | + seek(qpdf_offset_t offset, int whence) final | |
| 48 | + { | |
| 49 | + if (whence == SEEK_SET) { | |
| 50 | + proxied.seek(offset - global_offset, whence); | |
| 51 | + } else { | |
| 52 | + proxied.seek(offset, whence); | |
| 53 | + } | |
| 54 | + } | |
| 55 | + | |
| 56 | + void | |
| 57 | + rewind() final | |
| 58 | + { | |
| 59 | + seek(0, SEEK_SET); | |
| 60 | + } | |
| 61 | + | |
| 62 | + size_t | |
| 63 | + read(char* buffer, size_t length) final | |
| 64 | + { | |
| 65 | + size_t result = proxied.read(buffer, length); | |
| 66 | + setLastOffset(proxied.getLastOffset() + global_offset); | |
| 67 | + return result; | |
| 68 | + } | |
| 69 | + | |
| 70 | + void | |
| 71 | + unreadCh(char ch) final | |
| 72 | + { | |
| 73 | + proxied.unreadCh(ch); | |
| 74 | + } | |
| 75 | + | |
| 76 | + private: | |
| 77 | + BufferInputSource proxied; | |
| 78 | + qpdf_offset_t global_offset; | |
| 79 | + }; | |
| 80 | + | |
| 81 | +} // namespace qpdf::is | |
| 82 | + | |
| 6 | 83 | inline size_t |
| 7 | 84 | InputSource::read(std::string& str, size_t count, qpdf_offset_t at) |
| 8 | 85 | { | ... | ... |
libqpdf/qpdf/QPDFParser.hh
| 1 | 1 | #ifndef QPDFPARSER_HH |
| 2 | 2 | #define QPDFPARSER_HH |
| 3 | 3 | |
| 4 | +#include <qpdf/InputSource_private.hh> | |
| 4 | 5 | #include <qpdf/QPDFObjectHandle_private.hh> |
| 5 | 6 | #include <qpdf/QPDFObject_private.hh> |
| 6 | 7 | #include <qpdf/QPDFTokenizer_private.hh> |
| ... | ... | @@ -38,7 +39,7 @@ class QPDFParser |
| 38 | 39 | QPDF& context); |
| 39 | 40 | |
| 40 | 41 | static std::pair<QPDFObjectHandle, bool> parse( |
| 41 | - BufferInputSource& input, | |
| 42 | + qpdf::is::OffsetBuffer& input, | |
| 42 | 43 | int stream_id, |
| 43 | 44 | int obj_id, |
| 44 | 45 | qpdf::Tokenizer& tokenizer, | ... | ... |
manual/release-notes.rst
| ... | ... | @@ -21,6 +21,11 @@ more detail. |
| 21 | 21 | integer object. Previously the method returned false if the first |
| 22 | 22 | dictionary object was not a linearization parameter dictionary. |
| 23 | 23 | |
| 24 | + = Fix parsing of object streams containing objects not seperated by | |
| 25 | + white-space. Pre-2020 editions of the PDF specification incorrectly | |
| 26 | + stated that white-space was required between objects. qpdf relied on this | |
| 27 | + when parsing object streams. | |
| 28 | + | |
| 24 | 29 | - Fix two object stream error/warning messages that reported the wrong |
| 25 | 30 | object id. |
| 26 | 31 | ... | ... |
qpdf/qtest/object-stream.test
| ... | ... | @@ -124,7 +124,7 @@ $td->runtest("adjacent compressed objects", |
| 124 | 124 | {$td->COMMAND => "test_driver 99 no-space-compressed-object.pdf"}, |
| 125 | 125 | {$td->FILE => "no-space-compressed-object.out", |
| 126 | 126 | $td->EXIT_STATUS => 0}, |
| 127 | - $td->EXPECT_FAILURE); | |
| 127 | + $td->NORMALIZE_NEWLINES); | |
| 128 | 128 | |
| 129 | 129 | cleanup(); |
| 130 | 130 | $td->report(calc_ntests($n_tests, $n_compare_pdfs)); | ... | ... |