Commit 65e7e3db36ac731fb56d3c675c3bacfcd21dec61
Committed by
GitHub
Merge pull request #1400 from m-holger/pr1266
Fix parsing of object streams containing objects with no white-space between them
Showing
7 changed files
with
134 additions
and
12 deletions
include/qpdf/QPDF.hh
| @@ -45,6 +45,11 @@ | @@ -45,6 +45,11 @@ | ||
| 45 | #include <qpdf/QPDFWriter.hh> | 45 | #include <qpdf/QPDFWriter.hh> |
| 46 | #include <qpdf/QPDFXRefEntry.hh> | 46 | #include <qpdf/QPDFXRefEntry.hh> |
| 47 | 47 | ||
| 48 | +namespace qpdf::is | ||
| 49 | +{ | ||
| 50 | + class OffsetBuffer; | ||
| 51 | +} | ||
| 52 | + | ||
| 48 | class QPDF_Stream; | 53 | class QPDF_Stream; |
| 49 | class BitStream; | 54 | class BitStream; |
| 50 | class BitWriter; | 55 | class BitWriter; |
| @@ -785,7 +790,7 @@ class QPDF | @@ -785,7 +790,7 @@ class QPDF | ||
| 785 | QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); | 790 | QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); |
| 786 | void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); | 791 | void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); |
| 787 | void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); | 792 | void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); |
| 788 | - QPDFObjectHandle readObjectInStream(BufferInputSource& input, int stream_id, int obj_id); | 793 | + QPDFObjectHandle readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id); |
| 789 | size_t recoverStreamLength( | 794 | size_t recoverStreamLength( |
| 790 | std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset); | 795 | std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset); |
| 791 | QPDFTokenizer::Token readToken(InputSource&, size_t max_len = 0); | 796 | QPDFTokenizer::Token readToken(InputSource&, size_t max_len = 0); |
libqpdf/QPDFParser.cc
| @@ -12,6 +12,7 @@ | @@ -12,6 +12,7 @@ | ||
| 12 | #include <memory> | 12 | #include <memory> |
| 13 | 13 | ||
| 14 | using namespace std::literals; | 14 | using namespace std::literals; |
| 15 | +using namespace qpdf; | ||
| 15 | 16 | ||
| 16 | using ObjectPtr = std::shared_ptr<QPDFObject>; | 17 | using ObjectPtr = std::shared_ptr<QPDFObject>; |
| 17 | 18 | ||
| @@ -87,7 +88,7 @@ QPDFParser::parse( | @@ -87,7 +88,7 @@ QPDFParser::parse( | ||
| 87 | 88 | ||
| 88 | std::pair<QPDFObjectHandle, bool> | 89 | std::pair<QPDFObjectHandle, bool> |
| 89 | QPDFParser::parse( | 90 | QPDFParser::parse( |
| 90 | - BufferInputSource& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) | 91 | + is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) |
| 91 | { | 92 | { |
| 92 | bool empty{false}; | 93 | bool empty{false}; |
| 93 | auto result = QPDFParser( | 94 | auto result = QPDFParser( |
libqpdf/QPDF_objects.cc
| @@ -1288,7 +1288,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset | @@ -1288,7 +1288,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset | ||
| 1288 | } | 1288 | } |
| 1289 | 1289 | ||
| 1290 | QPDFObjectHandle | 1290 | QPDFObjectHandle |
| 1291 | -QPDF::readObjectInStream(BufferInputSource& input, int stream_id, int obj_id) | 1291 | +QPDF::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id) |
| 1292 | { | 1292 | { |
| 1293 | auto [object, empty] = QPDFParser::parse(input, stream_id, obj_id, m->tokenizer, *this); | 1293 | auto [object, empty] = QPDFParser::parse(input, stream_id, obj_id, m->tokenizer, *this); |
| 1294 | if (empty) { | 1294 | if (empty) { |
| @@ -1645,12 +1645,26 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | @@ -1645,12 +1645,26 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | ||
| 1645 | "object stream " + std::to_string(obj_stream_number) + " has incorrect keys"); | 1645 | "object stream " + std::to_string(obj_stream_number) + " has incorrect keys"); |
| 1646 | } | 1646 | } |
| 1647 | 1647 | ||
| 1648 | - std::vector<std::pair<int, long long>> offsets; | 1648 | + // id, offset, size |
| 1649 | + std::vector<std::tuple<int, qpdf_offset_t, size_t>> offsets; | ||
| 1649 | 1650 | ||
| 1650 | auto bp = obj_stream.getStreamData(qpdf_dl_specialized); | 1651 | auto bp = obj_stream.getStreamData(qpdf_dl_specialized); |
| 1652 | + | ||
| 1651 | BufferInputSource input("", bp.get()); | 1653 | BufferInputSource input("", bp.get()); |
| 1652 | 1654 | ||
| 1655 | + const auto b_size = bp->getSize(); | ||
| 1656 | + const auto end_offset = static_cast<qpdf_offset_t>(b_size); | ||
| 1657 | + auto b_start = bp->getBuffer(); | ||
| 1658 | + | ||
| 1659 | + if (first >= end_offset) { | ||
| 1660 | + throw damagedPDF( | ||
| 1661 | + "object " + std::to_string(obj_stream_number) + " 0", | ||
| 1662 | + "object stream " + std::to_string(obj_stream_number) + " has invalid /First entry"); | ||
| 1663 | + } | ||
| 1664 | + | ||
| 1665 | + int id = 0; | ||
| 1653 | long long last_offset = -1; | 1666 | long long last_offset = -1; |
| 1667 | + bool is_first = true; | ||
| 1654 | for (unsigned int i = 0; i < n; ++i) { | 1668 | for (unsigned int i = 0; i < n; ++i) { |
| 1655 | auto tnum = readToken(input); | 1669 | auto tnum = readToken(input); |
| 1656 | auto toffset = readToken(input); | 1670 | auto toffset = readToken(input); |
| @@ -1682,26 +1696,45 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | @@ -1682,26 +1696,45 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | ||
| 1682 | std::to_string(last_offset) + ")")); | 1696 | std::to_string(last_offset) + ")")); |
| 1683 | continue; | 1697 | continue; |
| 1684 | } | 1698 | } |
| 1685 | - last_offset = offset; | ||
| 1686 | 1699 | ||
| 1687 | if (num > m->xref_table_max_id) { | 1700 | if (num > m->xref_table_max_id) { |
| 1688 | continue; | 1701 | continue; |
| 1689 | } | 1702 | } |
| 1690 | 1703 | ||
| 1691 | - offsets.emplace_back(num, offset + first); | 1704 | + if (first + offset >= end_offset) { |
| 1705 | + warn(damaged(num, offset, "offset is too large")); | ||
| 1706 | + continue; | ||
| 1707 | + } | ||
| 1708 | + | ||
| 1709 | + if (is_first) { | ||
| 1710 | + is_first = false; | ||
| 1711 | + } else { | ||
| 1712 | + offsets.emplace_back( | ||
| 1713 | + id, last_offset + first, static_cast<size_t>(offset - last_offset)); | ||
| 1714 | + } | ||
| 1715 | + | ||
| 1716 | + last_offset = offset; | ||
| 1717 | + id = num; | ||
| 1718 | + } | ||
| 1719 | + | ||
| 1720 | + if (!is_first) { | ||
| 1721 | + // We found at least one valid entry. | ||
| 1722 | + offsets.emplace_back( | ||
| 1723 | + id, last_offset + first, b_size - static_cast<size_t>(last_offset + first)); | ||
| 1692 | } | 1724 | } |
| 1693 | 1725 | ||
| 1694 | // To avoid having to read the object stream multiple times, store all objects that would be | 1726 | // To avoid having to read the object stream multiple times, store all objects that would be |
| 1695 | // found here in the cache. Remember that some objects stored here might have been overridden | 1727 | // found here in the cache. Remember that some objects stored here might have been overridden |
| 1696 | // by new objects appended to the file, so it is necessary to recheck the xref table and only | 1728 | // by new objects appended to the file, so it is necessary to recheck the xref table and only |
| 1697 | // cache what would actually be resolved here. | 1729 | // cache what would actually be resolved here. |
| 1698 | - for (auto const& [id, offset]: offsets) { | ||
| 1699 | - QPDFObjGen og(id, 0); | 1730 | + for (auto const& [obj_id, obj_offset, obj_size]: offsets) { |
| 1731 | + QPDFObjGen og(obj_id, 0); | ||
| 1700 | auto entry = m->xref_table.find(og); | 1732 | auto entry = m->xref_table.find(og); |
| 1701 | if (entry != m->xref_table.end() && entry->second.getType() == 2 && | 1733 | if (entry != m->xref_table.end() && entry->second.getType() == 2 && |
| 1702 | entry->second.getObjStreamNumber() == obj_stream_number) { | 1734 | entry->second.getObjStreamNumber() == obj_stream_number) { |
| 1703 | - input.seek(offset, SEEK_SET); | ||
| 1704 | - QPDFObjectHandle oh = readObjectInStream(input, obj_stream_number, id); | 1735 | + Buffer obj_buffer{b_start + obj_offset, obj_size}; |
| 1736 | + is::OffsetBuffer in("", &obj_buffer, obj_offset); | ||
| 1737 | + auto oh = readObjectInStream(in, obj_stream_number, obj_id); | ||
| 1705 | updateCache(og, oh.getObj(), end_before_space, end_after_space); | 1738 | updateCache(og, oh.getObj(), end_before_space, end_after_space); |
| 1706 | } else { | 1739 | } else { |
| 1707 | QTC::TC("qpdf", "QPDF not caching overridden objstm object"); | 1740 | QTC::TC("qpdf", "QPDF not caching overridden objstm object"); |
libqpdf/qpdf/InputSource_private.hh
| 1 | #ifndef QPDF_INPUTSOURCE_PRIVATE_HH | 1 | #ifndef QPDF_INPUTSOURCE_PRIVATE_HH |
| 2 | #define QPDF_INPUTSOURCE_PRIVATE_HH | 2 | #define QPDF_INPUTSOURCE_PRIVATE_HH |
| 3 | 3 | ||
| 4 | +#include <qpdf/BufferInputSource.hh> | ||
| 4 | #include <qpdf/InputSource.hh> | 5 | #include <qpdf/InputSource.hh> |
| 5 | 6 | ||
| 7 | +#include <limits> | ||
| 8 | +#include <sstream> | ||
| 9 | +#include <stdexcept> | ||
| 10 | + | ||
| 11 | +namespace qpdf::is | ||
| 12 | +{ | ||
| 13 | + class OffsetBuffer final: public InputSource | ||
| 14 | + { | ||
| 15 | + public: | ||
| 16 | + OffsetBuffer(std::string const& description, Buffer* buf, qpdf_offset_t global_offset) : | ||
| 17 | + proxied(description, buf), | ||
| 18 | + global_offset(global_offset) | ||
| 19 | + { | ||
| 20 | + if (global_offset < 0) { | ||
| 21 | + throw std::logic_error("is::OffsetBuffer constructed with negative offset"); | ||
| 22 | + } | ||
| 23 | + last_offset = global_offset; | ||
| 24 | + } | ||
| 25 | + | ||
| 26 | + ~OffsetBuffer() final = default; | ||
| 27 | + | ||
| 28 | + qpdf_offset_t | ||
| 29 | + findAndSkipNextEOL() final | ||
| 30 | + { | ||
| 31 | + return proxied.findAndSkipNextEOL() + global_offset; | ||
| 32 | + } | ||
| 33 | + | ||
| 34 | + std::string const& | ||
| 35 | + getName() const final | ||
| 36 | + { | ||
| 37 | + return proxied.getName(); | ||
| 38 | + } | ||
| 39 | + | ||
| 40 | + qpdf_offset_t | ||
| 41 | + tell() final | ||
| 42 | + { | ||
| 43 | + return proxied.tell() + global_offset; | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + void | ||
| 47 | + seek(qpdf_offset_t offset, int whence) final | ||
| 48 | + { | ||
| 49 | + if (whence == SEEK_SET) { | ||
| 50 | + proxied.seek(offset - global_offset, whence); | ||
| 51 | + } else { | ||
| 52 | + proxied.seek(offset, whence); | ||
| 53 | + } | ||
| 54 | + } | ||
| 55 | + | ||
| 56 | + void | ||
| 57 | + rewind() final | ||
| 58 | + { | ||
| 59 | + seek(0, SEEK_SET); | ||
| 60 | + } | ||
| 61 | + | ||
| 62 | + size_t | ||
| 63 | + read(char* buffer, size_t length) final | ||
| 64 | + { | ||
| 65 | + size_t result = proxied.read(buffer, length); | ||
| 66 | + setLastOffset(proxied.getLastOffset() + global_offset); | ||
| 67 | + return result; | ||
| 68 | + } | ||
| 69 | + | ||
| 70 | + void | ||
| 71 | + unreadCh(char ch) final | ||
| 72 | + { | ||
| 73 | + proxied.unreadCh(ch); | ||
| 74 | + } | ||
| 75 | + | ||
| 76 | + private: | ||
| 77 | + BufferInputSource proxied; | ||
| 78 | + qpdf_offset_t global_offset; | ||
| 79 | + }; | ||
| 80 | + | ||
| 81 | +} // namespace qpdf::is | ||
| 82 | + | ||
| 6 | inline size_t | 83 | inline size_t |
| 7 | InputSource::read(std::string& str, size_t count, qpdf_offset_t at) | 84 | InputSource::read(std::string& str, size_t count, qpdf_offset_t at) |
| 8 | { | 85 | { |
libqpdf/qpdf/QPDFParser.hh
| 1 | #ifndef QPDFPARSER_HH | 1 | #ifndef QPDFPARSER_HH |
| 2 | #define QPDFPARSER_HH | 2 | #define QPDFPARSER_HH |
| 3 | 3 | ||
| 4 | +#include <qpdf/InputSource_private.hh> | ||
| 4 | #include <qpdf/QPDFObjectHandle_private.hh> | 5 | #include <qpdf/QPDFObjectHandle_private.hh> |
| 5 | #include <qpdf/QPDFObject_private.hh> | 6 | #include <qpdf/QPDFObject_private.hh> |
| 6 | #include <qpdf/QPDFTokenizer_private.hh> | 7 | #include <qpdf/QPDFTokenizer_private.hh> |
| @@ -38,7 +39,7 @@ class QPDFParser | @@ -38,7 +39,7 @@ class QPDFParser | ||
| 38 | QPDF& context); | 39 | QPDF& context); |
| 39 | 40 | ||
| 40 | static std::pair<QPDFObjectHandle, bool> parse( | 41 | static std::pair<QPDFObjectHandle, bool> parse( |
| 41 | - BufferInputSource& input, | 42 | + qpdf::is::OffsetBuffer& input, |
| 42 | int stream_id, | 43 | int stream_id, |
| 43 | int obj_id, | 44 | int obj_id, |
| 44 | qpdf::Tokenizer& tokenizer, | 45 | qpdf::Tokenizer& tokenizer, |
manual/release-notes.rst
| @@ -21,6 +21,11 @@ more detail. | @@ -21,6 +21,11 @@ more detail. | ||
| 21 | integer object. Previously the method returned false if the first | 21 | integer object. Previously the method returned false if the first |
| 22 | dictionary object was not a linearization parameter dictionary. | 22 | dictionary object was not a linearization parameter dictionary. |
| 23 | 23 | ||
| 24 | + = Fix parsing of object streams containing objects not seperated by | ||
| 25 | + white-space. Pre-2020 editions of the PDF specification incorrectly | ||
| 26 | + stated that white-space was required between objects. qpdf relied on this | ||
| 27 | + when parsing object streams. | ||
| 28 | + | ||
| 24 | - Fix two object stream error/warning messages that reported the wrong | 29 | - Fix two object stream error/warning messages that reported the wrong |
| 25 | object id. | 30 | object id. |
| 26 | 31 |
qpdf/qtest/object-stream.test
| @@ -124,7 +124,7 @@ $td->runtest("adjacent compressed objects", | @@ -124,7 +124,7 @@ $td->runtest("adjacent compressed objects", | ||
| 124 | {$td->COMMAND => "test_driver 99 no-space-compressed-object.pdf"}, | 124 | {$td->COMMAND => "test_driver 99 no-space-compressed-object.pdf"}, |
| 125 | {$td->FILE => "no-space-compressed-object.out", | 125 | {$td->FILE => "no-space-compressed-object.out", |
| 126 | $td->EXIT_STATUS => 0}, | 126 | $td->EXIT_STATUS => 0}, |
| 127 | - $td->EXPECT_FAILURE); | 127 | + $td->NORMALIZE_NEWLINES); |
| 128 | 128 | ||
| 129 | cleanup(); | 129 | cleanup(); |
| 130 | $td->report(calc_ntests($n_tests, $n_compare_pdfs)); | 130 | $td->report(calc_ntests($n_tests, $n_compare_pdfs)); |