diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 9f076af..84c1c87 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -45,6 +45,11 @@ #include #include +namespace qpdf::is +{ + class OffsetBuffer; +} + class QPDF_Stream; class BitStream; class BitWriter; @@ -785,7 +790,7 @@ class QPDF QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); - QPDFObjectHandle readObjectInStream(BufferInputSource& input, int stream_id, int obj_id); + QPDFObjectHandle readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id); size_t recoverStreamLength( std::shared_ptr input, QPDFObjGen og, qpdf_offset_t stream_offset); QPDFTokenizer::Token readToken(InputSource&, size_t max_len = 0); diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index 05b9a35..91334b1 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -12,6 +12,7 @@ #include using namespace std::literals; +using namespace qpdf; using ObjectPtr = std::shared_ptr; @@ -87,7 +88,7 @@ QPDFParser::parse( std::pair QPDFParser::parse( - BufferInputSource& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) + is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) { bool empty{false}; auto result = QPDFParser( diff --git a/libqpdf/QPDF_objects.cc b/libqpdf/QPDF_objects.cc index e434a5d..8bcd66c 100644 --- a/libqpdf/QPDF_objects.cc +++ b/libqpdf/QPDF_objects.cc @@ -1288,7 +1288,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset } QPDFObjectHandle -QPDF::readObjectInStream(BufferInputSource& input, int stream_id, int obj_id) +QPDF::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id) { auto [object, empty] = QPDFParser::parse(input, stream_id, obj_id, m->tokenizer, 
*this); if (empty) { @@ -1645,12 +1645,26 @@ QPDF::resolveObjectsInStream(int obj_stream_number) "object stream " + std::to_string(obj_stream_number) + " has incorrect keys"); } - std::vector> offsets; + // id, offset, size + std::vector> offsets; auto bp = obj_stream.getStreamData(qpdf_dl_specialized); + BufferInputSource input("", bp.get()); + const auto b_size = bp->getSize(); + const auto end_offset = static_cast(b_size); + auto b_start = bp->getBuffer(); + + if (first >= end_offset) { + throw damagedPDF( + "object " + std::to_string(obj_stream_number) + " 0", + "object stream " + std::to_string(obj_stream_number) + " has invalid /First entry"); + } + + int id = 0; long long last_offset = -1; + bool is_first = true; for (unsigned int i = 0; i < n; ++i) { auto tnum = readToken(input); auto toffset = readToken(input); @@ -1682,26 +1696,45 @@ QPDF::resolveObjectsInStream(int obj_stream_number) std::to_string(last_offset) + ")")); continue; } - last_offset = offset; if (num > m->xref_table_max_id) { continue; } - offsets.emplace_back(num, offset + first); + if (first + offset >= end_offset) { + warn(damaged(num, offset, "offset is too large")); + continue; + } + + if (is_first) { + is_first = false; + } else { + offsets.emplace_back( + id, last_offset + first, static_cast(offset - last_offset)); + } + + last_offset = offset; + id = num; + } + + if (!is_first) { + // We found at least one valid entry. + offsets.emplace_back( + id, last_offset + first, b_size - static_cast(last_offset + first)); } // To avoid having to read the object stream multiple times, store all objects that would be // found here in the cache. Remember that some objects stored here might have been overridden // by new objects appended to the file, so it is necessary to recheck the xref table and only // cache what would actually be resolved here. 
- for (auto const& [id, offset]: offsets) { - QPDFObjGen og(id, 0); + for (auto const& [obj_id, obj_offset, obj_size]: offsets) { + QPDFObjGen og(obj_id, 0); auto entry = m->xref_table.find(og); if (entry != m->xref_table.end() && entry->second.getType() == 2 && entry->second.getObjStreamNumber() == obj_stream_number) { - input.seek(offset, SEEK_SET); - QPDFObjectHandle oh = readObjectInStream(input, obj_stream_number, id); + Buffer obj_buffer{b_start + obj_offset, obj_size}; + is::OffsetBuffer in("", &obj_buffer, obj_offset); + auto oh = readObjectInStream(in, obj_stream_number, obj_id); updateCache(og, oh.getObj(), end_before_space, end_after_space); } else { QTC::TC("qpdf", "QPDF not caching overridden objstm object"); diff --git a/libqpdf/qpdf/InputSource_private.hh b/libqpdf/qpdf/InputSource_private.hh index 6194615..00d27c9 100644 --- a/libqpdf/qpdf/InputSource_private.hh +++ b/libqpdf/qpdf/InputSource_private.hh @@ -1,8 +1,94 @@ #ifndef QPDF_INPUTSOURCE_PRIVATE_HH #define QPDF_INPUTSOURCE_PRIVATE_HH +#include #include +#include +#include +#include + +namespace qpdf::is +{ + // InputSource over a Buffer whose positions are reported in a shifted coordinate system: + // tell() and findAndSkipNextEOL() add global_offset to the proxied BufferInputSource's + // positions, and seek(offset, SEEK_SET) subtracts it again. + class OffsetBuffer final: public InputSource + { + public: + // Throws std::logic_error if global_offset is negative. + OffsetBuffer(std::string const& description, Buffer* buf, qpdf_offset_t global_offset) : + proxied(description, buf), + global_offset(global_offset) + { + if (global_offset < 0) { + throw std::logic_error("is::OffsetBuffer constructed with negative offset"); + } + last_offset = global_offset; + } + + ~OffsetBuffer() final = default; + + qpdf_offset_t + findAndSkipNextEOL() final + { + return proxied.findAndSkipNextEOL() + global_offset; + } + + std::string const& + getName() const final + { + return proxied.getName(); + } + + qpdf_offset_t + tell() final + { + return proxied.tell() + global_offset; + } + + // SEEK_SET offsets are in global coordinates; other whence values are passed through + // to the proxied source unchanged. + void + seek(qpdf_offset_t offset, int whence) final + { + if (whence == SEEK_SET) { + proxied.seek(offset - global_offset, whence); + } else { + proxied.seek(offset, whence); + } + } + + // Return to the first byte of the buffer, i.e. global position global_offset. Seeking + // to global position 0 would request a negative position from the proxied source + // whenever global_offset > 0. + void + rewind() final + { + seek(global_offset,
SEEK_SET); + } + + size_t + read(char* buffer, size_t length) final + { + size_t result = proxied.read(buffer, length); + setLastOffset(proxied.getLastOffset() + global_offset); + return result; + } + + void + unreadCh(char ch) final + { + proxied.unreadCh(ch); + } + + private: + BufferInputSource proxied; + qpdf_offset_t global_offset; + }; + +} // namespace qpdf::is + inline size_t InputSource::read(std::string& str, size_t count, qpdf_offset_t at) { diff --git a/libqpdf/qpdf/QPDFParser.hh b/libqpdf/qpdf/QPDFParser.hh index 2bc76ae..fa0de4a 100644 --- a/libqpdf/qpdf/QPDFParser.hh +++ b/libqpdf/qpdf/QPDFParser.hh @@ -1,6 +1,7 @@ #ifndef QPDFPARSER_HH #define QPDFPARSER_HH +#include #include #include #include @@ -38,7 +39,7 @@ class QPDFParser QPDF& context); static std::pair parse( - BufferInputSource& input, + qpdf::is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, diff --git a/manual/release-notes.rst b/manual/release-notes.rst index 0f1a7c2..bfb502d 100644 --- a/manual/release-notes.rst +++ b/manual/release-notes.rst @@ -21,6 +21,11 @@ more detail. integer object. Previously the method returned false if the first dictionary object was not a linearization parameter dictionary. + - Fix parsing of object streams containing objects not separated by + white-space. Pre-2020 editions of the PDF specification incorrectly + stated that white-space was required between objects. qpdf relied on this + when parsing object streams. + - Fix two object stream error/warning messages that reported the wrong object id. 
diff --git a/qpdf/qtest/object-stream.test b/qpdf/qtest/object-stream.test index 891f74a..7c3eb20 100644 --- a/qpdf/qtest/object-stream.test +++ b/qpdf/qtest/object-stream.test @@ -124,7 +124,7 @@ $td->runtest("adjacent compressed objects", {$td->COMMAND => "test_driver 99 no-space-compressed-object.pdf"}, {$td->FILE => "no-space-compressed-object.out", $td->EXIT_STATUS => 0}, - $td->EXPECT_FAILURE); + $td->NORMALIZE_NEWLINES); cleanup(); $td->report(calc_ntests($n_tests, $n_compare_pdfs));