Commit 65e7e3db36ac731fb56d3c675c3bacfcd21dec61

Authored by m-holger
Committed by GitHub
2 parents 5eb9a18a b37fc717

Merge pull request #1400 from m-holger/pr1266

Fix parsing of object streams containing objects with no white-space between them
include/qpdf/QPDF.hh
@@ -45,6 +45,11 @@ @@ -45,6 +45,11 @@
45 #include <qpdf/QPDFWriter.hh> 45 #include <qpdf/QPDFWriter.hh>
46 #include <qpdf/QPDFXRefEntry.hh> 46 #include <qpdf/QPDFXRefEntry.hh>
47 47
  48 +namespace qpdf::is
  49 +{
  50 + class OffsetBuffer;
  51 +}
  52 +
48 class QPDF_Stream; 53 class QPDF_Stream;
49 class BitStream; 54 class BitStream;
50 class BitWriter; 55 class BitWriter;
@@ -785,7 +790,7 @@ class QPDF @@ -785,7 +790,7 @@ class QPDF
785 QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); 790 QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
786 void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); 791 void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
787 void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); 792 void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
788 - QPDFObjectHandle readObjectInStream(BufferInputSource& input, int stream_id, int obj_id); 793 + QPDFObjectHandle readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id);
789 size_t recoverStreamLength( 794 size_t recoverStreamLength(
790 std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset); 795 std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset);
791 QPDFTokenizer::Token readToken(InputSource&, size_t max_len = 0); 796 QPDFTokenizer::Token readToken(InputSource&, size_t max_len = 0);
libqpdf/QPDFParser.cc
@@ -12,6 +12,7 @@ @@ -12,6 +12,7 @@
12 #include <memory> 12 #include <memory>
13 13
14 using namespace std::literals; 14 using namespace std::literals;
  15 +using namespace qpdf;
15 16
16 using ObjectPtr = std::shared_ptr<QPDFObject>; 17 using ObjectPtr = std::shared_ptr<QPDFObject>;
17 18
@@ -87,7 +88,7 @@ QPDFParser::parse( @@ -87,7 +88,7 @@ QPDFParser::parse(
87 88
88 std::pair<QPDFObjectHandle, bool> 89 std::pair<QPDFObjectHandle, bool>
89 QPDFParser::parse( 90 QPDFParser::parse(
90 - BufferInputSource& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) 91 + is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
91 { 92 {
92 bool empty{false}; 93 bool empty{false};
93 auto result = QPDFParser( 94 auto result = QPDFParser(
libqpdf/QPDF_objects.cc
@@ -1288,7 +1288,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle&amp; object, QPDFObjGen og, qpdf_offset @@ -1288,7 +1288,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle&amp; object, QPDFObjGen og, qpdf_offset
1288 } 1288 }
1289 1289
1290 QPDFObjectHandle 1290 QPDFObjectHandle
1291 -QPDF::readObjectInStream(BufferInputSource& input, int stream_id, int obj_id) 1291 +QPDF::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id)
1292 { 1292 {
1293 auto [object, empty] = QPDFParser::parse(input, stream_id, obj_id, m->tokenizer, *this); 1293 auto [object, empty] = QPDFParser::parse(input, stream_id, obj_id, m->tokenizer, *this);
1294 if (empty) { 1294 if (empty) {
@@ -1645,12 +1645,26 @@ QPDF::resolveObjectsInStream(int obj_stream_number) @@ -1645,12 +1645,26 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1645 "object stream " + std::to_string(obj_stream_number) + " has incorrect keys"); 1645 "object stream " + std::to_string(obj_stream_number) + " has incorrect keys");
1646 } 1646 }
1647 1647
1648 - std::vector<std::pair<int, long long>> offsets; 1648 + // id, offset, size
  1649 + std::vector<std::tuple<int, qpdf_offset_t, size_t>> offsets;
1649 1650
1650 auto bp = obj_stream.getStreamData(qpdf_dl_specialized); 1651 auto bp = obj_stream.getStreamData(qpdf_dl_specialized);
  1652 +
1651 BufferInputSource input("", bp.get()); 1653 BufferInputSource input("", bp.get());
1652 1654
  1655 + const auto b_size = bp->getSize();
  1656 + const auto end_offset = static_cast<qpdf_offset_t>(b_size);
  1657 + auto b_start = bp->getBuffer();
  1658 +
  1659 + if (first >= end_offset) {
  1660 + throw damagedPDF(
  1661 + "object " + std::to_string(obj_stream_number) + " 0",
  1662 + "object stream " + std::to_string(obj_stream_number) + " has invalid /First entry");
  1663 + }
  1664 +
  1665 + int id = 0;
1653 long long last_offset = -1; 1666 long long last_offset = -1;
  1667 + bool is_first = true;
1654 for (unsigned int i = 0; i < n; ++i) { 1668 for (unsigned int i = 0; i < n; ++i) {
1655 auto tnum = readToken(input); 1669 auto tnum = readToken(input);
1656 auto toffset = readToken(input); 1670 auto toffset = readToken(input);
@@ -1682,26 +1696,45 @@ QPDF::resolveObjectsInStream(int obj_stream_number) @@ -1682,26 +1696,45 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1682 std::to_string(last_offset) + ")")); 1696 std::to_string(last_offset) + ")"));
1683 continue; 1697 continue;
1684 } 1698 }
1685 - last_offset = offset;  
1686 1699
1687 if (num > m->xref_table_max_id) { 1700 if (num > m->xref_table_max_id) {
1688 continue; 1701 continue;
1689 } 1702 }
1690 1703
1691 - offsets.emplace_back(num, offset + first); 1704 + if (first + offset >= end_offset) {
  1705 + warn(damaged(num, offset, "offset is too large"));
  1706 + continue;
  1707 + }
  1708 +
  1709 + if (is_first) {
  1710 + is_first = false;
  1711 + } else {
  1712 + offsets.emplace_back(
  1713 + id, last_offset + first, static_cast<size_t>(offset - last_offset));
  1714 + }
  1715 +
  1716 + last_offset = offset;
  1717 + id = num;
  1718 + }
  1719 +
  1720 + if (!is_first) {
  1721 + // We found at least one valid entry.
  1722 + offsets.emplace_back(
  1723 + id, last_offset + first, b_size - static_cast<size_t>(last_offset + first));
1692 } 1724 }
1693 1725
1694 // To avoid having to read the object stream multiple times, store all objects that would be 1726 // To avoid having to read the object stream multiple times, store all objects that would be
1695 // found here in the cache. Remember that some objects stored here might have been overridden 1727 // found here in the cache. Remember that some objects stored here might have been overridden
1696 // by new objects appended to the file, so it is necessary to recheck the xref table and only 1728 // by new objects appended to the file, so it is necessary to recheck the xref table and only
1697 // cache what would actually be resolved here. 1729 // cache what would actually be resolved here.
1698 - for (auto const& [id, offset]: offsets) {  
1699 - QPDFObjGen og(id, 0); 1730 + for (auto const& [obj_id, obj_offset, obj_size]: offsets) {
  1731 + QPDFObjGen og(obj_id, 0);
1700 auto entry = m->xref_table.find(og); 1732 auto entry = m->xref_table.find(og);
1701 if (entry != m->xref_table.end() && entry->second.getType() == 2 && 1733 if (entry != m->xref_table.end() && entry->second.getType() == 2 &&
1702 entry->second.getObjStreamNumber() == obj_stream_number) { 1734 entry->second.getObjStreamNumber() == obj_stream_number) {
1703 - input.seek(offset, SEEK_SET);  
1704 - QPDFObjectHandle oh = readObjectInStream(input, obj_stream_number, id); 1735 + Buffer obj_buffer{b_start + obj_offset, obj_size};
  1736 + is::OffsetBuffer in("", &obj_buffer, obj_offset);
  1737 + auto oh = readObjectInStream(in, obj_stream_number, obj_id);
1705 updateCache(og, oh.getObj(), end_before_space, end_after_space); 1738 updateCache(og, oh.getObj(), end_before_space, end_after_space);
1706 } else { 1739 } else {
1707 QTC::TC("qpdf", "QPDF not caching overridden objstm object"); 1740 QTC::TC("qpdf", "QPDF not caching overridden objstm object");
libqpdf/qpdf/InputSource_private.hh
1 #ifndef QPDF_INPUTSOURCE_PRIVATE_HH 1 #ifndef QPDF_INPUTSOURCE_PRIVATE_HH
2 #define QPDF_INPUTSOURCE_PRIVATE_HH 2 #define QPDF_INPUTSOURCE_PRIVATE_HH
3 3
  4 +#include <qpdf/BufferInputSource.hh>
4 #include <qpdf/InputSource.hh> 5 #include <qpdf/InputSource.hh>
5 6
  7 +#include <limits>
  8 +#include <sstream>
  9 +#include <stdexcept>
  10 +
  11 +namespace qpdf::is
  12 +{
  13 + class OffsetBuffer final: public InputSource
  14 + {
  15 + public:
  16 + OffsetBuffer(std::string const& description, Buffer* buf, qpdf_offset_t global_offset) :
  17 + proxied(description, buf),
  18 + global_offset(global_offset)
  19 + {
  20 + if (global_offset < 0) {
  21 + throw std::logic_error("is::OffsetBuffer constructed with negative offset");
  22 + }
  23 + last_offset = global_offset;
  24 + }
  25 +
  26 + ~OffsetBuffer() final = default;
  27 +
  28 + qpdf_offset_t
  29 + findAndSkipNextEOL() final
  30 + {
  31 + return proxied.findAndSkipNextEOL() + global_offset;
  32 + }
  33 +
  34 + std::string const&
  35 + getName() const final
  36 + {
  37 + return proxied.getName();
  38 + }
  39 +
  40 + qpdf_offset_t
  41 + tell() final
  42 + {
  43 + return proxied.tell() + global_offset;
  44 + }
  45 +
  46 + void
  47 + seek(qpdf_offset_t offset, int whence) final
  48 + {
  49 + if (whence == SEEK_SET) {
  50 + proxied.seek(offset - global_offset, whence);
  51 + } else {
  52 + proxied.seek(offset, whence);
  53 + }
  54 + }
  55 +
  56 + void
  57 + rewind() final
  58 + {
  59 + seek(0, SEEK_SET);
  60 + }
  61 +
  62 + size_t
  63 + read(char* buffer, size_t length) final
  64 + {
  65 + size_t result = proxied.read(buffer, length);
  66 + setLastOffset(proxied.getLastOffset() + global_offset);
  67 + return result;
  68 + }
  69 +
  70 + void
  71 + unreadCh(char ch) final
  72 + {
  73 + proxied.unreadCh(ch);
  74 + }
  75 +
  76 + private:
  77 + BufferInputSource proxied;
  78 + qpdf_offset_t global_offset;
  79 + };
  80 +
  81 +} // namespace qpdf::is
  82 +
6 inline size_t 83 inline size_t
7 InputSource::read(std::string& str, size_t count, qpdf_offset_t at) 84 InputSource::read(std::string& str, size_t count, qpdf_offset_t at)
8 { 85 {
libqpdf/qpdf/QPDFParser.hh
1 #ifndef QPDFPARSER_HH 1 #ifndef QPDFPARSER_HH
2 #define QPDFPARSER_HH 2 #define QPDFPARSER_HH
3 3
  4 +#include <qpdf/InputSource_private.hh>
4 #include <qpdf/QPDFObjectHandle_private.hh> 5 #include <qpdf/QPDFObjectHandle_private.hh>
5 #include <qpdf/QPDFObject_private.hh> 6 #include <qpdf/QPDFObject_private.hh>
6 #include <qpdf/QPDFTokenizer_private.hh> 7 #include <qpdf/QPDFTokenizer_private.hh>
@@ -38,7 +39,7 @@ class QPDFParser @@ -38,7 +39,7 @@ class QPDFParser
38 QPDF& context); 39 QPDF& context);
39 40
40 static std::pair<QPDFObjectHandle, bool> parse( 41 static std::pair<QPDFObjectHandle, bool> parse(
41 - BufferInputSource& input, 42 + qpdf::is::OffsetBuffer& input,
42 int stream_id, 43 int stream_id,
43 int obj_id, 44 int obj_id,
44 qpdf::Tokenizer& tokenizer, 45 qpdf::Tokenizer& tokenizer,
manual/release-notes.rst
@@ -21,6 +21,11 @@ more detail. @@ -21,6 +21,11 @@ more detail.
21 integer object. Previously the method returned false if the first 21 integer object. Previously the method returned false if the first
22 dictionary object was not a linearization parameter dictionary. 22 dictionary object was not a linearization parameter dictionary.
23 23
  24 + = Fix parsing of object streams containing objects not seperated by
  25 + white-space. Pre-2020 editions of the PDF specification incorrectly
  26 + stated that white-space was required between objects. qpdf relied on this
  27 + when parsing object streams.
  28 +
24 - Fix two object stream error/warning messages that reported the wrong 29 - Fix two object stream error/warning messages that reported the wrong
25 object id. 30 object id.
26 31
qpdf/qtest/object-stream.test
@@ -124,7 +124,7 @@ $td-&gt;runtest(&quot;adjacent compressed objects&quot;, @@ -124,7 +124,7 @@ $td-&gt;runtest(&quot;adjacent compressed objects&quot;,
124 {$td->COMMAND => "test_driver 99 no-space-compressed-object.pdf"}, 124 {$td->COMMAND => "test_driver 99 no-space-compressed-object.pdf"},
125 {$td->FILE => "no-space-compressed-object.out", 125 {$td->FILE => "no-space-compressed-object.out",
126 $td->EXIT_STATUS => 0}, 126 $td->EXIT_STATUS => 0},
127 - $td->EXPECT_FAILURE); 127 + $td->NORMALIZE_NEWLINES);
128 128
129 cleanup(); 129 cleanup();
130 $td->report(calc_ntests($n_tests, $n_compare_pdfs)); 130 $td->report(calc_ntests($n_tests, $n_compare_pdfs));