Commit fe4853fed484a73bc355af53259d26b48e68215d
1 parent
7061ee1c
Refactor `QPDFParser` to simplify `parse` methods by removing `empty` flag, adjusting return values for uninitialized objects, and cleaning up error handling logic.
Showing 10 changed files with 102 additions and 148 deletions
libqpdf/QPDFParser.cc
| @@ -51,7 +51,6 @@ QPDFObjectHandle | @@ -51,7 +51,6 @@ QPDFObjectHandle | ||
| 51 | QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context) | 51 | QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context) |
| 52 | { | 52 | { |
| 53 | qpdf::Tokenizer tokenizer; | 53 | qpdf::Tokenizer tokenizer; |
| 54 | - bool empty = false; | ||
| 55 | if (auto result = QPDFParser( | 54 | if (auto result = QPDFParser( |
| 56 | input, | 55 | input, |
| 57 | make_description(input.getName(), object_description), | 56 | make_description(input.getName(), object_description), |
| @@ -60,7 +59,7 @@ QPDFParser::parse(InputSource& input, std::string const& object_description, QPD | @@ -60,7 +59,7 @@ QPDFParser::parse(InputSource& input, std::string const& object_description, QPD | ||
| 60 | nullptr, | 59 | nullptr, |
| 61 | context, | 60 | context, |
| 62 | false) | 61 | false) |
| 63 | - .parse(empty, false)) { | 62 | + .parse()) { |
| 64 | return result; | 63 | return result; |
| 65 | } | 64 | } |
| 66 | return {QPDFObject::create<QPDF_Null>()}; | 65 | return {QPDFObject::create<QPDF_Null>()}; |
| @@ -73,23 +72,24 @@ QPDFParser::parse_content( | @@ -73,23 +72,24 @@ QPDFParser::parse_content( | ||
| 73 | qpdf::Tokenizer& tokenizer, | 72 | qpdf::Tokenizer& tokenizer, |
| 74 | QPDF* context) | 73 | QPDF* context) |
| 75 | { | 74 | { |
| 76 | - bool empty = false; | ||
| 77 | - if (auto result = QPDFParser( | ||
| 78 | - input, | ||
| 79 | - std::move(sp_description), | ||
| 80 | - "content", | ||
| 81 | - tokenizer, | ||
| 82 | - nullptr, | ||
| 83 | - context, | ||
| 84 | - true, | ||
| 85 | - 0, | ||
| 86 | - 0, | ||
| 87 | - context && context->doc().reconstructed_xref()) | ||
| 88 | - .parse(empty, true)) { | 75 | + static const std::string content("content"); // GCC12 - make constexpr |
| 76 | + auto p = QPDFParser( | ||
| 77 | + input, | ||
| 78 | + std::move(sp_description), | ||
| 79 | + content, | ||
| 80 | + tokenizer, | ||
| 81 | + nullptr, | ||
| 82 | + context, | ||
| 83 | + true, | ||
| 84 | + 0, | ||
| 85 | + 0, | ||
| 86 | + context && context->doc().reconstructed_xref()); | ||
| 87 | + auto result = p.parse(true); | ||
| 88 | + if (result || p.empty_) { | ||
| 89 | + // In content stream mode, leave object uninitialized to indicate EOF | ||
| 89 | return result; | 90 | return result; |
| 90 | } | 91 | } |
| 91 | - // In content stream mode, leave object uninitialized to indicate EOF | ||
| 92 | - return {empty ? nullptr : QPDFObject::create<QPDF_Null>()}; | 92 | + return {QPDFObject::create<QPDF_Null>()}; |
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | QPDFObjectHandle | 95 | QPDFObjectHandle |
| @@ -101,21 +101,25 @@ QPDFParser::parse( | @@ -101,21 +101,25 @@ QPDFParser::parse( | ||
| 101 | QPDFObjectHandle::StringDecrypter* decrypter, | 101 | QPDFObjectHandle::StringDecrypter* decrypter, |
| 102 | QPDF* context) | 102 | QPDF* context) |
| 103 | { | 103 | { |
| 104 | - if (auto result = QPDFParser( | ||
| 105 | - input, | ||
| 106 | - make_description(input.getName(), object_description), | ||
| 107 | - object_description, | ||
| 108 | - *tokenizer.m, | ||
| 109 | - decrypter, | ||
| 110 | - context, | ||
| 111 | - false) | ||
| 112 | - .parse(empty, false)) { | 104 | + // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the |
| 105 | + // only user of the 'empty' member. When removing this overload also remove 'empty'. | ||
| 106 | + auto p = QPDFParser( | ||
| 107 | + input, | ||
| 108 | + make_description(input.getName(), object_description), | ||
| 109 | + object_description, | ||
| 110 | + *tokenizer.m, | ||
| 111 | + decrypter, | ||
| 112 | + context, | ||
| 113 | + false); | ||
| 114 | + auto result = p.parse(); | ||
| 115 | + empty = p.empty_; | ||
| 116 | + if (result) { | ||
| 113 | return result; | 117 | return result; |
| 114 | } | 118 | } |
| 115 | return {QPDFObject::create<QPDF_Null>()}; | 119 | return {QPDFObject::create<QPDF_Null>()}; |
| 116 | } | 120 | } |
| 117 | 121 | ||
| 118 | -std::pair<QPDFObjectHandle, bool> | 122 | +QPDFObjectHandle |
| 119 | QPDFParser::parse( | 123 | QPDFParser::parse( |
| 120 | InputSource& input, | 124 | InputSource& input, |
| 121 | std::string const& object_description, | 125 | std::string const& object_description, |
| @@ -124,55 +128,44 @@ QPDFParser::parse( | @@ -124,55 +128,44 @@ QPDFParser::parse( | ||
| 124 | QPDF& context, | 128 | QPDF& context, |
| 125 | bool sanity_checks) | 129 | bool sanity_checks) |
| 126 | { | 130 | { |
| 127 | - bool empty{false}; | ||
| 128 | - auto result = QPDFParser( | ||
| 129 | - input, | ||
| 130 | - make_description(input.getName(), object_description), | ||
| 131 | - object_description, | ||
| 132 | - tokenizer, | ||
| 133 | - decrypter, | ||
| 134 | - &context, | ||
| 135 | - true, | ||
| 136 | - 0, | ||
| 137 | - 0, | ||
| 138 | - sanity_checks) | ||
| 139 | - .parse(empty, false); | ||
| 140 | - if (result) { | ||
| 141 | - return {result, empty}; | ||
| 142 | - } | ||
| 143 | - return {QPDFObject::create<QPDF_Null>(), empty}; | 131 | + return QPDFParser( |
| 132 | + input, | ||
| 133 | + make_description(input.getName(), object_description), | ||
| 134 | + object_description, | ||
| 135 | + tokenizer, | ||
| 136 | + decrypter, | ||
| 137 | + &context, | ||
| 138 | + true, | ||
| 139 | + 0, | ||
| 140 | + 0, | ||
| 141 | + sanity_checks) | ||
| 142 | + .parse(); | ||
| 144 | } | 143 | } |
| 145 | 144 | ||
| 146 | -std::pair<QPDFObjectHandle, bool> | 145 | +QPDFObjectHandle |
| 147 | QPDFParser::parse( | 146 | QPDFParser::parse( |
| 148 | is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) | 147 | is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) |
| 149 | { | 148 | { |
| 150 | - bool empty{false}; | ||
| 151 | - auto result = QPDFParser( | ||
| 152 | - input, | ||
| 153 | - std::make_shared<QPDFObject::Description>( | ||
| 154 | - QPDFObject::ObjStreamDescr(stream_id, obj_id)), | ||
| 155 | - "", | ||
| 156 | - tokenizer, | ||
| 157 | - nullptr, | ||
| 158 | - &context, | ||
| 159 | - true, | ||
| 160 | - stream_id, | ||
| 161 | - obj_id) | ||
| 162 | - .parse(empty, false); | ||
| 163 | - | ||
| 164 | - if (result) { | ||
| 165 | - return {result, empty}; | ||
| 166 | - } | ||
| 167 | - return {QPDFObject::create<QPDF_Null>(), empty}; | 149 | + return QPDFParser( |
| 150 | + input, | ||
| 151 | + std::make_shared<QPDFObject::Description>( | ||
| 152 | + QPDFObject::ObjStreamDescr(stream_id, obj_id)), | ||
| 153 | + "", | ||
| 154 | + tokenizer, | ||
| 155 | + nullptr, | ||
| 156 | + &context, | ||
| 157 | + true, | ||
| 158 | + stream_id, | ||
| 159 | + obj_id) | ||
| 160 | + .parse(); | ||
| 168 | } | 161 | } |
| 169 | 162 | ||
| 170 | QPDFObjectHandle | 163 | QPDFObjectHandle |
| 171 | -QPDFParser::parse(bool& empty, bool content_stream) | 164 | +QPDFParser::parse(bool content_stream) |
| 172 | { | 165 | { |
| 173 | try { | 166 | try { |
| 174 | - return parse_first(empty, content_stream); | ||
| 175 | - } catch (Error& e) { | 167 | + return parse_first(content_stream); |
| 168 | + } catch (Error&) { | ||
| 176 | return {}; | 169 | return {}; |
| 177 | } catch (QPDFExc& e) { | 170 | } catch (QPDFExc& e) { |
| 178 | throw e; | 171 | throw e; |
| @@ -185,15 +178,14 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -185,15 +178,14 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 185 | } | 178 | } |
| 186 | 179 | ||
| 187 | QPDFObjectHandle | 180 | QPDFObjectHandle |
| 188 | -QPDFParser::parse_first(bool& empty, bool content_stream) | 181 | +QPDFParser::parse_first(bool content_stream) |
| 189 | { | 182 | { |
| 190 | // This method must take care not to resolve any objects. Don't check the type of any object | 183 | // This method must take care not to resolve any objects. Don't check the type of any object |
| 191 | // without first ensuring that it is a direct object. Otherwise, doing so may have the side | 184 | // without first ensuring that it is a direct object. Otherwise, doing so may have the side |
| 192 | // effect of reading the object and changing the file pointer. If you do this, it will cause a | 185 | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
| 193 | // logic error to be thrown from QPDF::inParse(). | 186 | // logic error to be thrown from QPDF::inParse(). |
| 194 | 187 | ||
| 195 | - ParseGuard pg(context); | ||
| 196 | - empty = false; | 188 | + QPDF::Doc::ParseGuard pg(context); |
| 197 | start = input.tell(); | 189 | start = input.tell(); |
| 198 | if (!tokenizer.nextToken(input, object_description)) { | 190 | if (!tokenizer.nextToken(input, object_description)) { |
| 199 | warn(tokenizer.getErrorMessage()); | 191 | warn(tokenizer.getErrorMessage()); |
| @@ -203,7 +195,7 @@ QPDFParser::parse_first(bool& empty, bool content_stream) | @@ -203,7 +195,7 @@ QPDFParser::parse_first(bool& empty, bool content_stream) | ||
| 203 | case QPDFTokenizer::tt_eof: | 195 | case QPDFTokenizer::tt_eof: |
| 204 | if (content_stream) { | 196 | if (content_stream) { |
| 205 | // In content stream mode, leave object uninitialized to indicate EOF | 197 | // In content stream mode, leave object uninitialized to indicate EOF |
| 206 | - empty = true; | 198 | + empty_ = true; |
| 207 | return {}; | 199 | return {}; |
| 208 | } | 200 | } |
| 209 | warn("unexpected EOF"); | 201 | warn("unexpected EOF"); |
| @@ -255,10 +247,15 @@ QPDFParser::parse_first(bool& empty, bool content_stream) | @@ -255,10 +247,15 @@ QPDFParser::parse_first(bool& empty, bool content_stream) | ||
| 255 | if (content_stream) { | 247 | if (content_stream) { |
| 256 | return withDescription<QPDF_Operator>(value); | 248 | return withDescription<QPDF_Operator>(value); |
| 257 | } else if (value == "endobj") { | 249 | } else if (value == "endobj") { |
| 258 | - // We just saw endobj without having read anything. Treat this as a null and do | ||
| 259 | - // not move the input source's offset. | 250 | + // We just saw endobj without having read anything. Nothing in the PDF spec appears |
| 251 | + // to allow empty objects, but they have been encountered in actual PDF files and | ||
| 252 | + // Adobe Reader appears to ignore them. Treat this as a null and do not move the | ||
| 253 | + // input source's offset. | ||
| 254 | + empty_ = true; | ||
| 260 | input.seek(input.getLastOffset(), SEEK_SET); | 255 | input.seek(input.getLastOffset(), SEEK_SET); |
| 261 | - empty = true; | 256 | + if (!content_stream) { |
| 257 | + warn("empty object treated as null"); | ||
| 258 | + } | ||
| 262 | return {}; | 259 | return {}; |
| 263 | } else { | 260 | } else { |
| 264 | warn("unknown token while reading object; treating as string"); | 261 | warn("unknown token while reading object; treating as string"); |
libqpdf/QPDF_objects.cc
| @@ -1233,13 +1233,9 @@ QPDFObjectHandle | @@ -1233,13 +1233,9 @@ QPDFObjectHandle | ||
| 1233 | Objects::readTrailer() | 1233 | Objects::readTrailer() |
| 1234 | { | 1234 | { |
| 1235 | qpdf_offset_t offset = m->file->tell(); | 1235 | qpdf_offset_t offset = m->file->tell(); |
| 1236 | - auto [object, empty] = | 1236 | + auto object = |
| 1237 | QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, qpdf, m->reconstructed_xref); | 1237 | QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, qpdf, m->reconstructed_xref); |
| 1238 | - if (empty) { | ||
| 1239 | - // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in | ||
| 1240 | - // actual PDF files and Adobe Reader appears to ignore them. | ||
| 1241 | - warn(damagedPDF("trailer", "empty object treated as null")); | ||
| 1242 | - } else if (object.isDictionary() && m->objects.readToken(*m->file).isWord("stream")) { | 1238 | + if (object.isDictionary() && m->objects.readToken(*m->file).isWord("stream")) { |
| 1243 | warn(damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer")); | 1239 | warn(damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer")); |
| 1244 | } | 1240 | } |
| 1245 | // Override last_offset so that it points to the beginning of the object we just read | 1241 | // Override last_offset so that it points to the beginning of the object we just read |
| @@ -1255,19 +1251,15 @@ Objects::readObject(std::string const& description, QPDFObjGen og) | @@ -1255,19 +1251,15 @@ Objects::readObject(std::string const& description, QPDFObjGen og) | ||
| 1255 | 1251 | ||
| 1256 | StringDecrypter decrypter{&qpdf, og}; | 1252 | StringDecrypter decrypter{&qpdf, og}; |
| 1257 | StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; | 1253 | StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; |
| 1258 | - auto [object, empty] = QPDFParser::parse( | 1254 | + auto object = QPDFParser::parse( |
| 1259 | *m->file, | 1255 | *m->file, |
| 1260 | m->last_object_description, | 1256 | m->last_object_description, |
| 1261 | m->tokenizer, | 1257 | m->tokenizer, |
| 1262 | decrypter_ptr, | 1258 | decrypter_ptr, |
| 1263 | qpdf, | 1259 | qpdf, |
| 1264 | m->reconstructed_xref || m->in_read_xref_stream); | 1260 | m->reconstructed_xref || m->in_read_xref_stream); |
| 1265 | - ; | ||
| 1266 | - if (empty) { | ||
| 1267 | - // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in | ||
| 1268 | - // actual PDF files and Adobe Reader appears to ignore them. | ||
| 1269 | - warn(damagedPDF(*m->file, m->file->getLastOffset(), "empty object treated as null")); | ||
| 1270 | - return object; | 1261 | + if (!object) { |
| 1262 | + return {}; | ||
| 1271 | } | 1263 | } |
| 1272 | auto token = readToken(*m->file); | 1264 | auto token = readToken(*m->file); |
| 1273 | if (object.isDictionary() && token.isWord("stream")) { | 1265 | if (object.isDictionary() && token.isWord("stream")) { |
| @@ -1366,24 +1358,6 @@ Objects::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_off | @@ -1366,24 +1358,6 @@ Objects::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_off | ||
| 1366 | } | 1358 | } |
| 1367 | } | 1359 | } |
| 1368 | 1360 | ||
| 1369 | -QPDFObjectHandle | ||
| 1370 | -Objects::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id) | ||
| 1371 | -{ | ||
| 1372 | - auto [object, empty] = QPDFParser::parse(input, stream_id, obj_id, m->tokenizer, qpdf); | ||
| 1373 | - if (empty) { | ||
| 1374 | - // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in | ||
| 1375 | - // actual PDF files and Adobe Reader appears to ignore them. | ||
| 1376 | - warn(QPDFExc( | ||
| 1377 | - qpdf_e_damaged_pdf, | ||
| 1378 | - m->file->getName() + " object stream " + std::to_string(stream_id), | ||
| 1379 | - +"object " + std::to_string(obj_id) + " 0, offset " + | ||
| 1380 | - std::to_string(input.getLastOffset()), | ||
| 1381 | - 0, | ||
| 1382 | - "empty object treated as null")); | ||
| 1383 | - } | ||
| 1384 | - return object; | ||
| 1385 | -} | ||
| 1386 | - | ||
| 1387 | bool | 1361 | bool |
| 1388 | Objects ::findEndstream() | 1362 | Objects ::findEndstream() |
| 1389 | { | 1363 | { |
| @@ -1536,25 +1510,25 @@ Objects::readObjectAtOffset( | @@ -1536,25 +1510,25 @@ Objects::readObjectAtOffset( | ||
| 1536 | return; | 1510 | return; |
| 1537 | } | 1511 | } |
| 1538 | 1512 | ||
| 1539 | - QPDFObjectHandle oh = readObject(description, og); | 1513 | + if (auto oh = readObject(description, og)) { |
| 1514 | + // Determine the end offset of this object before and after white space. We use these | ||
| 1515 | + // numbers to validate linearization hint tables. Offsets and lengths of objects may imply | ||
| 1516 | + // the end of an object to be anywhere between these values. | ||
| 1517 | + qpdf_offset_t end_before_space = m->file->tell(); | ||
| 1540 | 1518 | ||
| 1541 | - // Determine the end offset of this object before and after white space. We use these | ||
| 1542 | - // numbers to validate linearization hint tables. Offsets and lengths of objects may imply | ||
| 1543 | - // the end of an object to be anywhere between these values. | ||
| 1544 | - qpdf_offset_t end_before_space = m->file->tell(); | ||
| 1545 | - | ||
| 1546 | - // skip over spaces | ||
| 1547 | - while (true) { | ||
| 1548 | - char ch; | ||
| 1549 | - if (!m->file->read(&ch, 1)) { | ||
| 1550 | - throw damagedPDF(m->file->tell(), "EOF after endobj"); | ||
| 1551 | - } | ||
| 1552 | - if (!isspace(static_cast<unsigned char>(ch))) { | ||
| 1553 | - m->file->seek(-1, SEEK_CUR); | ||
| 1554 | - break; | 1519 | + // skip over spaces |
| 1520 | + while (true) { | ||
| 1521 | + char ch; | ||
| 1522 | + if (!m->file->read(&ch, 1)) { | ||
| 1523 | + throw damagedPDF(m->file->tell(), "EOF after endobj"); | ||
| 1524 | + } | ||
| 1525 | + if (!isspace(static_cast<unsigned char>(ch))) { | ||
| 1526 | + m->file->seek(-1, SEEK_CUR); | ||
| 1527 | + break; | ||
| 1528 | + } | ||
| 1555 | } | 1529 | } |
| 1530 | + m->objects.updateCache(og, oh.obj_sp(), end_before_space, m->file->tell()); | ||
| 1556 | } | 1531 | } |
| 1557 | - m->objects.updateCache(og, oh.obj_sp(), end_before_space, m->file->tell()); | ||
| 1558 | } | 1532 | } |
| 1559 | 1533 | ||
| 1560 | QPDFObjectHandle | 1534 | QPDFObjectHandle |
| @@ -1564,7 +1538,7 @@ Objects::readObjectAtOffset( | @@ -1564,7 +1538,7 @@ Objects::readObjectAtOffset( | ||
| 1564 | auto og = read_object_start(offset); | 1538 | auto og = read_object_start(offset); |
| 1565 | auto oh = readObject(description, og); | 1539 | auto oh = readObject(description, og); |
| 1566 | 1540 | ||
| 1567 | - if (!m->objects.isUnresolved(og)) { | 1541 | + if (!oh || !m->objects.isUnresolved(og)) { |
| 1568 | return oh; | 1542 | return oh; |
| 1569 | } | 1543 | } |
| 1570 | 1544 | ||
| @@ -1804,8 +1778,9 @@ Objects::resolveObjectsInStream(int obj_stream_number) | @@ -1804,8 +1778,9 @@ Objects::resolveObjectsInStream(int obj_stream_number) | ||
| 1804 | if (entry != m->xref_table.end() && entry->second.getType() == 2 && | 1778 | if (entry != m->xref_table.end() && entry->second.getType() == 2 && |
| 1805 | entry->second.getObjStreamNumber() == obj_stream_number) { | 1779 | entry->second.getObjStreamNumber() == obj_stream_number) { |
| 1806 | is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset); | 1780 | is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset); |
| 1807 | - auto oh = readObjectInStream(in, obj_stream_number, obj_id); | ||
| 1808 | - updateCache(og, oh.obj_sp(), end_before_space, end_after_space); | 1781 | + if (auto oh = QPDFParser::parse(in, obj_stream_number, obj_id, m->tokenizer, qpdf)) { |
| 1782 | + updateCache(og, oh.obj_sp(), end_before_space, end_after_space); | ||
| 1783 | + } | ||
| 1809 | } else { | 1784 | } else { |
| 1810 | QTC::TC("qpdf", "QPDF not caching overridden objstm object"); | 1785 | QTC::TC("qpdf", "QPDF not caching overridden objstm object"); |
| 1811 | } | 1786 | } |
libqpdf/qpdf/QPDFParser.hh
| @@ -41,8 +41,8 @@ class QPDFParser | @@ -41,8 +41,8 @@ class QPDFParser | ||
| 41 | QPDFObjectHandle::StringDecrypter* decrypter, | 41 | QPDFObjectHandle::StringDecrypter* decrypter, |
| 42 | QPDF* context); | 42 | QPDF* context); |
| 43 | 43 | ||
| 44 | - // For use by QPDF. Return parsed object and whether it is empty. | ||
| 45 | - static std::pair<QPDFObjectHandle, bool> parse( | 44 | + // For use by QPDF. |
| 45 | + static QPDFObjectHandle parse( | ||
| 46 | InputSource& input, | 46 | InputSource& input, |
| 47 | std::string const& object_description, | 47 | std::string const& object_description, |
| 48 | qpdf::Tokenizer& tokenizer, | 48 | qpdf::Tokenizer& tokenizer, |
| @@ -50,7 +50,7 @@ class QPDFParser | @@ -50,7 +50,7 @@ class QPDFParser | ||
| 50 | QPDF& context, | 50 | QPDF& context, |
| 51 | bool sanity_checks); | 51 | bool sanity_checks); |
| 52 | 52 | ||
| 53 | - static std::pair<QPDFObjectHandle, bool> parse( | 53 | + static QPDFObjectHandle parse( |
| 54 | qpdf::is::OffsetBuffer& input, | 54 | qpdf::is::OffsetBuffer& input, |
| 55 | int stream_id, | 55 | int stream_id, |
| 56 | int obj_id, | 56 | int obj_id, |
| @@ -112,8 +112,8 @@ class QPDFParser | @@ -112,8 +112,8 @@ class QPDFParser | ||
| 112 | int null_count{0}; | 112 | int null_count{0}; |
| 113 | }; | 113 | }; |
| 114 | 114 | ||
| 115 | - QPDFObjectHandle parse(bool& empty, bool content_stream); | ||
| 116 | - QPDFObjectHandle parse_first(bool& empty, bool content_stream); | 115 | + QPDFObjectHandle parse(bool content_stream = false); |
| 116 | + QPDFObjectHandle parse_first(bool content_stream); | ||
| 117 | QPDFObjectHandle parseRemainder(bool content_stream); | 117 | QPDFObjectHandle parseRemainder(bool content_stream); |
| 118 | void add(std::shared_ptr<QPDFObject>&& obj); | 118 | void add(std::shared_ptr<QPDFObject>&& obj); |
| 119 | void addNull(); | 119 | void addNull(); |
| @@ -158,6 +158,7 @@ class QPDFParser | @@ -158,6 +158,7 @@ class QPDFParser | ||
| 158 | int int_count{0}; | 158 | int int_count{0}; |
| 159 | long long int_buffer[2]{0, 0}; | 159 | long long int_buffer[2]{0, 0}; |
| 160 | qpdf_offset_t last_offset_buffer[2]{0, 0}; | 160 | qpdf_offset_t last_offset_buffer[2]{0, 0}; |
| 161 | + bool empty_{false}; | ||
| 161 | }; | 162 | }; |
| 162 | 163 | ||
| 163 | #endif // QPDFPARSER_HH | 164 | #endif // QPDFPARSER_HH |
libqpdf/qpdf/QPDF_private.hh
| @@ -1039,7 +1039,6 @@ class QPDF::Doc::Objects: Common | @@ -1039,7 +1039,6 @@ class QPDF::Doc::Objects: Common | ||
| 1039 | QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); | 1039 | QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); |
| 1040 | void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); | 1040 | void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); |
| 1041 | void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); | 1041 | void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); |
| 1042 | - QPDFObjectHandle readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id); | ||
| 1043 | size_t recoverStreamLength( | 1042 | size_t recoverStreamLength( |
| 1044 | std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset); | 1043 | std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset); |
| 1045 | 1044 |
qpdf/qtest/qpdf/issue-100.out
| @@ -7,7 +7,6 @@ WARNING: issue-100.pdf (trailer, offset 950): recovered trailer has no /Root ent | @@ -7,7 +7,6 @@ WARNING: issue-100.pdf (trailer, offset 950): recovered trailer has no /Root ent | ||
| 7 | WARNING: issue-100.pdf (trailer, offset 488): stream keyword found in trailer | 7 | WARNING: issue-100.pdf (trailer, offset 488): stream keyword found in trailer |
| 8 | WARNING: issue-100.pdf (trailer, offset 418): recovered trailer has no /Root entry | 8 | WARNING: issue-100.pdf (trailer, offset 418): recovered trailer has no /Root entry |
| 9 | WARNING: issue-100.pdf (object 1 0, offset 83): unexpected dictionary close token | 9 | WARNING: issue-100.pdf (object 1 0, offset 83): unexpected dictionary close token |
| 10 | -WARNING: issue-100.pdf (object 1 0, offset 87): expected endobj | ||
| 11 | WARNING: issue-100.pdf (object 5 0, offset 268): unknown token while reading object; treating as null | 10 | WARNING: issue-100.pdf (object 5 0, offset 268): unknown token while reading object; treating as null |
| 12 | WARNING: issue-100.pdf (object 5 0, offset 286): unknown token while reading object; treating as null | 11 | WARNING: issue-100.pdf (object 5 0, offset 286): unknown token while reading object; treating as null |
| 13 | WARNING: issue-100.pdf (object 5 0, offset 289): unknown token while reading object; treating as null | 12 | WARNING: issue-100.pdf (object 5 0, offset 289): unknown token while reading object; treating as null |
| @@ -15,9 +14,7 @@ WARNING: issue-100.pdf (object 5 0, offset 294): unknown token while reading obj | @@ -15,9 +14,7 @@ WARNING: issue-100.pdf (object 5 0, offset 294): unknown token while reading obj | ||
| 15 | WARNING: issue-100.pdf (object 5 0, offset 297): unknown token while reading object; treating as null | 14 | WARNING: issue-100.pdf (object 5 0, offset 297): unknown token while reading object; treating as null |
| 16 | WARNING: issue-100.pdf (object 5 0, offset 304): unknown token while reading object; treating as null | 15 | WARNING: issue-100.pdf (object 5 0, offset 304): unknown token while reading object; treating as null |
| 17 | WARNING: issue-100.pdf (object 5 0, offset 304): too many errors; giving up on reading object | 16 | WARNING: issue-100.pdf (object 5 0, offset 304): too many errors; giving up on reading object |
| 18 | -WARNING: issue-100.pdf (object 5 0, offset 308): expected endobj | ||
| 19 | WARNING: issue-100.pdf (object 8 0, offset 107): invalid character ()) in hexstring | 17 | WARNING: issue-100.pdf (object 8 0, offset 107): invalid character ()) in hexstring |
| 20 | -WARNING: issue-100.pdf (object 8 0, offset 109): expected endobj | ||
| 21 | WARNING: issue-100.pdf (object 9 0, offset 527): unknown token while reading object; treating as string | 18 | WARNING: issue-100.pdf (object 9 0, offset 527): unknown token while reading object; treating as string |
| 22 | WARNING: issue-100.pdf (object 9 0, offset 529): expected endobj | 19 | WARNING: issue-100.pdf (object 9 0, offset 529): expected endobj |
| 23 | WARNING: issue-100.pdf (object 10 0, offset 573): expected endobj | 20 | WARNING: issue-100.pdf (object 10 0, offset 573): expected endobj |
qpdf/qtest/qpdf/issue-101.out
| @@ -8,7 +8,6 @@ WARNING: issue-101.pdf (object 11 0, offset 637): unknown token while reading ob | @@ -8,7 +8,6 @@ WARNING: issue-101.pdf (object 11 0, offset 637): unknown token while reading ob | ||
| 8 | WARNING: issue-101.pdf (object 11 0, offset 639): unknown token while reading object; treating as null | 8 | WARNING: issue-101.pdf (object 11 0, offset 639): unknown token while reading object; treating as null |
| 9 | WARNING: issue-101.pdf (object 11 0, offset 644): unknown token while reading object; treating as null | 9 | WARNING: issue-101.pdf (object 11 0, offset 644): unknown token while reading object; treating as null |
| 10 | WARNING: issue-101.pdf (object 11 0, offset 644): too many errors; giving up on reading object | 10 | WARNING: issue-101.pdf (object 11 0, offset 644): too many errors; giving up on reading object |
| 11 | -WARNING: issue-101.pdf (object 11 0, offset 647): expected endobj | ||
| 12 | WARNING: issue-101.pdf (trailer, offset 4433): recovered trailer has no /Root entry | 11 | WARNING: issue-101.pdf (trailer, offset 4433): recovered trailer has no /Root entry |
| 13 | WARNING: issue-101.pdf (trailer, offset 4183): stream keyword found in trailer | 12 | WARNING: issue-101.pdf (trailer, offset 4183): stream keyword found in trailer |
| 14 | WARNING: issue-101.pdf (trailer, offset 4113): recovered trailer has no /Root entry | 13 | WARNING: issue-101.pdf (trailer, offset 4113): recovered trailer has no /Root entry |
| @@ -40,9 +39,7 @@ WARNING: issue-101.pdf (object 7 0, offset 3866): unknown token while reading ob | @@ -40,9 +39,7 @@ WARNING: issue-101.pdf (object 7 0, offset 3866): unknown token while reading ob | ||
| 40 | WARNING: issue-101.pdf (object 7 0, offset 3873): unknown token while reading object; treating as null | 39 | WARNING: issue-101.pdf (object 7 0, offset 3873): unknown token while reading object; treating as null |
| 41 | WARNING: issue-101.pdf (object 7 0, offset 3879): unknown token while reading object; treating as null | 40 | WARNING: issue-101.pdf (object 7 0, offset 3879): unknown token while reading object; treating as null |
| 42 | WARNING: issue-101.pdf (object 7 0, offset 3879): too many errors; giving up on reading object | 41 | WARNING: issue-101.pdf (object 7 0, offset 3879): too many errors; giving up on reading object |
| 43 | -WARNING: issue-101.pdf (object 7 0, offset 3888): expected endobj | ||
| 44 | WARNING: issue-101.pdf (object 8 0, offset 4067): invalid character ()) in hexstring | 42 | WARNING: issue-101.pdf (object 8 0, offset 4067): invalid character ()) in hexstring |
| 45 | -WARNING: issue-101.pdf (object 8 0, offset 4069): expected endobj | ||
| 46 | WARNING: issue-101.pdf (object 9 0, offset 2832): unknown token while reading object; treating as string | 43 | WARNING: issue-101.pdf (object 9 0, offset 2832): unknown token while reading object; treating as string |
| 47 | WARNING: issue-101.pdf (object 9 0, offset 2834): expected endobj | 44 | WARNING: issue-101.pdf (object 9 0, offset 2834): expected endobj |
| 48 | qpdf: issue-101.pdf: unable to find trailer dictionary while recovering damaged file | 45 | qpdf: issue-101.pdf: unable to find trailer dictionary while recovering damaged file |
qpdf/qtest/qpdf/issue-150.out
| 1 | WARNING: issue-150.pdf: can't find PDF header | 1 | WARNING: issue-150.pdf: can't find PDF header |
| 2 | WARNING: issue-150.pdf (xref stream: object 8 0, offset 56): treating object as null because of error during parsing : overflow/underflow converting 9900000000000000000 to 64-bit integer | 2 | WARNING: issue-150.pdf (xref stream: object 8 0, offset 56): treating object as null because of error during parsing : overflow/underflow converting 9900000000000000000 to 64-bit integer |
| 3 | -WARNING: issue-150.pdf (xref stream: object 8 0, offset 75): expected endobj | ||
| 4 | WARNING: issue-150.pdf: file is damaged | 3 | WARNING: issue-150.pdf: file is damaged |
| 5 | WARNING: issue-150.pdf (offset 4): xref not found | 4 | WARNING: issue-150.pdf (offset 4): xref not found |
| 6 | WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table | 5 | WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table |
| 6 | +WARNING: issue-150.pdf (object 8 0): object has offset 0 | ||
| 7 | qpdf: issue-150.pdf: unable to find trailer dictionary while recovering damaged file | 7 | qpdf: issue-150.pdf: unable to find trailer dictionary while recovering damaged file |
qpdf/qtest/qpdf/issue-1503.out
| @@ -6,11 +6,8 @@ WARNING: issue-1503.pdf (object 31 0, offset 813): unknown token while reading o | @@ -6,11 +6,8 @@ WARNING: issue-1503.pdf (object 31 0, offset 813): unknown token while reading o | ||
| 6 | WARNING: issue-1503.pdf (object 31 0, offset 851): unknown token while reading object; treating as null | 6 | WARNING: issue-1503.pdf (object 31 0, offset 851): unknown token while reading object; treating as null |
| 7 | WARNING: issue-1503.pdf (object 31 0, offset 856): unknown token while reading object; treating as null | 7 | WARNING: issue-1503.pdf (object 31 0, offset 856): unknown token while reading object; treating as null |
| 8 | WARNING: issue-1503.pdf (object 31 0, offset 861): unexpected 'endobj' or 'endstream' while reading object; giving up on reading object | 8 | WARNING: issue-1503.pdf (object 31 0, offset 861): unexpected 'endobj' or 'endstream' while reading object; giving up on reading object |
| 9 | -WARNING: issue-1503.pdf (object 31 0, offset 871): expected endobj | ||
| 10 | WARNING: issue-1503.pdf (object 38 0, offset 1126): unexpected 'endobj' or 'endstream' while reading object; giving up on reading object | 9 | WARNING: issue-1503.pdf (object 38 0, offset 1126): unexpected 'endobj' or 'endstream' while reading object; giving up on reading object |
| 11 | -WARNING: issue-1503.pdf (object 38 0, offset 1133): expected endobj | ||
| 12 | WARNING: issue-1503.pdf (object 40 0, offset 1195): unexpected array close token; giving up on reading object | 10 | WARNING: issue-1503.pdf (object 40 0, offset 1195): unexpected array close token; giving up on reading object |
| 13 | -WARNING: issue-1503.pdf (object 40 0, offset 1198): expected endobj | ||
| 14 | WARNING: issue-1503.pdf (object 41 0, offset 1359): stream dictionary lacks /Length key | 11 | WARNING: issue-1503.pdf (object 41 0, offset 1359): stream dictionary lacks /Length key |
| 15 | WARNING: issue-1503.pdf (object 41 0, offset 1411): attempting to recover stream length | 12 | WARNING: issue-1503.pdf (object 41 0, offset 1411): attempting to recover stream length |
| 16 | WARNING: issue-1503.pdf (object 41 0, offset 1411): recovered stream length: 54 | 13 | WARNING: issue-1503.pdf (object 41 0, offset 1411): recovered stream length: 54 |
| @@ -22,9 +19,7 @@ WARNING: issue-1503.pdf (object 44 0, offset 1814): name with stray # will not w | @@ -22,9 +19,7 @@ WARNING: issue-1503.pdf (object 44 0, offset 1814): name with stray # will not w | ||
| 22 | WARNING: issue-1503.pdf (object 44 0, offset 1821): unknown token while reading object; treating as null | 19 | WARNING: issue-1503.pdf (object 44 0, offset 1821): unknown token while reading object; treating as null |
| 23 | WARNING: issue-1503.pdf (object 44 0, offset 1826): unknown token while reading object; treating as null | 20 | WARNING: issue-1503.pdf (object 44 0, offset 1826): unknown token while reading object; treating as null |
| 24 | WARNING: issue-1503.pdf (object 44 0, offset 1826): too many errors; giving up on reading object | 21 | WARNING: issue-1503.pdf (object 44 0, offset 1826): too many errors; giving up on reading object |
| 25 | -WARNING: issue-1503.pdf (object 44 0, offset 1829): expected endobj | ||
| 26 | WARNING: issue-1503.pdf (object 46 0, offset 1923): unexpected array close token; giving up on reading object | 22 | WARNING: issue-1503.pdf (object 46 0, offset 1923): unexpected array close token; giving up on reading object |
| 27 | -WARNING: issue-1503.pdf (object 46 0, offset 1926): expected endobj | ||
| 28 | WARNING: issue-1503.pdf (object 47 0, offset 2087): stream dictionary lacks /Length key | 23 | WARNING: issue-1503.pdf (object 47 0, offset 2087): stream dictionary lacks /Length key |
| 29 | WARNING: issue-1503.pdf (object 47 0, offset 2139): attempting to recover stream length | 24 | WARNING: issue-1503.pdf (object 47 0, offset 2139): attempting to recover stream length |
| 30 | WARNING: issue-1503.pdf (object 47 0, offset 2139): recovered stream length: 54 | 25 | WARNING: issue-1503.pdf (object 47 0, offset 2139): recovered stream length: 54 |
| @@ -59,8 +54,6 @@ WARNING: issue-1503.pdf (object 151 0, offset 3836): unknown token while reading | @@ -59,8 +54,6 @@ WARNING: issue-1503.pdf (object 151 0, offset 3836): unknown token while reading | ||
| 59 | WARNING: issue-1503.pdf (object 151 0, offset 3958): unknown token while reading object; treating as null | 54 | WARNING: issue-1503.pdf (object 151 0, offset 3958): unknown token while reading object; treating as null |
| 60 | WARNING: issue-1503.pdf (object 152 0, offset 4088): parse error while reading object | 55 | WARNING: issue-1503.pdf (object 152 0, offset 4088): parse error while reading object |
| 61 | WARNING: issue-1503.pdf (object 152 0, offset 4088): unexpected EOF | 56 | WARNING: issue-1503.pdf (object 152 0, offset 4088): unexpected EOF |
| 62 | -WARNING: issue-1503.pdf (object 152 0, offset 4088): expected endobj | ||
| 63 | -WARNING: issue-1503.pdf (object 152 0, offset 4088): EOF after endobj | ||
| 64 | WARNING: issue-1503.pdf (object 155 0, offset 162): unknown token while reading object; treating as null | 57 | WARNING: issue-1503.pdf (object 155 0, offset 162): unknown token while reading object; treating as null |
| 65 | WARNING: issue-1503.pdf (object 155 0, offset 342): unknown token while reading object; treating as null | 58 | WARNING: issue-1503.pdf (object 155 0, offset 342): unknown token while reading object; treating as null |
| 66 | WARNING: issue-1503.pdf (object 155 0, offset 345): unknown token while reading object; treating as null | 59 | WARNING: issue-1503.pdf (object 155 0, offset 345): unknown token while reading object; treating as null |
qpdf/qtest/qpdf/issue-99.out
| @@ -8,15 +8,12 @@ WARNING: issue-99.pdf (object 1 0, offset 815): unknown token while reading obje | @@ -8,15 +8,12 @@ WARNING: issue-99.pdf (object 1 0, offset 815): unknown token while reading obje | ||
| 8 | WARNING: issue-99.pdf (object 1 0, offset 835): unknown token while reading object; treating as null | 8 | WARNING: issue-99.pdf (object 1 0, offset 835): unknown token while reading object; treating as null |
| 9 | WARNING: issue-99.pdf (object 1 0, offset 855): unknown token while reading object; treating as null | 9 | WARNING: issue-99.pdf (object 1 0, offset 855): unknown token while reading object; treating as null |
| 10 | WARNING: issue-99.pdf (object 1 0, offset 855): too many errors; giving up on reading object | 10 | WARNING: issue-99.pdf (object 1 0, offset 855): too many errors; giving up on reading object |
| 11 | -WARNING: issue-99.pdf (object 1 0, offset 858): expected endobj | ||
| 12 | WARNING: issue-99.pdf (object 2 0, offset 64): expected endobj | 11 | WARNING: issue-99.pdf (object 2 0, offset 64): expected endobj |
| 13 | WARNING: issue-99.pdf (object 5 0, offset 2452): unknown token while reading object; treating as string | 12 | WARNING: issue-99.pdf (object 5 0, offset 2452): unknown token while reading object; treating as string |
| 14 | WARNING: issue-99.pdf (object 6 0, offset 2506): unexpected array close token; giving up on reading object | 13 | WARNING: issue-99.pdf (object 6 0, offset 2506): unexpected array close token; giving up on reading object |
| 15 | -WARNING: issue-99.pdf (object 6 0, offset 2507): expected endobj | ||
| 16 | WARNING: issue-99.pdf (object 10 0, offset 3708): expected dictionary keys but found non-name objects; ignoring | 14 | WARNING: issue-99.pdf (object 10 0, offset 3708): expected dictionary keys but found non-name objects; ignoring |
| 17 | WARNING: issue-99.pdf (object 11 0, offset 4485): unknown token while reading object; treating as null | 15 | WARNING: issue-99.pdf (object 11 0, offset 4485): unknown token while reading object; treating as null |
| 18 | WARNING: issue-99.pdf (object 11 0, offset 4497): unexpected array close token; giving up on reading object | 16 | WARNING: issue-99.pdf (object 11 0, offset 4497): unexpected array close token; giving up on reading object |
| 19 | -WARNING: issue-99.pdf (object 11 0, offset 4499): expected endobj | ||
| 20 | WARNING: issue-99.pdf: unable to find trailer dictionary while recovering damaged file | 17 | WARNING: issue-99.pdf: unable to find trailer dictionary while recovering damaged file |
| 21 | WARNING: object 1 0: Pages tree includes non-dictionary object; ignoring | 18 | WARNING: object 1 0: Pages tree includes non-dictionary object; ignoring |
| 22 | WARNING: object 1 0: operation for dictionary attempted on object of type null: ignoring key replacement request | 19 | WARNING: object 1 0: operation for dictionary attempted on object of type null: ignoring key replacement request |
qpdf/qtest/qpdf/parse-object.out
| @@ -6,6 +6,4 @@ WARNING: parsed object: treating unexpected brace token as null | @@ -6,6 +6,4 @@ WARNING: parsed object: treating unexpected brace token as null | ||
| 6 | WARNING: parsed object: treating unexpected brace token as null | 6 | WARNING: parsed object: treating unexpected brace token as null |
| 7 | WARNING: parsed object: unexpected dictionary close token | 7 | WARNING: parsed object: unexpected dictionary close token |
| 8 | WARNING: bad-parse.qdf (object 7 0, offset 1121): unexpected EOF | 8 | WARNING: bad-parse.qdf (object 7 0, offset 1121): unexpected EOF |
| 9 | -WARNING: bad-parse.qdf (object 7 0, offset 1121): expected endobj | ||
| 10 | -WARNING: bad-parse.qdf (object 7 0, offset 1121): EOF after endobj | ||
| 11 | test 31 done | 9 | test 31 done |