Commit 628cf87315e36d3646a28aa45020326040ff4aed

Authored by m-holger
Committed by GitHub
2 parents b0ee84e5 9abd73ff

Merge pull request #1577 from m-holger/global

Refactor QPDFParser
libqpdf/CMakeLists.txt
... ... @@ -100,6 +100,7 @@ set(libqpdf_SOURCES
100 100 ResourceFinder.cc
101 101 SecureRandomDataProvider.cc
102 102 SF_FlateLzwDecode.cc
  103 + global.cc
103 104 qpdf-c.cc
104 105 qpdfjob-c.cc
105 106 qpdflogger-c.cc)
... ...
libqpdf/QPDFParser.cc
... ... @@ -15,6 +15,8 @@ using namespace qpdf;
15 15  
16 16 using ObjectPtr = std::shared_ptr<QPDFObject>;
17 17  
  18 +static uint32_t const& max_nesting{global::Limits::objects_max_nesting()};
  19 +
18 20 // The ParseGuard class allows QPDFParser to detect re-entrant parsing. It also provides
19 21 // special access to allow the parser to create unresolved objects and dangling references.
20 22 class QPDF::Doc::ParseGuard
... ... @@ -49,16 +51,18 @@ QPDFObjectHandle
49 51 QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context)
50 52 {
51 53 qpdf::Tokenizer tokenizer;
52   - bool empty = false;
53   - return QPDFParser(
54   - input,
55   - make_description(input.getName(), object_description),
56   - object_description,
57   - tokenizer,
58   - nullptr,
59   - context,
60   - false)
61   - .parse(empty, false);
  54 + if (auto result = QPDFParser(
  55 + input,
  56 + make_description(input.getName(), object_description),
  57 + object_description,
  58 + tokenizer,
  59 + nullptr,
  60 + context,
  61 + false)
  62 + .parse()) {
  63 + return result;
  64 + }
  65 + return {QPDFObject::create<QPDF_Null>()};
62 66 }
63 67  
64 68 QPDFObjectHandle
... ... @@ -68,19 +72,24 @@ QPDFParser::parse_content(
68 72 qpdf::Tokenizer& tokenizer,
69 73 QPDF* context)
70 74 {
71   - bool empty = false;
72   - return QPDFParser(
73   - input,
74   - std::move(sp_description),
75   - "content",
76   - tokenizer,
77   - nullptr,
78   - context,
79   - true,
80   - 0,
81   - 0,
82   - context && context->doc().reconstructed_xref())
83   - .parse(empty, true);
  75 + static const std::string content("content"); // GCC12 - make constexpr
  76 + auto p = QPDFParser(
  77 + input,
  78 + std::move(sp_description),
  79 + content,
  80 + tokenizer,
  81 + nullptr,
  82 + context,
  83 + true,
  84 + 0,
  85 + 0,
  86 + context && context->doc().reconstructed_xref());
  87 + auto result = p.parse(true);
  88 + if (result || p.empty_) {
  89 + // In content stream mode, leave object uninitialized to indicate EOF
  90 + return result;
  91 + }
  92 + return {QPDFObject::create<QPDF_Null>()};
84 93 }
85 94  
86 95 QPDFObjectHandle
... ... @@ -92,18 +101,25 @@ QPDFParser::parse(
92 101 QPDFObjectHandle::StringDecrypter* decrypter,
93 102 QPDF* context)
94 103 {
95   - return QPDFParser(
96   - input,
97   - make_description(input.getName(), object_description),
98   - object_description,
99   - *tokenizer.m,
100   - decrypter,
101   - context,
102   - false)
103   - .parse(empty, false);
  104 + // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the
  105 + // only user of the 'empty' member. When removing this overload also remove 'empty'.
  106 + auto p = QPDFParser(
  107 + input,
  108 + make_description(input.getName(), object_description),
  109 + object_description,
  110 + *tokenizer.m,
  111 + decrypter,
  112 + context,
  113 + false);
  114 + auto result = p.parse();
  115 + empty = p.empty_;
  116 + if (result) {
  117 + return result;
  118 + }
  119 + return {QPDFObject::create<QPDF_Null>()};
104 120 }
105 121  
106   -std::pair<QPDFObjectHandle, bool>
  122 +QPDFObjectHandle
107 123 QPDFParser::parse(
108 124 InputSource& input,
109 125 std::string const& object_description,
... ... @@ -112,54 +128,65 @@ QPDFParser::parse(
112 128 QPDF& context,
113 129 bool sanity_checks)
114 130 {
115   - bool empty{false};
116   - auto result = QPDFParser(
117   - input,
118   - make_description(input.getName(), object_description),
119   - object_description,
120   - tokenizer,
121   - decrypter,
122   - &context,
123   - true,
124   - 0,
125   - 0,
126   - sanity_checks)
127   - .parse(empty, false);
128   - return {result, empty};
  131 + return QPDFParser(
  132 + input,
  133 + make_description(input.getName(), object_description),
  134 + object_description,
  135 + tokenizer,
  136 + decrypter,
  137 + &context,
  138 + true,
  139 + 0,
  140 + 0,
  141 + sanity_checks)
  142 + .parse();
129 143 }
130 144  
131   -std::pair<QPDFObjectHandle, bool>
  145 +QPDFObjectHandle
132 146 QPDFParser::parse(
133 147 is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
134 148 {
135   - bool empty{false};
136   - auto result = QPDFParser(
137   - input,
138   - std::make_shared<QPDFObject::Description>(
139   - QPDFObject::ObjStreamDescr(stream_id, obj_id)),
140   - "",
141   - tokenizer,
142   - nullptr,
143   - &context,
144   - true,
145   - stream_id,
146   - obj_id)
147   - .parse(empty, false);
148   - return {result, empty};
  149 + return QPDFParser(
  150 + input,
  151 + std::make_shared<QPDFObject::Description>(
  152 + QPDFObject::ObjStreamDescr(stream_id, obj_id)),
  153 + "",
  154 + tokenizer,
  155 + nullptr,
  156 + &context,
  157 + true,
  158 + stream_id,
  159 + obj_id)
  160 + .parse();
  161 +}
  162 +
  163 +QPDFObjectHandle
  164 +QPDFParser::parse(bool content_stream)
  165 +{
  166 + try {
  167 + return parse_first(content_stream);
  168 + } catch (Error&) {
  169 + return {};
  170 + } catch (QPDFExc& e) {
  171 + throw e;
  172 + } catch (std::logic_error& e) {
  173 + throw e;
  174 + } catch (std::exception& e) {
  175 + warn("treating object as null because of error during parsing : "s + e.what());
  176 + return {};
  177 + }
149 178 }
150 179  
151 180 QPDFObjectHandle
152   -QPDFParser::parse(bool& empty, bool content_stream)
  181 +QPDFParser::parse_first(bool content_stream)
153 182 {
154 183 // This method must take care not to resolve any objects. Don't check the type of any object
155 184 // without first ensuring that it is a direct object. Otherwise, doing so may have the side
156 185 // effect of reading the object and changing the file pointer. If you do this, it will cause a
157 186 // logic error to be thrown from QPDF::inParse().
158 187  
159   - ParseGuard pg(context);
160   - empty = false;
  188 + QPDF::Doc::ParseGuard pg(context);
161 189 start = input.tell();
162   -
163 190 if (!tokenizer.nextToken(input, object_description)) {
164 191 warn(tokenizer.getErrorMessage());
165 192 }
... ... @@ -168,31 +195,27 @@ QPDFParser::parse(bool& empty, bool content_stream)
168 195 case QPDFTokenizer::tt_eof:
169 196 if (content_stream) {
170 197 // In content stream mode, leave object uninitialized to indicate EOF
  198 + empty_ = true;
171 199 return {};
172 200 }
173   - QTC::TC("qpdf", "QPDFParser eof in parse");
174 201 warn("unexpected EOF");
175   - return {QPDFObject::create<QPDF_Null>()};
  202 + return {};
176 203  
177 204 case QPDFTokenizer::tt_bad:
178   - QTC::TC("qpdf", "QPDFParser bad token in parse");
179   - return {QPDFObject::create<QPDF_Null>()};
  205 + return {};
180 206  
181 207 case QPDFTokenizer::tt_brace_open:
182 208 case QPDFTokenizer::tt_brace_close:
183   - QTC::TC("qpdf", "QPDFParser bad brace");
184 209 warn("treating unexpected brace token as null");
185   - return {QPDFObject::create<QPDF_Null>()};
  210 + return {};
186 211  
187 212 case QPDFTokenizer::tt_array_close:
188   - QTC::TC("qpdf", "QPDFParser bad array close");
189 213 warn("treating unexpected array close token as null");
190   - return {QPDFObject::create<QPDF_Null>()};
  214 + return {};
191 215  
192 216 case QPDFTokenizer::tt_dict_close:
193   - QTC::TC("qpdf", "QPDFParser bad dictionary close");
194 217 warn("unexpected dictionary close token");
195   - return {QPDFObject::create<QPDF_Null>()};
  218 + return {};
196 219  
197 220 case QPDFTokenizer::tt_array_open:
198 221 case QPDFTokenizer::tt_dict_open:
... ... @@ -224,13 +247,17 @@ QPDFParser::parse(bool& empty, bool content_stream)
224 247 if (content_stream) {
225 248 return withDescription<QPDF_Operator>(value);
226 249 } else if (value == "endobj") {
227   - // We just saw endobj without having read anything. Treat this as a null and do
228   - // not move the input source's offset.
  250 + // We just saw endobj without having read anything. Nothing in the PDF spec appears
  251 + // to allow empty objects, but they have been encountered in actual PDF files and
  252 + // Adobe Reader appears to ignore them. Treat this as a null and do not move the
  253 + // input source's offset.
  254 + empty_ = true;
229 255 input.seek(input.getLastOffset(), SEEK_SET);
230   - empty = true;
231   - return {QPDFObject::create<QPDF_Null>()};
  256 + if (!content_stream) {
  257 + warn("empty object treated as null");
  258 + }
  259 + return {};
232 260 } else {
233   - QTC::TC("qpdf", "QPDFParser treat word as string");
234 261 warn("unknown token while reading object; treating as string");
235 262 return withDescription<QPDF_String>(value);
236 263 }
... ... @@ -247,7 +274,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
247 274  
248 275 default:
249 276 warn("treating unknown token type as null while reading object");
250   - return {QPDFObject::create<QPDF_Null>()};
  277 + return {};
251 278 }
252 279 }
253 280  
... ... @@ -283,19 +310,19 @@ QPDFParser::parseRemainder(bool content_stream)
283 310 } else if (
284 311 int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word &&
285 312 tokenizer.getValue() == "R") {
286   - if (context == nullptr) {
287   - QTC::TC("qpdf", "QPDFParser indirect without context");
  313 + if (!context) {
288 314 throw std::logic_error(
289   - "QPDFParser::parse called without context on an object "
290   - "with indirect references");
  315 + "QPDFParser::parse called without context on an object with indirect "
  316 + "references");
291 317 }
292 318 auto id = QIntC::to_int(int_buffer[(int_count - 1) % 2]);
293 319 auto gen = QIntC::to_int(int_buffer[(int_count) % 2]);
294 320 if (!(id < 1 || gen < 0 || gen >= 65535)) {
295 321 add(ParseGuard::getObject(context, id, gen, parse_pdf));
296 322 } else {
297   - QTC::TC("qpdf", "QPDFParser invalid objgen");
298   - addNull();
  323 + add_bad_null(
  324 + "treating bad indirect reference (" + std::to_string(id) + " " +
  325 + std::to_string(gen) + " R) as null");
299 326 }
300 327 int_count = 0;
301 328 continue;
... ... @@ -317,34 +344,20 @@ QPDFParser::parseRemainder(bool content_stream)
317 344 // In content stream mode, leave object uninitialized to indicate EOF
318 345 return {};
319 346 }
320   - QTC::TC("qpdf", "QPDFParser eof in parseRemainder");
321 347 warn("unexpected EOF");
322   - return {QPDFObject::create<QPDF_Null>()};
  348 + return {};
323 349  
324 350 case QPDFTokenizer::tt_bad:
325   - QTC::TC("qpdf", "QPDFParser bad token in parseRemainder");
326   - if (tooManyBadTokens()) {
327   - return {QPDFObject::create<QPDF_Null>()};
328   - }
  351 + check_too_many_bad_tokens();
329 352 addNull();
330 353 continue;
331 354  
332 355 case QPDFTokenizer::tt_brace_open:
333 356 case QPDFTokenizer::tt_brace_close:
334   - QTC::TC("qpdf", "QPDFParser bad brace in parseRemainder");
335   - warn("treating unexpected brace token as null");
336   - if (tooManyBadTokens()) {
337   - return {QPDFObject::create<QPDF_Null>()};
338   - }
339   - addNull();
  357 + add_bad_null("treating unexpected brace token as null");
340 358 continue;
341 359  
342 360 case QPDFTokenizer::tt_array_close:
343   - if ((bad_count || sanity_checks) && !max_bad_count) {
344   - // Trigger warning.
345   - (void)tooManyBadTokens();
346   - return {QPDFObject::create<QPDF_Null>()};
347   - }
348 361 if (frame->state == st_array) {
349 362 auto object = frame->null_count > 100
350 363 ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true)
... ... @@ -361,33 +374,22 @@ QPDFParser::parseRemainder(bool content_stream)
361 374 frame = &stack.back();
362 375 add(std::move(object));
363 376 } else {
364   - QTC::TC("qpdf", "QPDFParser bad array close in parseRemainder");
365 377 if (sanity_checks) {
366 378 // During sanity checks, assume nesting of containers is corrupt and object is
367 379 // unusable.
368 380 warn("unexpected array close token; giving up on reading object");
369   - return {QPDFObject::create<QPDF_Null>()};
  381 + return {};
370 382 }
371   - warn("treating unexpected array close token as null");
372   - if (tooManyBadTokens()) {
373   - return {QPDFObject::create<QPDF_Null>()};
374   - }
375   - addNull();
  383 + add_bad_null("treating unexpected array close token as null");
376 384 }
377 385 continue;
378 386  
379 387 case QPDFTokenizer::tt_dict_close:
380   - if ((bad_count || sanity_checks) && !max_bad_count) {
381   - // Trigger warning.
382   - (void)tooManyBadTokens();
383   - return {QPDFObject::create<QPDF_Null>()};
384   - }
385 388 if (frame->state <= st_dictionary_value) {
386 389 // Attempt to recover more or less gracefully from invalid dictionaries.
387 390 auto& dict = frame->dict;
388 391  
389 392 if (frame->state == st_dictionary_value) {
390   - QTC::TC("qpdf", "QPDFParser no val for last key");
391 393 warn(
392 394 frame->offset,
393 395 "dictionary ended prematurely; using null as value for last key");
... ... @@ -426,22 +428,17 @@ QPDFParser::parseRemainder(bool content_stream)
426 428 // During sanity checks, assume nesting of containers is corrupt and object is
427 429 // unusable.
428 430 warn("unexpected dictionary close token; giving up on reading object");
429   - return {QPDFObject::create<QPDF_Null>()};
  431 + return {};
430 432 }
431   - warn("unexpected dictionary close token");
432   - if (tooManyBadTokens()) {
433   - return {QPDFObject::create<QPDF_Null>()};
434   - }
435   - addNull();
  433 + add_bad_null("unexpected dictionary close token");
436 434 }
437 435 continue;
438 436  
439 437 case QPDFTokenizer::tt_array_open:
440 438 case QPDFTokenizer::tt_dict_open:
441   - if (stack.size() > 499) {
442   - QTC::TC("qpdf", "QPDFParser too deep");
  439 + if (stack.size() > max_nesting) {
443 440 warn("ignoring excessively deeply nested data structure");
444   - return {QPDFObject::create<QPDF_Null>()};
  441 + return {};
445 442 } else {
446 443 b_contents = false;
447 444 stack.emplace_back(
... ... @@ -499,22 +496,15 @@ QPDFParser::parseRemainder(bool content_stream)
499 496 warn(
500 497 "unexpected 'endobj' or 'endstream' while reading object; giving up on "
501 498 "reading object");
502   - return {QPDFObject::create<QPDF_Null>()};
  499 + return {};
503 500 }
504 501  
505   - warn("unknown token while reading object; treating as null");
506   - if (tooManyBadTokens()) {
507   - return {QPDFObject::create<QPDF_Null>()};
508   - }
509   - addNull();
  502 + add_bad_null("unknown token while reading object; treating as null");
510 503 continue;
511 504 }
512 505  
513   - QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");
514 506 warn("unknown token while reading object; treating as string");
515   - if (tooManyBadTokens()) {
516   - return {QPDFObject::create<QPDF_Null>()};
517   - }
  507 + check_too_many_bad_tokens();
518 508 addScalar<QPDF_String>(tokenizer.getValue());
519 509  
520 510 continue;
... ... @@ -538,11 +528,7 @@ QPDFParser::parseRemainder(bool content_stream)
538 528 continue;
539 529  
540 530 default:
541   - warn("treating unknown token type as null while reading object");
542   - if (tooManyBadTokens()) {
543   - return {QPDFObject::create<QPDF_Null>()};
544   - }
545   - addNull();
  531 + add_bad_null("treating unknown token type as null while reading object");
546 532 }
547 533 }
548 534 }
... ... @@ -581,6 +567,14 @@ QPDFParser::addNull()
581 567 }
582 568  
583 569 void
  570 +QPDFParser::add_bad_null(std::string const& msg)
  571 +{
  572 + warn(msg);
  573 + check_too_many_bad_tokens();
  574 + addNull();
  575 +}
  576 +
  577 +void
584 578 QPDFParser::addInt(int count)
585 579 {
586 580 auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]);
... ... @@ -592,12 +586,12 @@ template <typename T, typename... Args>
592 586 void
593 587 QPDFParser::addScalar(Args&&... args)
594 588 {
595   - if ((bad_count || sanity_checks) &&
596   - (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
  589 + auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
  590 + if (frame->olist.size() > limit || frame->dict.size() > limit) {
597 591 // Stop adding scalars. We are going to abort when the close token or a bad token is
598 592 // encountered.
599 593 max_bad_count = 0;
600   - return;
  594 + check_too_many_bad_tokens(); // always throws Error()
601 595 }
602 596 auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
603 597 obj->setDescription(context, description, input.getLastOffset());
... ... @@ -647,34 +641,34 @@ QPDFParser::fixMissingKeys()
647 641 }
648 642 }
649 643  
650   -bool
651   -QPDFParser::tooManyBadTokens()
  644 +void
  645 +QPDFParser::check_too_many_bad_tokens()
652 646 {
653   - if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) {
  647 + auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
  648 + if (frame->olist.size() > limit || frame->dict.size() > limit) {
654 649 if (bad_count) {
655 650 warn(
656   - "encountered errors while parsing an array or dictionary with more than 5000 "
657   - "elements; giving up on reading object");
658   - return true;
  651 + "encountered errors while parsing an array or dictionary with more than " +
  652 + std::to_string(limit) + " elements; giving up on reading object");
  653 + throw Error();
659 654 }
660 655 warn(
661   - "encountered an array or dictionary with more than 5000 elements during xref recovery; "
662   - "giving up on reading object");
  656 + "encountered an array or dictionary with more than " + std::to_string(limit) +
  657 + " elements during xref recovery; giving up on reading object");
663 658 }
664 659 if (max_bad_count && --max_bad_count > 0 && good_count > 4) {
665 660 good_count = 0;
666 661 bad_count = 1;
667   - return false;
  662 + return;
668 663 }
669 664 if (++bad_count > 5 ||
670 665 (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) {
671 666 // Give up after 5 errors in close proximity or if the number of missing dictionary keys
672 667 // exceeds the remaining number of allowable total errors.
673 668 warn("too many errors; giving up on reading object");
674   - return true;
  669 + throw Error();
675 670 }
676 671 good_count = 0;
677   - return false;
678 672 }
679 673  
680 674 void
... ... @@ -693,7 +687,6 @@ QPDFParser::warn(QPDFExc const& e) const
693 687 void
694 688 QPDFParser::warnDuplicateKey()
695 689 {
696   - QTC::TC("qpdf", "QPDFParser duplicate dict key");
697 690 warn(
698 691 frame->offset,
699 692 "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones");
... ...
libqpdf/QPDF_objects.cc
... ... @@ -1233,13 +1233,9 @@ QPDFObjectHandle
1233 1233 Objects::readTrailer()
1234 1234 {
1235 1235 qpdf_offset_t offset = m->file->tell();
1236   - auto [object, empty] =
  1236 + auto object =
1237 1237 QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, qpdf, m->reconstructed_xref);
1238   - if (empty) {
1239   - // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1240   - // actual PDF files and Adobe Reader appears to ignore them.
1241   - warn(damagedPDF("trailer", "empty object treated as null"));
1242   - } else if (object.isDictionary() && m->objects.readToken(*m->file).isWord("stream")) {
  1238 + if (object.isDictionary() && m->objects.readToken(*m->file).isWord("stream")) {
1243 1239 warn(damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer"));
1244 1240 }
1245 1241 // Override last_offset so that it points to the beginning of the object we just read
... ... @@ -1255,19 +1251,15 @@ Objects::readObject(std::string const& description, QPDFObjGen og)
1255 1251  
1256 1252 StringDecrypter decrypter{&qpdf, og};
1257 1253 StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr;
1258   - auto [object, empty] = QPDFParser::parse(
  1254 + auto object = QPDFParser::parse(
1259 1255 *m->file,
1260 1256 m->last_object_description,
1261 1257 m->tokenizer,
1262 1258 decrypter_ptr,
1263 1259 qpdf,
1264 1260 m->reconstructed_xref || m->in_read_xref_stream);
1265   - ;
1266   - if (empty) {
1267   - // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1268   - // actual PDF files and Adobe Reader appears to ignore them.
1269   - warn(damagedPDF(*m->file, m->file->getLastOffset(), "empty object treated as null"));
1270   - return object;
  1261 + if (!object) {
  1262 + return {};
1271 1263 }
1272 1264 auto token = readToken(*m->file);
1273 1265 if (object.isDictionary() && token.isWord("stream")) {
... ... @@ -1366,24 +1358,6 @@ Objects::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_off
1366 1358 }
1367 1359 }
1368 1360  
1369   -QPDFObjectHandle
1370   -Objects::readObjectInStream(is::OffsetBuffer& input, int stream_id, int obj_id)
1371   -{
1372   - auto [object, empty] = QPDFParser::parse(input, stream_id, obj_id, m->tokenizer, qpdf);
1373   - if (empty) {
1374   - // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1375   - // actual PDF files and Adobe Reader appears to ignore them.
1376   - warn(QPDFExc(
1377   - qpdf_e_damaged_pdf,
1378   - m->file->getName() + " object stream " + std::to_string(stream_id),
1379   - +"object " + std::to_string(obj_id) + " 0, offset " +
1380   - std::to_string(input.getLastOffset()),
1381   - 0,
1382   - "empty object treated as null"));
1383   - }
1384   - return object;
1385   -}
1386   -
1387 1361 bool
1388 1362 Objects ::findEndstream()
1389 1363 {
... ... @@ -1536,25 +1510,25 @@ Objects::readObjectAtOffset(
1536 1510 return;
1537 1511 }
1538 1512  
1539   - QPDFObjectHandle oh = readObject(description, og);
  1513 + if (auto oh = readObject(description, og)) {
  1514 + // Determine the end offset of this object before and after white space. We use these
  1515 + // numbers to validate linearization hint tables. Offsets and lengths of objects may imply
  1516 + // the end of an object to be anywhere between these values.
  1517 + qpdf_offset_t end_before_space = m->file->tell();
1540 1518  
1541   - // Determine the end offset of this object before and after white space. We use these
1542   - // numbers to validate linearization hint tables. Offsets and lengths of objects may imply
1543   - // the end of an object to be anywhere between these values.
1544   - qpdf_offset_t end_before_space = m->file->tell();
1545   -
1546   - // skip over spaces
1547   - while (true) {
1548   - char ch;
1549   - if (!m->file->read(&ch, 1)) {
1550   - throw damagedPDF(m->file->tell(), "EOF after endobj");
1551   - }
1552   - if (!isspace(static_cast<unsigned char>(ch))) {
1553   - m->file->seek(-1, SEEK_CUR);
1554   - break;
  1519 + // skip over spaces
  1520 + while (true) {
  1521 + char ch;
  1522 + if (!m->file->read(&ch, 1)) {
  1523 + throw damagedPDF(m->file->tell(), "EOF after endobj");
  1524 + }
  1525 + if (!isspace(static_cast<unsigned char>(ch))) {
  1526 + m->file->seek(-1, SEEK_CUR);
  1527 + break;
  1528 + }
1555 1529 }
  1530 + m->objects.updateCache(og, oh.obj_sp(), end_before_space, m->file->tell());
1556 1531 }
1557   - m->objects.updateCache(og, oh.obj_sp(), end_before_space, m->file->tell());
1558 1532 }
1559 1533  
1560 1534 QPDFObjectHandle
... ... @@ -1564,7 +1538,7 @@ Objects::readObjectAtOffset(
1564 1538 auto og = read_object_start(offset);
1565 1539 auto oh = readObject(description, og);
1566 1540  
1567   - if (!m->objects.isUnresolved(og)) {
  1541 + if (!oh || !m->objects.isUnresolved(og)) {
1568 1542 return oh;
1569 1543 }
1570 1544  
... ... @@ -1804,8 +1778,9 @@ Objects::resolveObjectsInStream(int obj_stream_number)
1804 1778 if (entry != m->xref_table.end() && entry->second.getType() == 2 &&
1805 1779 entry->second.getObjStreamNumber() == obj_stream_number) {
1806 1780 is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset);
1807   - auto oh = readObjectInStream(in, obj_stream_number, obj_id);
1808   - updateCache(og, oh.obj_sp(), end_before_space, end_after_space);
  1781 + if (auto oh = QPDFParser::parse(in, obj_stream_number, obj_id, m->tokenizer, qpdf)) {
  1782 + updateCache(og, oh.obj_sp(), end_before_space, end_after_space);
  1783 + }
1809 1784 } else {
1810 1785 QTC::TC("qpdf", "QPDF not caching overridden objstm object");
1811 1786 }
... ...
libqpdf/global.cc 0 → 100644
  1 +#include <qpdf/global_private.hh>
  2 +
  3 +using namespace qpdf;
  4 +
  5 +global::Limits global::Limits::l;
... ...
libqpdf/qpdf/QPDFObject_private.hh
... ... @@ -5,11 +5,12 @@
5 5 // include/qpdf/QPDFObject.hh. See comments there for an explanation.
6 6  
7 7 #include <qpdf/Constants.h>
  8 +#include <qpdf/Types.h>
  9 +
8 10 #include <qpdf/JSON.hh>
9 11 #include <qpdf/JSON_writer.hh>
10 12 #include <qpdf/QPDF.hh>
11 13 #include <qpdf/QPDFObjGen.hh>
12   -#include <qpdf/Types.h>
13 14  
14 15 #include <map>
15 16 #include <memory>
... ...
libqpdf/qpdf/QPDFParser.hh
... ... @@ -5,13 +5,24 @@
5 5 #include <qpdf/QPDFObjectHandle_private.hh>
6 6 #include <qpdf/QPDFObject_private.hh>
7 7 #include <qpdf/QPDFTokenizer_private.hh>
  8 +#include <qpdf/global_private.hh>
8 9  
9 10 #include <memory>
10 11 #include <string>
11 12  
  13 +using namespace qpdf;
  14 +using namespace qpdf::global;
  15 +
12 16 class QPDFParser
13 17 {
14 18 public:
  19 + class Error: public std::exception
  20 + {
  21 + public:
  22 + Error() = default;
  23 + virtual ~Error() noexcept = default;
  24 + };
  25 +
15 26 static QPDFObjectHandle
16 27 parse(InputSource& input, std::string const& object_description, QPDF* context);
17 28  
... ... @@ -30,8 +41,8 @@ class QPDFParser
30 41 QPDFObjectHandle::StringDecrypter* decrypter,
31 42 QPDF* context);
32 43  
33   - // For use by QPDF. Return parsed object and whether it is empty.
34   - static std::pair<QPDFObjectHandle, bool> parse(
  44 + // For use by QPDF.
  45 + static QPDFObjectHandle parse(
35 46 InputSource& input,
36 47 std::string const& object_description,
37 48 qpdf::Tokenizer& tokenizer,
... ... @@ -39,7 +50,7 @@ class QPDFParser
39 50 QPDF& context,
40 51 bool sanity_checks);
41 52  
42   - static std::pair<QPDFObjectHandle, bool> parse(
  53 + static QPDFObjectHandle parse(
43 54 qpdf::is::OffsetBuffer& input,
44 55 int stream_id,
45 56 int obj_id,
... ... @@ -101,14 +112,16 @@ class QPDFParser
101 112 int null_count{0};
102 113 };
103 114  
104   - QPDFObjectHandle parse(bool& empty, bool content_stream);
  115 + QPDFObjectHandle parse(bool content_stream = false);
  116 + QPDFObjectHandle parse_first(bool content_stream);
105 117 QPDFObjectHandle parseRemainder(bool content_stream);
106 118 void add(std::shared_ptr<QPDFObject>&& obj);
107 119 void addNull();
  120 + void add_bad_null(std::string const& msg);
108 121 void addInt(int count);
109 122 template <typename T, typename... Args>
110 123 void addScalar(Args&&... args);
111   - bool tooManyBadTokens();
  124 + void check_too_many_bad_tokens();
112 125 void warnDuplicateKey();
113 126 void fixMissingKeys();
114 127 void warn(qpdf_offset_t offset, std::string const& msg) const;
... ... @@ -136,7 +149,7 @@ class QPDFParser
136 149 // it only gets incremented or reset when a bad token is encountered.
137 150 int bad_count{0};
138 151 // Number of bad tokens (remaining) before giving up.
139   - int max_bad_count{15};
  152 + uint32_t max_bad_count{Limits::objects_max_errors()};
140 153 // Number of good tokens since last bad token. Irrelevant if bad_count == 0.
141 154 int good_count{0};
142 155 // Start offset including any leading whitespace.
... ... @@ -145,6 +158,7 @@ class QPDFParser
145 158 int int_count{0};
146 159 long long int_buffer[2]{0, 0};
147 160 qpdf_offset_t last_offset_buffer[2]{0, 0};
  161 + bool empty_{false};
148 162 };
149 163  
150 164 #endif // QPDFPARSER_HH
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -1039,7 +1039,6 @@ class QPDF::Doc::Objects: Common
1039 1039 QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
1040 1040 void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
1041 1041 void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
1042   - QPDFObjectHandle readObjectInStream(qpdf::is::OffsetBuffer& input, int stream_id, int obj_id);
1043 1042 size_t recoverStreamLength(
1044 1043 std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset);
1045 1044  
... ...
libqpdf/qpdf/global_private.hh 0 → 100644
  1 +
  2 +#ifndef GLOBAL_PRIVATE_HH
  3 +#define GLOBAL_PRIVATE_HH
  4 +
  5 +#include <qpdf/Constants.h>
  6 +
  7 +#include <cstdint>
  8 +#include <limits>
  9 +
  10 +namespace qpdf
  11 +{
  12 + namespace global
  13 + {
  14 + class Limits
  15 + {
  16 + public:
  17 + Limits(Limits const&) = delete;
  18 + Limits(Limits&&) = delete;
  19 + Limits& operator=(Limits const&) = delete;
  20 + Limits& operator=(Limits&&) = delete;
  21 +
  22 + static uint32_t const&
  23 + objects_max_nesting()
  24 + {
  25 + return l.objects_max_nesting_;
  26 + }
  27 +
  28 + static uint32_t const&
  29 + objects_max_errors()
  30 + {
  31 + return l.objects_max_errors_;
  32 + }
  33 +
  34 + static uint32_t const&
  35 + objects_max_container_size(bool damaged)
  36 + {
  37 + return damaged ? l.objects_max_container_size_damaged_
  38 + : l.objects_max_container_size_;
  39 + }
  40 +
  41 + private:
  42 + Limits() = default;
  43 + ~Limits() = default;
  44 +
  45 + static Limits l;
  46 +
  47 + uint32_t objects_max_nesting_{499};
  48 + uint32_t objects_max_errors_{15};
  49 + uint32_t objects_max_container_size_{std::numeric_limits<uint32_t>::max()};
  50 + uint32_t objects_max_container_size_damaged_{5'000};
  51 + };
  52 +
  53 + } // namespace global
  54 +
  55 +} // namespace qpdf
  56 +
  57 +#endif // GLOBAL_PRIVATE_HH
... ...
qpdf/qpdf.testcov
... ... @@ -27,11 +27,6 @@ main QTest stream 0
27 27 QPDF lin write nshared_total > nshared_first_page 1
28 28 QPDFWriter encrypted hint stream 0
29 29 QPDF xref gen > 0 1
30   -QPDFParser bad brace 0
31   -QPDFParser bad brace in parseRemainder 0
32   -QPDFParser bad array close 0
33   -QPDFParser bad array close in parseRemainder 0
34   -QPDFParser bad dictionary close 0
35 30 QPDFTokenizer bad ) 0
36 31 QPDFTokenizer bad > 0
37 32 QPDFTokenizer bad hexstring character 0
... ... @@ -123,7 +118,6 @@ QPDF_Stream provider length not provided 0
123 118 QPDF_Stream unknown stream length 0
124 119 QPDF replaceReserved 0
125 120 QPDFWriter copy use_aes 1
126   -QPDFParser indirect without context 0
127 121 QPDFObjectHandle trailing data in parse 0
128 122 QPDFTokenizer EOF reading token 0
129 123 QPDFTokenizer EOF reading appendable token 0
... ... @@ -145,11 +139,7 @@ QPDFJob pages range omitted in middle 0
145 139 QPDFWriter standard deterministic ID 1
146 140 QPDFWriter linearized deterministic ID 1
147 141 qpdf-c called qpdf_set_deterministic_ID 0
148   -QPDFParser invalid objgen 0
149   -QPDFParser treat word as string 0
150   -QPDFParser treat word as string in parseRemainder 0
151 142 QPDFParser found fake 1
152   -QPDFParser no val for last key 0
153 143 QPDFObjectHandle errors in parsecontent 0
154 144 QPDFJob split-pages %d 0
155 145 QPDFJob split-pages .pdf 0
... ... @@ -168,10 +158,6 @@ Pl_QPDFTokenizer found ID 0
168 158 QPDFObjectHandle coalesce called on stream 0
169 159 QPDFObjectHandle coalesce provide stream data 0
170 160 QPDF_Stream bad token at end during normalize 0
171   -QPDFParser bad token in parse 0
172   -QPDFParser bad token in parseRemainder 0
173   -QPDFParser eof in parse 0
174   -QPDFParser eof in parseRemainder 0
175 161 QPDFObjectHandle boolean returning false 0
176 162 QPDFObjectHandle real returning 0.0 0
177 163 QPDFObjectHandle operator returning fake value 0
... ... @@ -189,7 +175,6 @@ QPDFObjectHandle dictionary ignoring replaceKey 0
189 175 QPDFObjectHandle numeric non-numeric 0
190 176 QPDFObjectHandle erase array bounds 0
191 177 qpdf-c called qpdf_check_pdf 0
192   -QPDFParser too deep 0
193 178 QPDFFormFieldObjectHelper TU present 0
194 179 QPDFFormFieldObjectHelper TM present 0
195 180 QPDFFormFieldObjectHelper TU absent 0
... ... @@ -252,7 +237,6 @@ QPDFJob image optimize bits per component 0
252 237 QPDF eof skipping spaces before xref 1
253 238 QPDF_encryption user matches owner V < 5 0
254 239 QPDF_encryption same password 1
255   -QPDFParser duplicate dict key 0
256 240 QPDFWriter no encryption sig contents 0
257 241 QPDFPageObjectHelper colorspace lookup 0
258 242 QPDFPageObjectHelper filter form xobject 0
... ...
qpdf/qtest/qpdf/issue-100.out
... ... @@ -7,7 +7,6 @@ WARNING: issue-100.pdf (trailer, offset 950): recovered trailer has no /Root ent
7 7 WARNING: issue-100.pdf (trailer, offset 488): stream keyword found in trailer
8 8 WARNING: issue-100.pdf (trailer, offset 418): recovered trailer has no /Root entry
9 9 WARNING: issue-100.pdf (object 1 0, offset 83): unexpected dictionary close token
10   -WARNING: issue-100.pdf (object 1 0, offset 87): expected endobj
11 10 WARNING: issue-100.pdf (object 5 0, offset 268): unknown token while reading object; treating as null
12 11 WARNING: issue-100.pdf (object 5 0, offset 286): unknown token while reading object; treating as null
13 12 WARNING: issue-100.pdf (object 5 0, offset 289): unknown token while reading object; treating as null
... ... @@ -15,9 +14,7 @@ WARNING: issue-100.pdf (object 5 0, offset 294): unknown token while reading obj
15 14 WARNING: issue-100.pdf (object 5 0, offset 297): unknown token while reading object; treating as null
16 15 WARNING: issue-100.pdf (object 5 0, offset 304): unknown token while reading object; treating as null
17 16 WARNING: issue-100.pdf (object 5 0, offset 304): too many errors; giving up on reading object
18   -WARNING: issue-100.pdf (object 5 0, offset 308): expected endobj
19 17 WARNING: issue-100.pdf (object 8 0, offset 107): invalid character ()) in hexstring
20   -WARNING: issue-100.pdf (object 8 0, offset 109): expected endobj
21 18 WARNING: issue-100.pdf (object 9 0, offset 527): unknown token while reading object; treating as string
22 19 WARNING: issue-100.pdf (object 9 0, offset 529): expected endobj
23 20 WARNING: issue-100.pdf (object 10 0, offset 573): expected endobj
... ...
qpdf/qtest/qpdf/issue-101.out
... ... @@ -8,7 +8,6 @@ WARNING: issue-101.pdf (object 11 0, offset 637): unknown token while reading ob
8 8 WARNING: issue-101.pdf (object 11 0, offset 639): unknown token while reading object; treating as null
9 9 WARNING: issue-101.pdf (object 11 0, offset 644): unknown token while reading object; treating as null
10 10 WARNING: issue-101.pdf (object 11 0, offset 644): too many errors; giving up on reading object
11   -WARNING: issue-101.pdf (object 11 0, offset 647): expected endobj
12 11 WARNING: issue-101.pdf (trailer, offset 4433): recovered trailer has no /Root entry
13 12 WARNING: issue-101.pdf (trailer, offset 4183): stream keyword found in trailer
14 13 WARNING: issue-101.pdf (trailer, offset 4113): recovered trailer has no /Root entry
... ... @@ -31,6 +30,8 @@ WARNING: issue-101.pdf (trailer, offset 1508): stream keyword found in trailer
31 30 WARNING: issue-101.pdf (trailer, offset 1438): recovered trailer has no /Root entry
32 31 WARNING: issue-101.pdf (object 2 0, offset 244): unknown token while reading object; treating as null
33 32 WARNING: issue-101.pdf (object 2 0, offset 29): dictionary has duplicated key /Parent; last occurrence overrides earlier ones
  33 +WARNING: issue-101.pdf (object 2 0, offset 333): treating bad indirect reference (0 0 R) as null
  34 +WARNING: issue-101.pdf (object 5 0, offset 1247): treating bad indirect reference (0 0 R) as null
34 35 WARNING: issue-101.pdf (object 5 0, offset 1242): dictionary ended prematurely; using null as value for last key
35 36 WARNING: issue-101.pdf (object 5 0, offset 1242): expected dictionary keys but found non-name objects; ignoring
36 37 WARNING: issue-101.pdf (object 7 0, offset 3855): unknown token while reading object; treating as null
... ... @@ -40,9 +41,7 @@ WARNING: issue-101.pdf (object 7 0, offset 3866): unknown token while reading ob
40 41 WARNING: issue-101.pdf (object 7 0, offset 3873): unknown token while reading object; treating as null
41 42 WARNING: issue-101.pdf (object 7 0, offset 3879): unknown token while reading object; treating as null
42 43 WARNING: issue-101.pdf (object 7 0, offset 3879): too many errors; giving up on reading object
43   -WARNING: issue-101.pdf (object 7 0, offset 3888): expected endobj
44 44 WARNING: issue-101.pdf (object 8 0, offset 4067): invalid character ()) in hexstring
45   -WARNING: issue-101.pdf (object 8 0, offset 4069): expected endobj
46 45 WARNING: issue-101.pdf (object 9 0, offset 2832): unknown token while reading object; treating as string
47 46 WARNING: issue-101.pdf (object 9 0, offset 2834): expected endobj
48 47 qpdf: issue-101.pdf: unable to find trailer dictionary while recovering damaged file
... ...
qpdf/qtest/qpdf/issue-118.out
1 1 WARNING: issue-118.pdf: can't find PDF header
  2 +WARNING: issue-118.pdf (xref stream: object 8 0, offset 720): treating bad indirect reference (0 0 R) as null
2 3 WARNING: issue-118.pdf (xref stream, offset 732): self-referential object stream 2
3 4 WARNING: issue-118.pdf (xref stream, offset 732): object stream id 12336 for object 3 is impossibly large
4 5 WARNING: issue-118.pdf (xref stream, offset 732): object stream id 12336 for object 4 is impossibly large
... ...
qpdf/qtest/qpdf/issue-150.out
1 1 WARNING: issue-150.pdf: can't find PDF header
  2 +WARNING: issue-150.pdf (xref stream: object 8 0, offset 56): treating object as null because of error during parsing : overflow/underflow converting 9900000000000000000 to 64-bit integer
2 3 WARNING: issue-150.pdf: file is damaged
3   -WARNING: issue-150.pdf: error reading xref: overflow/underflow converting 9900000000000000000 to 64-bit integer
  4 +WARNING: issue-150.pdf (offset 4): xref not found
4 5 WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table
5 6 WARNING: issue-150.pdf (object 8 0): object has offset 0
6 7 qpdf: issue-150.pdf: unable to find trailer dictionary while recovering damaged file
... ...
qpdf/qtest/qpdf/issue-1503.out
... ... @@ -6,11 +6,8 @@ WARNING: issue-1503.pdf (object 31 0, offset 813): unknown token while reading o
6 6 WARNING: issue-1503.pdf (object 31 0, offset 851): unknown token while reading object; treating as null
7 7 WARNING: issue-1503.pdf (object 31 0, offset 856): unknown token while reading object; treating as null
8 8 WARNING: issue-1503.pdf (object 31 0, offset 861): unexpected 'endobj' or 'endstream' while reading object; giving up on reading object
9   -WARNING: issue-1503.pdf (object 31 0, offset 871): expected endobj
10 9 WARNING: issue-1503.pdf (object 38 0, offset 1126): unexpected 'endobj' or 'endstream' while reading object; giving up on reading object
11   -WARNING: issue-1503.pdf (object 38 0, offset 1133): expected endobj
12 10 WARNING: issue-1503.pdf (object 40 0, offset 1195): unexpected array close token; giving up on reading object
13   -WARNING: issue-1503.pdf (object 40 0, offset 1198): expected endobj
14 11 WARNING: issue-1503.pdf (object 41 0, offset 1359): stream dictionary lacks /Length key
15 12 WARNING: issue-1503.pdf (object 41 0, offset 1411): attempting to recover stream length
16 13 WARNING: issue-1503.pdf (object 41 0, offset 1411): recovered stream length: 54
... ... @@ -22,9 +19,7 @@ WARNING: issue-1503.pdf (object 44 0, offset 1814): name with stray # will not w
22 19 WARNING: issue-1503.pdf (object 44 0, offset 1821): unknown token while reading object; treating as null
23 20 WARNING: issue-1503.pdf (object 44 0, offset 1826): unknown token while reading object; treating as null
24 21 WARNING: issue-1503.pdf (object 44 0, offset 1826): too many errors; giving up on reading object
25   -WARNING: issue-1503.pdf (object 44 0, offset 1829): expected endobj
26 22 WARNING: issue-1503.pdf (object 46 0, offset 1923): unexpected array close token; giving up on reading object
27   -WARNING: issue-1503.pdf (object 46 0, offset 1926): expected endobj
28 23 WARNING: issue-1503.pdf (object 47 0, offset 2087): stream dictionary lacks /Length key
29 24 WARNING: issue-1503.pdf (object 47 0, offset 2139): attempting to recover stream length
30 25 WARNING: issue-1503.pdf (object 47 0, offset 2139): recovered stream length: 54
... ... @@ -59,8 +54,6 @@ WARNING: issue-1503.pdf (object 151 0, offset 3836): unknown token while reading
59 54 WARNING: issue-1503.pdf (object 151 0, offset 3958): unknown token while reading object; treating as null
60 55 WARNING: issue-1503.pdf (object 152 0, offset 4088): parse error while reading object
61 56 WARNING: issue-1503.pdf (object 152 0, offset 4088): unexpected EOF
62   -WARNING: issue-1503.pdf (object 152 0, offset 4088): expected endobj
63   -WARNING: issue-1503.pdf (object 152 0, offset 4088): EOF after endobj
64 57 WARNING: issue-1503.pdf (object 155 0, offset 162): unknown token while reading object; treating as null
65 58 WARNING: issue-1503.pdf (object 155 0, offset 342): unknown token while reading object; treating as null
66 59 WARNING: issue-1503.pdf (object 155 0, offset 345): unknown token while reading object; treating as null
... ...
qpdf/qtest/qpdf/issue-335a.out
... ... @@ -39,12 +39,14 @@ WARNING: issue-335a.pdf (trailer, offset 22844): expected dictionary keys but fo
39 39 WARNING: issue-335a.pdf (trailer, offset 22880): stream keyword found in trailer
40 40 WARNING: issue-335a.pdf (trailer, offset 22840): recovered trailer has no /Root entry
41 41 WARNING: issue-335a.pdf (trailer, offset 22702): unknown token while reading object; treating as null
  42 +WARNING: issue-335a.pdf (trailer, offset 22713): treating bad indirect reference (0 0 R) as null
42 43 WARNING: issue-335a.pdf (trailer, offset 22701): expected dictionary keys but found non-name objects; ignoring
43 44 WARNING: issue-335a.pdf (trailer, offset 22746): stream keyword found in trailer
44 45 WARNING: issue-335a.pdf (trailer, offset 22697): recovered trailer has no /Root entry
45 46 WARNING: issue-335a.pdf (trailer, offset 22687): unknown token while reading object; treating as null
46 47 WARNING: issue-335a.pdf (trailer, offset 22690): unknown token while reading object; treating as null
47 48 WARNING: issue-335a.pdf (trailer, offset 22702): unknown token while reading object; treating as null
  49 +WARNING: issue-335a.pdf (trailer, offset 22713): treating bad indirect reference (0 0 R) as null
48 50 WARNING: issue-335a.pdf (trailer, offset 22701): expected dictionary keys but found non-name objects; ignoring
49 51 WARNING: issue-335a.pdf (trailer, offset 22740): unknown token while reading object; treating as null
50 52 WARNING: issue-335a.pdf (trailer, offset 22748): unknown token while reading object; treating as null
... ... @@ -58,6 +60,7 @@ WARNING: issue-335a.pdf (trailer, offset 22675): unknown token while reading obj
58 60 WARNING: issue-335a.pdf (trailer, offset 22687): unknown token while reading object; treating as null
59 61 WARNING: issue-335a.pdf (trailer, offset 22690): unknown token while reading object; treating as null
60 62 WARNING: issue-335a.pdf (trailer, offset 22702): unknown token while reading object; treating as null
  63 +WARNING: issue-335a.pdf (trailer, offset 22713): treating bad indirect reference (0 0 R) as null
61 64 WARNING: issue-335a.pdf (trailer, offset 22701): expected dictionary keys but found non-name objects; ignoring
62 65 WARNING: issue-335a.pdf (trailer, offset 22740): unknown token while reading object; treating as null
63 66 WARNING: issue-335a.pdf (trailer, offset 22748): unknown token while reading object; treating as null
... ... @@ -66,6 +69,8 @@ WARNING: issue-335a.pdf (trailer, offset 22791): unknown token while reading obj
66 69 WARNING: issue-335a.pdf (trailer, offset 22794): unexpected >
67 70 WARNING: issue-335a.pdf (trailer, offset 22794): too many errors; giving up on reading object
68 71 WARNING: issue-335a.pdf (trailer, offset 22437): unknown token while reading object; treating as null
  72 +WARNING: issue-335a.pdf (trailer, offset 22448): treating bad indirect reference (0 0 R) as null
  73 +WARNING: issue-335a.pdf (trailer, offset 22471): treating bad indirect reference (20 -1 R) as null
69 74 WARNING: issue-335a.pdf (trailer, offset 22436): expected dictionary keys but found non-name objects; ignoring
70 75 WARNING: issue-335a.pdf (trailer, offset 22482): stream keyword found in trailer
71 76 WARNING: issue-335a.pdf (trailer, offset 22432): recovered trailer has no /Root entry
... ... @@ -98,27 +103,30 @@ WARNING: issue-335a.pdf (trailer, offset 22134): stream keyword found in trailer
98 103 WARNING: issue-335a.pdf (trailer, offset 22083): recovered trailer has no /Root entry
99 104 WARNING: issue-335a.pdf (trailer, offset 22000): invalid character (t) in hexstring
100 105 WARNING: issue-335a.pdf (trailer, offset 21937): unknown token while reading object; treating as null
  106 +WARNING: issue-335a.pdf (trailer, offset 21948): treating bad indirect reference (0 0 R) as null
101 107 WARNING: issue-335a.pdf (trailer, offset 21962): unknown token while reading object; treating as null
102 108 WARNING: issue-335a.pdf (trailer, offset 21991): unknown token while reading object; treating as null
103 109 WARNING: issue-335a.pdf (trailer, offset 22000): invalid character (t) in hexstring
104 110 WARNING: issue-335a.pdf (trailer, offset 22003): unknown token while reading object; treating as null
  111 +WARNING: issue-335a.pdf (trailer, offset 22026): treating bad indirect reference (-4 0 R) as null
105 112 WARNING: issue-335a.pdf (trailer, offset 21936): dictionary has duplicated key /Length; last occurrence overrides earlier ones
106 113 WARNING: issue-335a.pdf (trailer, offset 22028): unexpected >
107 114 WARNING: issue-335a.pdf (trailer, offset 22030): unknown token while reading object; treating as null
108   -WARNING: issue-335a.pdf (trailer, offset 22038): unknown token while reading object; treating as null
109   -WARNING: issue-335a.pdf (trailer, offset 22038): too many errors; giving up on reading object
  115 +WARNING: issue-335a.pdf (trailer, offset 22030): too many errors; giving up on reading object
110 116 WARNING: issue-335a.pdf (trailer, offset 21918): unknown token while reading object; treating as null
111 117 WARNING: issue-335a.pdf (trailer, offset 21925): unknown token while reading object; treating as null
112 118 WARNING: issue-335a.pdf (trailer, offset 21937): unknown token while reading object; treating as null
  119 +WARNING: issue-335a.pdf (trailer, offset 21948): treating bad indirect reference (0 0 R) as null
113 120 WARNING: issue-335a.pdf (trailer, offset 21962): unknown token while reading object; treating as null
114 121 WARNING: issue-335a.pdf (trailer, offset 21991): unknown token while reading object; treating as null
115 122 WARNING: issue-335a.pdf (trailer, offset 22000): invalid character (t) in hexstring
116 123 WARNING: issue-335a.pdf (trailer, offset 22003): unknown token while reading object; treating as null
  124 +WARNING: issue-335a.pdf (trailer, offset 22026): treating bad indirect reference (-4 0 R) as null
117 125 WARNING: issue-335a.pdf (trailer, offset 21936): dictionary has duplicated key /Length; last occurrence overrides earlier ones
118 126 WARNING: issue-335a.pdf (trailer, offset 22028): unexpected >
119   -WARNING: issue-335a.pdf (trailer, offset 22030): unknown token while reading object; treating as null
120   -WARNING: issue-335a.pdf (trailer, offset 22030): too many errors; giving up on reading object
  127 +WARNING: issue-335a.pdf (trailer, offset 22028): too many errors; giving up on reading object
121 128 WARNING: issue-335a.pdf (trailer, offset 21837): unknown token while reading object; treating as null
  129 +WARNING: issue-335a.pdf (trailer, offset 21861): treating bad indirect reference (0 0 R) as null
122 130 WARNING: issue-335a.pdf (trailer, offset 21850): expected dictionary keys but found non-name objects; ignoring
123 131 WARNING: issue-335a.pdf (trailer, offset 21892): unknown token while reading object; treating as null
124 132 WARNING: issue-335a.pdf (trailer, offset 21900): unknown token while reading object; treating as null
... ...
qpdf/qtest/qpdf/issue-51.out
1 1 WARNING: issue-51.pdf: can't find PDF header
2 2 WARNING: issue-51.pdf: reported number of objects (0) is not one plus the highest object number (8)
  3 +WARNING: issue-51.pdf (object 7 0, offset 500): treating bad indirect reference (0 0 R) as null
3 4 WARNING: issue-51.pdf (object 7 0, offset 476): dictionary has duplicated key /0000; last occurrence overrides earlier ones
4 5 WARNING: issue-51.pdf (object 7 0, offset 553): expected endobj
5 6 issue-51.pdf: unable to find page tree
... ...
qpdf/qtest/qpdf/issue-99.out
1 1 WARNING: issue-99.pdf: file is damaged
2 2 WARNING: issue-99.pdf (offset 3526): xref not found
3 3 WARNING: issue-99.pdf: Attempting to reconstruct cross-reference table
  4 +WARNING: issue-99.pdf (trailer, offset 4635): treating bad indirect reference (0 0 R) as null
4 5 WARNING: issue-99.pdf (trailer, offset 4613): recovered trailer has no /Root entry
5 6 WARNING: issue-99.pdf (object 1 0, offset 775): unknown token while reading object; treating as null
6 7 WARNING: issue-99.pdf (object 1 0, offset 795): unknown token while reading object; treating as null
... ... @@ -8,15 +9,13 @@ WARNING: issue-99.pdf (object 1 0, offset 815): unknown token while reading obje
8 9 WARNING: issue-99.pdf (object 1 0, offset 835): unknown token while reading object; treating as null
9 10 WARNING: issue-99.pdf (object 1 0, offset 855): unknown token while reading object; treating as null
10 11 WARNING: issue-99.pdf (object 1 0, offset 855): too many errors; giving up on reading object
11   -WARNING: issue-99.pdf (object 1 0, offset 858): expected endobj
12 12 WARNING: issue-99.pdf (object 2 0, offset 64): expected endobj
13 13 WARNING: issue-99.pdf (object 5 0, offset 2452): unknown token while reading object; treating as string
14 14 WARNING: issue-99.pdf (object 6 0, offset 2506): unexpected array close token; giving up on reading object
15   -WARNING: issue-99.pdf (object 6 0, offset 2507): expected endobj
  15 +WARNING: issue-99.pdf (object 8 0, offset 4281): treating bad indirect reference (0 0 R) as null
16 16 WARNING: issue-99.pdf (object 10 0, offset 3708): expected dictionary keys but found non-name objects; ignoring
17 17 WARNING: issue-99.pdf (object 11 0, offset 4485): unknown token while reading object; treating as null
18 18 WARNING: issue-99.pdf (object 11 0, offset 4497): unexpected array close token; giving up on reading object
19   -WARNING: issue-99.pdf (object 11 0, offset 4499): expected endobj
20 19 WARNING: issue-99.pdf: unable to find trailer dictionary while recovering damaged file
21 20 WARNING: object 1 0: Pages tree includes non-dictionary object; ignoring
22 21 WARNING: object 1 0: operation for dictionary attempted on object of type null: ignoring key replacement request
... ...
qpdf/qtest/qpdf/parse-object.out
... ... @@ -6,6 +6,9 @@ WARNING: parsed object: treating unexpected brace token as null
6 6 WARNING: parsed object: treating unexpected brace token as null
7 7 WARNING: parsed object: unexpected dictionary close token
8 8 WARNING: bad-parse.qdf (object 7 0, offset 1121): unexpected EOF
9   -WARNING: bad-parse.qdf (object 7 0, offset 1121): expected endobj
10   -WARNING: bad-parse.qdf (object 7 0, offset 1121): EOF after endobj
  9 +WARNING: parsed object (offset 5): treating bad indirect reference (0 0 R) as null
  10 +WARNING: parsed object (offset 12): treating bad indirect reference (-1 0 R) as null
  11 +WARNING: parsed object (offset 22): treating bad indirect reference (1 65535 R) as null
  12 +WARNING: parsed object (offset 33): treating bad indirect reference (1 100000 R) as null
  13 +WARNING: parsed object (offset 40): treating bad indirect reference (1 -1 R) as null
11 14 test 31 done
... ...