Commit 1a1c640aa426d0d4ac8f7a581e548c96ea6e8984

Authored by m-holger
1 parent b841c2d2

Refactor `QPDFParser` error handling by introducing `QPDFParser::Error` class, r…

…eplacing redundant logic with centralized functions, and streamlining bad token handling for improved readability and maintainability.
libqpdf/QPDFParser.cc
... ... @@ -153,6 +153,23 @@ QPDFParser::parse(
153 153 QPDFObjectHandle
154 154 QPDFParser::parse(bool& empty, bool content_stream)
155 155 {
  156 + try {
  157 + return parse_first(empty, content_stream);
  158 + } catch (Error& e) {
  159 + return {QPDFObject::create<QPDF_Null>()};
  160 + } catch (QPDFExc& e) {
  161 + throw e;
  162 + } catch (std::logic_error& e) {
  163 + throw e;
  164 + } catch (std::exception& e) {
  165 + warn("treating object as null because of error during parsing : "s + e.what());
  166 + return {QPDFObject::create<QPDF_Null>()};
  167 + }
  168 +}
  169 +
  170 +QPDFObjectHandle
  171 +QPDFParser::parse_first(bool& empty, bool content_stream)
  172 +{
156 173 // This method must take care not to resolve any objects. Don't check the type of any object
157 174 // without first ensuring that it is a direct object. Otherwise, doing so may have the side
158 175 // effect of reading the object and changing the file pointer. If you do this, it will cause a
... ... @@ -161,7 +178,6 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
161 178 ParseGuard pg(context);
162 179 empty = false;
163 180 start = input.tell();
164   -
165 181 if (!tokenizer.nextToken(input, object_description)) {
166 182 warn(tokenizer.getErrorMessage());
167 183 }
... ... @@ -315,27 +331,16 @@ QPDFParser::parseRemainder(bool content_stream)
315 331 return {QPDFObject::create<QPDF_Null>()};
316 332  
317 333 case QPDFTokenizer::tt_bad:
318   - if (tooManyBadTokens()) {
319   - return {QPDFObject::create<QPDF_Null>()};
320   - }
  334 + check_too_many_bad_tokens();
321 335 addNull();
322 336 continue;
323 337  
324 338 case QPDFTokenizer::tt_brace_open:
325 339 case QPDFTokenizer::tt_brace_close:
326   - warn("treating unexpected brace token as null");
327   - if (tooManyBadTokens()) {
328   - return {QPDFObject::create<QPDF_Null>()};
329   - }
330   - addNull();
  340 + add_bad_null("treating unexpected brace token as null");
331 341 continue;
332 342  
333 343 case QPDFTokenizer::tt_array_close:
334   - if ((bad_count || sanity_checks) && !max_bad_count) {
335   - // Trigger warning.
336   - (void)tooManyBadTokens();
337   - return {QPDFObject::create<QPDF_Null>()};
338   - }
339 344 if (frame->state == st_array) {
340 345 auto object = frame->null_count > 100
341 346 ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true)
... ... @@ -358,20 +363,11 @@ QPDFParser::parseRemainder(bool content_stream)
358 363 warn("unexpected array close token; giving up on reading object");
359 364 return {QPDFObject::create<QPDF_Null>()};
360 365 }
361   - warn("treating unexpected array close token as null");
362   - if (tooManyBadTokens()) {
363   - return {QPDFObject::create<QPDF_Null>()};
364   - }
365   - addNull();
  366 + add_bad_null("treating unexpected array close token as null");
366 367 }
367 368 continue;
368 369  
369 370 case QPDFTokenizer::tt_dict_close:
370   - if ((bad_count || sanity_checks) && !max_bad_count) {
371   - // Trigger warning.
372   - (void)tooManyBadTokens();
373   - return {QPDFObject::create<QPDF_Null>()};
374   - }
375 371 if (frame->state <= st_dictionary_value) {
376 372 // Attempt to recover more or less gracefully from invalid dictionaries.
377 373 auto& dict = frame->dict;
... ... @@ -417,11 +413,7 @@ QPDFParser::parseRemainder(bool content_stream)
417 413 warn("unexpected dictionary close token; giving up on reading object");
418 414 return {QPDFObject::create<QPDF_Null>()};
419 415 }
420   - warn("unexpected dictionary close token");
421   - if (tooManyBadTokens()) {
422   - return {QPDFObject::create<QPDF_Null>()};
423   - }
424   - addNull();
  416 + add_bad_null("unexpected dictionary close token");
425 417 }
426 418 continue;
427 419  
... ... @@ -490,18 +482,12 @@ QPDFParser::parseRemainder(bool content_stream)
490 482 return {QPDFObject::create<QPDF_Null>()};
491 483 }
492 484  
493   - warn("unknown token while reading object; treating as null");
494   - if (tooManyBadTokens()) {
495   - return {QPDFObject::create<QPDF_Null>()};
496   - }
497   - addNull();
  485 + add_bad_null("unknown token while reading object; treating as null");
498 486 continue;
499 487 }
500 488  
501 489 warn("unknown token while reading object; treating as string");
502   - if (tooManyBadTokens()) {
503   - return {QPDFObject::create<QPDF_Null>()};
504   - }
  490 + check_too_many_bad_tokens();
505 491 addScalar<QPDF_String>(tokenizer.getValue());
506 492  
507 493 continue;
... ... @@ -525,11 +511,7 @@ QPDFParser::parseRemainder(bool content_stream)
525 511 continue;
526 512  
527 513 default:
528   - warn("treating unknown token type as null while reading object");
529   - if (tooManyBadTokens()) {
530   - return {QPDFObject::create<QPDF_Null>()};
531   - }
532   - addNull();
  514 + add_bad_null("treating unknown token type as null while reading object");
533 515 }
534 516 }
535 517 }
... ... @@ -568,6 +550,14 @@ QPDFParser::addNull()
568 550 }
569 551  
570 552 void
  553 +QPDFParser::add_bad_null(std::string const& msg)
  554 +{
  555 + warn(msg);
  556 + check_too_many_bad_tokens();
  557 + addNull();
  558 +}
  559 +
  560 +void
571 561 QPDFParser::addInt(int count)
572 562 {
573 563 auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]);
... ... @@ -584,7 +574,8 @@ QPDFParser::addScalar(Args&amp;&amp;... args)
584 574 // Stop adding scalars. We are going to abort when the close token or a bad token is
585 575 // encountered.
586 576 max_bad_count = 0;
587   - return;
  577 + check_too_many_bad_tokens();
  578 + return; // unreachable
588 579 }
589 580 auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
590 581 obj->setDescription(context, description, input.getLastOffset());
... ... @@ -634,8 +625,8 @@ QPDFParser::fixMissingKeys()
634 625 }
635 626 }
636 627  
637   -bool
638   -QPDFParser::tooManyBadTokens()
  628 +void
  629 +QPDFParser::check_too_many_bad_tokens()
639 630 {
640 631 auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
641 632 if (frame->olist.size() > limit || frame->dict.size() > limit) {
... ... @@ -643,7 +634,7 @@ QPDFParser::tooManyBadTokens()
643 634 warn(
644 635 "encountered errors while parsing an array or dictionary with more than " +
645 636 std::to_string(limit) + " elements; giving up on reading object");
646   - return true;
  637 + throw Error();
647 638 }
648 639 warn(
649 640 "encountered an array or dictionary with more than " + std::to_string(limit) +
... ... @@ -652,17 +643,16 @@ QPDFParser::tooManyBadTokens()
652 643 if (max_bad_count && --max_bad_count > 0 && good_count > 4) {
653 644 good_count = 0;
654 645 bad_count = 1;
655   - return false;
  646 + return;
656 647 }
657 648 if (++bad_count > 5 ||
658 649 (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) {
659 650 // Give up after 5 errors in close proximity or if the number of missing dictionary keys
660 651 // exceeds the remaining number of allowable total errors.
661 652 warn("too many errors; giving up on reading object");
662   - return true;
  653 + throw Error();
663 654 }
664 655 good_count = 0;
665   - return false;
666 656 }
667 657  
668 658 void
... ...
libqpdf/qpdf/QPDFParser.hh
... ... @@ -16,6 +16,13 @@ using namespace qpdf::global;
16 16 class QPDFParser
17 17 {
18 18 public:
  19 + class Error: public std::exception
  20 + {
  21 + public:
  22 + Error() = default;
  23 + virtual ~Error() noexcept = default;
  24 + };
  25 +
19 26 static QPDFObjectHandle
20 27 parse(InputSource& input, std::string const& object_description, QPDF* context);
21 28  
... ... @@ -106,13 +113,15 @@ class QPDFParser
106 113 };
107 114  
108 115 QPDFObjectHandle parse(bool& empty, bool content_stream);
  116 + QPDFObjectHandle parse_first(bool& empty, bool content_stream);
109 117 QPDFObjectHandle parseRemainder(bool content_stream);
110 118 void add(std::shared_ptr<QPDFObject>&& obj);
111 119 void addNull();
  120 + void add_bad_null(std::string const& msg);
112 121 void addInt(int count);
113 122 template <typename T, typename... Args>
114 123 void addScalar(Args&&... args);
115   - bool tooManyBadTokens();
  124 + void check_too_many_bad_tokens();
116 125 void warnDuplicateKey();
117 126 void fixMissingKeys();
118 127 void warn(qpdf_offset_t offset, std::string const& msg) const;
... ...
qpdf/qtest/qpdf/issue-150.out
1 1 WARNING: issue-150.pdf: can't find PDF header
  2 +WARNING: issue-150.pdf (xref stream: object 8 0, offset 56): treating object as null because of error during parsing : overflow/underflow converting 9900000000000000000 to 64-bit integer
  3 +WARNING: issue-150.pdf (xref stream: object 8 0, offset 75): expected endobj
2 4 WARNING: issue-150.pdf: file is damaged
3   -WARNING: issue-150.pdf: error reading xref: overflow/underflow converting 9900000000000000000 to 64-bit integer
  5 +WARNING: issue-150.pdf (offset 4): xref not found
4 6 WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table
5   -WARNING: issue-150.pdf (object 8 0): object has offset 0
6 7 qpdf: issue-150.pdf: unable to find trailer dictionary while recovering damaged file
... ...