Commit 1a1c640aa426d0d4ac8f7a581e548c96ea6e8984
1 parent
b841c2d2
Refactor `QPDFParser` error handling by introducing `QPDFParser::Error` class, r…
…eplacing redundant logic with centralized functions, and streamlining bad token handling for improved readability and maintainability.
Showing
3 changed files
with
52 additions
and
52 deletions
libqpdf/QPDFParser.cc
| ... | ... | @@ -153,6 +153,23 @@ QPDFParser::parse( |
| 153 | 153 | QPDFObjectHandle |
| 154 | 154 | QPDFParser::parse(bool& empty, bool content_stream) |
| 155 | 155 | { |
| 156 | + try { | |
| 157 | + return parse_first(empty, content_stream); | |
| 158 | + } catch (Error& e) { | |
| 159 | + return {QPDFObject::create<QPDF_Null>()}; | |
| 160 | + } catch (QPDFExc& e) { | |
| 161 | + throw e; | |
| 162 | + } catch (std::logic_error& e) { | |
| 163 | + throw e; | |
| 164 | + } catch (std::exception& e) { | |
| 165 | + warn("treating object as null because of error during parsing : "s + e.what()); | |
| 166 | + return {QPDFObject::create<QPDF_Null>()}; | |
| 167 | + } | |
| 168 | +} | |
| 169 | + | |
| 170 | +QPDFObjectHandle | |
| 171 | +QPDFParser::parse_first(bool& empty, bool content_stream) | |
| 172 | +{ | |
| 156 | 173 | // This method must take care not to resolve any objects. Don't check the type of any object |
| 157 | 174 | // without first ensuring that it is a direct object. Otherwise, doing so may have the side |
| 158 | 175 | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
| ... | ... | @@ -161,7 +178,6 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 161 | 178 | ParseGuard pg(context); |
| 162 | 179 | empty = false; |
| 163 | 180 | start = input.tell(); |
| 164 | - | |
| 165 | 181 | if (!tokenizer.nextToken(input, object_description)) { |
| 166 | 182 | warn(tokenizer.getErrorMessage()); |
| 167 | 183 | } |
| ... | ... | @@ -315,27 +331,16 @@ QPDFParser::parseRemainder(bool content_stream) |
| 315 | 331 | return {QPDFObject::create<QPDF_Null>()}; |
| 316 | 332 | |
| 317 | 333 | case QPDFTokenizer::tt_bad: |
| 318 | - if (tooManyBadTokens()) { | |
| 319 | - return {QPDFObject::create<QPDF_Null>()}; | |
| 320 | - } | |
| 334 | + check_too_many_bad_tokens(); | |
| 321 | 335 | addNull(); |
| 322 | 336 | continue; |
| 323 | 337 | |
| 324 | 338 | case QPDFTokenizer::tt_brace_open: |
| 325 | 339 | case QPDFTokenizer::tt_brace_close: |
| 326 | - warn("treating unexpected brace token as null"); | |
| 327 | - if (tooManyBadTokens()) { | |
| 328 | - return {QPDFObject::create<QPDF_Null>()}; | |
| 329 | - } | |
| 330 | - addNull(); | |
| 340 | + add_bad_null("treating unexpected brace token as null"); | |
| 331 | 341 | continue; |
| 332 | 342 | |
| 333 | 343 | case QPDFTokenizer::tt_array_close: |
| 334 | - if ((bad_count || sanity_checks) && !max_bad_count) { | |
| 335 | - // Trigger warning. | |
| 336 | - (void)tooManyBadTokens(); | |
| 337 | - return {QPDFObject::create<QPDF_Null>()}; | |
| 338 | - } | |
| 339 | 344 | if (frame->state == st_array) { |
| 340 | 345 | auto object = frame->null_count > 100 |
| 341 | 346 | ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true) |
| ... | ... | @@ -358,20 +363,11 @@ QPDFParser::parseRemainder(bool content_stream) |
| 358 | 363 | warn("unexpected array close token; giving up on reading object"); |
| 359 | 364 | return {QPDFObject::create<QPDF_Null>()}; |
| 360 | 365 | } |
| 361 | - warn("treating unexpected array close token as null"); | |
| 362 | - if (tooManyBadTokens()) { | |
| 363 | - return {QPDFObject::create<QPDF_Null>()}; | |
| 364 | - } | |
| 365 | - addNull(); | |
| 366 | + add_bad_null("treating unexpected array close token as null"); | |
| 366 | 367 | } |
| 367 | 368 | continue; |
| 368 | 369 | |
| 369 | 370 | case QPDFTokenizer::tt_dict_close: |
| 370 | - if ((bad_count || sanity_checks) && !max_bad_count) { | |
| 371 | - // Trigger warning. | |
| 372 | - (void)tooManyBadTokens(); | |
| 373 | - return {QPDFObject::create<QPDF_Null>()}; | |
| 374 | - } | |
| 375 | 371 | if (frame->state <= st_dictionary_value) { |
| 376 | 372 | // Attempt to recover more or less gracefully from invalid dictionaries. |
| 377 | 373 | auto& dict = frame->dict; |
| ... | ... | @@ -417,11 +413,7 @@ QPDFParser::parseRemainder(bool content_stream) |
| 417 | 413 | warn("unexpected dictionary close token; giving up on reading object"); |
| 418 | 414 | return {QPDFObject::create<QPDF_Null>()}; |
| 419 | 415 | } |
| 420 | - warn("unexpected dictionary close token"); | |
| 421 | - if (tooManyBadTokens()) { | |
| 422 | - return {QPDFObject::create<QPDF_Null>()}; | |
| 423 | - } | |
| 424 | - addNull(); | |
| 416 | + add_bad_null("unexpected dictionary close token"); | |
| 425 | 417 | } |
| 426 | 418 | continue; |
| 427 | 419 | |
| ... | ... | @@ -490,18 +482,12 @@ QPDFParser::parseRemainder(bool content_stream) |
| 490 | 482 | return {QPDFObject::create<QPDF_Null>()}; |
| 491 | 483 | } |
| 492 | 484 | |
| 493 | - warn("unknown token while reading object; treating as null"); | |
| 494 | - if (tooManyBadTokens()) { | |
| 495 | - return {QPDFObject::create<QPDF_Null>()}; | |
| 496 | - } | |
| 497 | - addNull(); | |
| 485 | + add_bad_null("unknown token while reading object; treating as null"); | |
| 498 | 486 | continue; |
| 499 | 487 | } |
| 500 | 488 | |
| 501 | 489 | warn("unknown token while reading object; treating as string"); |
| 502 | - if (tooManyBadTokens()) { | |
| 503 | - return {QPDFObject::create<QPDF_Null>()}; | |
| 504 | - } | |
| 490 | + check_too_many_bad_tokens(); | |
| 505 | 491 | addScalar<QPDF_String>(tokenizer.getValue()); |
| 506 | 492 | |
| 507 | 493 | continue; |
| ... | ... | @@ -525,11 +511,7 @@ QPDFParser::parseRemainder(bool content_stream) |
| 525 | 511 | continue; |
| 526 | 512 | |
| 527 | 513 | default: |
| 528 | - warn("treating unknown token type as null while reading object"); | |
| 529 | - if (tooManyBadTokens()) { | |
| 530 | - return {QPDFObject::create<QPDF_Null>()}; | |
| 531 | - } | |
| 532 | - addNull(); | |
| 514 | + add_bad_null("treating unknown token type as null while reading object"); | |
| 533 | 515 | } |
| 534 | 516 | } |
| 535 | 517 | } |
| ... | ... | @@ -568,6 +550,14 @@ QPDFParser::addNull() |
| 568 | 550 | } |
| 569 | 551 | |
| 570 | 552 | void |
| 553 | +QPDFParser::add_bad_null(std::string const& msg) | |
| 554 | +{ | |
| 555 | + warn(msg); | |
| 556 | + check_too_many_bad_tokens(); | |
| 557 | + addNull(); | |
| 558 | +} | |
| 559 | + | |
| 560 | +void | |
| 571 | 561 | QPDFParser::addInt(int count) |
| 572 | 562 | { |
| 573 | 563 | auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]); |
| ... | ... | @@ -584,7 +574,8 @@ QPDFParser::addScalar(Args&&... args) |
| 584 | 574 | // Stop adding scalars. We are going to abort when the close token or a bad token is |
| 585 | 575 | // encountered. |
| 586 | 576 | max_bad_count = 0; |
| 587 | - return; | |
| 577 | + check_too_many_bad_tokens(); | |
| 578 | + return; // unreachable | |
| 588 | 579 | } |
| 589 | 580 | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); |
| 590 | 581 | obj->setDescription(context, description, input.getLastOffset()); |
| ... | ... | @@ -634,8 +625,8 @@ QPDFParser::fixMissingKeys() |
| 634 | 625 | } |
| 635 | 626 | } |
| 636 | 627 | |
| 637 | -bool | |
| 638 | -QPDFParser::tooManyBadTokens() | |
| 628 | +void | |
| 629 | +QPDFParser::check_too_many_bad_tokens() | |
| 639 | 630 | { |
| 640 | 631 | auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); |
| 641 | 632 | if (frame->olist.size() > limit || frame->dict.size() > limit) { |
| ... | ... | @@ -643,7 +634,7 @@ QPDFParser::tooManyBadTokens() |
| 643 | 634 | warn( |
| 644 | 635 | "encountered errors while parsing an array or dictionary with more than " + |
| 645 | 636 | std::to_string(limit) + " elements; giving up on reading object"); |
| 646 | - return true; | |
| 637 | + throw Error(); | |
| 647 | 638 | } |
| 648 | 639 | warn( |
| 649 | 640 | "encountered an array or dictionary with more than " + std::to_string(limit) + |
| ... | ... | @@ -652,17 +643,16 @@ QPDFParser::tooManyBadTokens() |
| 652 | 643 | if (max_bad_count && --max_bad_count > 0 && good_count > 4) { |
| 653 | 644 | good_count = 0; |
| 654 | 645 | bad_count = 1; |
| 655 | - return false; | |
| 646 | + return; | |
| 656 | 647 | } |
| 657 | 648 | if (++bad_count > 5 || |
| 658 | 649 | (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) { |
| 659 | 650 | // Give up after 5 errors in close proximity or if the number of missing dictionary keys |
| 660 | 651 | // exceeds the remaining number of allowable total errors. |
| 661 | 652 | warn("too many errors; giving up on reading object"); |
| 662 | - return true; | |
| 653 | + throw Error(); | |
| 663 | 654 | } |
| 664 | 655 | good_count = 0; |
| 665 | - return false; | |
| 666 | 656 | } |
| 667 | 657 | |
| 668 | 658 | void | ... | ... |
libqpdf/qpdf/QPDFParser.hh
| ... | ... | @@ -16,6 +16,13 @@ using namespace qpdf::global; |
| 16 | 16 | class QPDFParser |
| 17 | 17 | { |
| 18 | 18 | public: |
| 19 | + class Error: public std::exception | |
| 20 | + { | |
| 21 | + public: | |
| 22 | + Error() = default; | |
| 23 | + virtual ~Error() noexcept = default; | |
| 24 | + }; | |
| 25 | + | |
| 19 | 26 | static QPDFObjectHandle |
| 20 | 27 | parse(InputSource& input, std::string const& object_description, QPDF* context); |
| 21 | 28 | |
| ... | ... | @@ -106,13 +113,15 @@ class QPDFParser |
| 106 | 113 | }; |
| 107 | 114 | |
| 108 | 115 | QPDFObjectHandle parse(bool& empty, bool content_stream); |
| 116 | + QPDFObjectHandle parse_first(bool& empty, bool content_stream); | |
| 109 | 117 | QPDFObjectHandle parseRemainder(bool content_stream); |
| 110 | 118 | void add(std::shared_ptr<QPDFObject>&& obj); |
| 111 | 119 | void addNull(); |
| 120 | + void add_bad_null(std::string const& msg); | |
| 112 | 121 | void addInt(int count); |
| 113 | 122 | template <typename T, typename... Args> |
| 114 | 123 | void addScalar(Args&&... args); |
| 115 | - bool tooManyBadTokens(); | |
| 124 | + void check_too_many_bad_tokens(); | |
| 116 | 125 | void warnDuplicateKey(); |
| 117 | 126 | void fixMissingKeys(); |
| 118 | 127 | void warn(qpdf_offset_t offset, std::string const& msg) const; | ... | ... |
qpdf/qtest/qpdf/issue-150.out
| 1 | 1 | WARNING: issue-150.pdf: can't find PDF header |
| 2 | +WARNING: issue-150.pdf (xref stream: object 8 0, offset 56): treating object as null because of error during parsing : overflow/underflow converting 9900000000000000000 to 64-bit integer | |
| 3 | +WARNING: issue-150.pdf (xref stream: object 8 0, offset 75): expected endobj | |
| 2 | 4 | WARNING: issue-150.pdf: file is damaged |
| 3 | -WARNING: issue-150.pdf: error reading xref: overflow/underflow converting 9900000000000000000 to 64-bit integer | |
| 5 | +WARNING: issue-150.pdf (offset 4): xref not found | |
| 4 | 6 | WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table |
| 5 | -WARNING: issue-150.pdf (object 8 0): object has offset 0 | |
| 6 | 7 | qpdf: issue-150.pdf: unable to find trailer dictionary while recovering damaged file | ... | ... |