Commit 1a1c640aa426d0d4ac8f7a581e548c96ea6e8984
1 parent
b841c2d2
Refactor `QPDFParser` error handling by introducing `QPDFParser::Error` class, r…
…eplacing redundant logic with centralized functions, and streamlining bad token handling for improved readability and maintainability.
Showing
3 changed files
with
52 additions
and
52 deletions
libqpdf/QPDFParser.cc
| @@ -153,6 +153,23 @@ QPDFParser::parse( | @@ -153,6 +153,23 @@ QPDFParser::parse( | ||
| 153 | QPDFObjectHandle | 153 | QPDFObjectHandle |
| 154 | QPDFParser::parse(bool& empty, bool content_stream) | 154 | QPDFParser::parse(bool& empty, bool content_stream) |
| 155 | { | 155 | { |
| 156 | + try { | ||
| 157 | + return parse_first(empty, content_stream); | ||
| 158 | + } catch (Error& e) { | ||
| 159 | + return {QPDFObject::create<QPDF_Null>()}; | ||
| 160 | + } catch (QPDFExc& e) { | ||
| 161 | + throw e; | ||
| 162 | + } catch (std::logic_error& e) { | ||
| 163 | + throw e; | ||
| 164 | + } catch (std::exception& e) { | ||
| 165 | + warn("treating object as null because of error during parsing : "s + e.what()); | ||
| 166 | + return {QPDFObject::create<QPDF_Null>()}; | ||
| 167 | + } | ||
| 168 | +} | ||
| 169 | + | ||
| 170 | +QPDFObjectHandle | ||
| 171 | +QPDFParser::parse_first(bool& empty, bool content_stream) | ||
| 172 | +{ | ||
| 156 | // This method must take care not to resolve any objects. Don't check the type of any object | 173 | // This method must take care not to resolve any objects. Don't check the type of any object |
| 157 | // without first ensuring that it is a direct object. Otherwise, doing so may have the side | 174 | // without first ensuring that it is a direct object. Otherwise, doing so may have the side |
| 158 | // effect of reading the object and changing the file pointer. If you do this, it will cause a | 175 | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
| @@ -161,7 +178,6 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -161,7 +178,6 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 161 | ParseGuard pg(context); | 178 | ParseGuard pg(context); |
| 162 | empty = false; | 179 | empty = false; |
| 163 | start = input.tell(); | 180 | start = input.tell(); |
| 164 | - | ||
| 165 | if (!tokenizer.nextToken(input, object_description)) { | 181 | if (!tokenizer.nextToken(input, object_description)) { |
| 166 | warn(tokenizer.getErrorMessage()); | 182 | warn(tokenizer.getErrorMessage()); |
| 167 | } | 183 | } |
| @@ -315,27 +331,16 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -315,27 +331,16 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 315 | return {QPDFObject::create<QPDF_Null>()}; | 331 | return {QPDFObject::create<QPDF_Null>()}; |
| 316 | 332 | ||
| 317 | case QPDFTokenizer::tt_bad: | 333 | case QPDFTokenizer::tt_bad: |
| 318 | - if (tooManyBadTokens()) { | ||
| 319 | - return {QPDFObject::create<QPDF_Null>()}; | ||
| 320 | - } | 334 | + check_too_many_bad_tokens(); |
| 321 | addNull(); | 335 | addNull(); |
| 322 | continue; | 336 | continue; |
| 323 | 337 | ||
| 324 | case QPDFTokenizer::tt_brace_open: | 338 | case QPDFTokenizer::tt_brace_open: |
| 325 | case QPDFTokenizer::tt_brace_close: | 339 | case QPDFTokenizer::tt_brace_close: |
| 326 | - warn("treating unexpected brace token as null"); | ||
| 327 | - if (tooManyBadTokens()) { | ||
| 328 | - return {QPDFObject::create<QPDF_Null>()}; | ||
| 329 | - } | ||
| 330 | - addNull(); | 340 | + add_bad_null("treating unexpected brace token as null"); |
| 331 | continue; | 341 | continue; |
| 332 | 342 | ||
| 333 | case QPDFTokenizer::tt_array_close: | 343 | case QPDFTokenizer::tt_array_close: |
| 334 | - if ((bad_count || sanity_checks) && !max_bad_count) { | ||
| 335 | - // Trigger warning. | ||
| 336 | - (void)tooManyBadTokens(); | ||
| 337 | - return {QPDFObject::create<QPDF_Null>()}; | ||
| 338 | - } | ||
| 339 | if (frame->state == st_array) { | 344 | if (frame->state == st_array) { |
| 340 | auto object = frame->null_count > 100 | 345 | auto object = frame->null_count > 100 |
| 341 | ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true) | 346 | ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true) |
| @@ -358,20 +363,11 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -358,20 +363,11 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 358 | warn("unexpected array close token; giving up on reading object"); | 363 | warn("unexpected array close token; giving up on reading object"); |
| 359 | return {QPDFObject::create<QPDF_Null>()}; | 364 | return {QPDFObject::create<QPDF_Null>()}; |
| 360 | } | 365 | } |
| 361 | - warn("treating unexpected array close token as null"); | ||
| 362 | - if (tooManyBadTokens()) { | ||
| 363 | - return {QPDFObject::create<QPDF_Null>()}; | ||
| 364 | - } | ||
| 365 | - addNull(); | 366 | + add_bad_null("treating unexpected array close token as null"); |
| 366 | } | 367 | } |
| 367 | continue; | 368 | continue; |
| 368 | 369 | ||
| 369 | case QPDFTokenizer::tt_dict_close: | 370 | case QPDFTokenizer::tt_dict_close: |
| 370 | - if ((bad_count || sanity_checks) && !max_bad_count) { | ||
| 371 | - // Trigger warning. | ||
| 372 | - (void)tooManyBadTokens(); | ||
| 373 | - return {QPDFObject::create<QPDF_Null>()}; | ||
| 374 | - } | ||
| 375 | if (frame->state <= st_dictionary_value) { | 371 | if (frame->state <= st_dictionary_value) { |
| 376 | // Attempt to recover more or less gracefully from invalid dictionaries. | 372 | // Attempt to recover more or less gracefully from invalid dictionaries. |
| 377 | auto& dict = frame->dict; | 373 | auto& dict = frame->dict; |
| @@ -417,11 +413,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -417,11 +413,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 417 | warn("unexpected dictionary close token; giving up on reading object"); | 413 | warn("unexpected dictionary close token; giving up on reading object"); |
| 418 | return {QPDFObject::create<QPDF_Null>()}; | 414 | return {QPDFObject::create<QPDF_Null>()}; |
| 419 | } | 415 | } |
| 420 | - warn("unexpected dictionary close token"); | ||
| 421 | - if (tooManyBadTokens()) { | ||
| 422 | - return {QPDFObject::create<QPDF_Null>()}; | ||
| 423 | - } | ||
| 424 | - addNull(); | 416 | + add_bad_null("unexpected dictionary close token"); |
| 425 | } | 417 | } |
| 426 | continue; | 418 | continue; |
| 427 | 419 | ||
| @@ -490,18 +482,12 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -490,18 +482,12 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 490 | return {QPDFObject::create<QPDF_Null>()}; | 482 | return {QPDFObject::create<QPDF_Null>()}; |
| 491 | } | 483 | } |
| 492 | 484 | ||
| 493 | - warn("unknown token while reading object; treating as null"); | ||
| 494 | - if (tooManyBadTokens()) { | ||
| 495 | - return {QPDFObject::create<QPDF_Null>()}; | ||
| 496 | - } | ||
| 497 | - addNull(); | 485 | + add_bad_null("unknown token while reading object; treating as null"); |
| 498 | continue; | 486 | continue; |
| 499 | } | 487 | } |
| 500 | 488 | ||
| 501 | warn("unknown token while reading object; treating as string"); | 489 | warn("unknown token while reading object; treating as string"); |
| 502 | - if (tooManyBadTokens()) { | ||
| 503 | - return {QPDFObject::create<QPDF_Null>()}; | ||
| 504 | - } | 490 | + check_too_many_bad_tokens(); |
| 505 | addScalar<QPDF_String>(tokenizer.getValue()); | 491 | addScalar<QPDF_String>(tokenizer.getValue()); |
| 506 | 492 | ||
| 507 | continue; | 493 | continue; |
| @@ -525,11 +511,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -525,11 +511,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 525 | continue; | 511 | continue; |
| 526 | 512 | ||
| 527 | default: | 513 | default: |
| 528 | - warn("treating unknown token type as null while reading object"); | ||
| 529 | - if (tooManyBadTokens()) { | ||
| 530 | - return {QPDFObject::create<QPDF_Null>()}; | ||
| 531 | - } | ||
| 532 | - addNull(); | 514 | + add_bad_null("treating unknown token type as null while reading object"); |
| 533 | } | 515 | } |
| 534 | } | 516 | } |
| 535 | } | 517 | } |
| @@ -568,6 +550,14 @@ QPDFParser::addNull() | @@ -568,6 +550,14 @@ QPDFParser::addNull() | ||
| 568 | } | 550 | } |
| 569 | 551 | ||
| 570 | void | 552 | void |
| 553 | +QPDFParser::add_bad_null(std::string const& msg) | ||
| 554 | +{ | ||
| 555 | + warn(msg); | ||
| 556 | + check_too_many_bad_tokens(); | ||
| 557 | + addNull(); | ||
| 558 | +} | ||
| 559 | + | ||
| 560 | +void | ||
| 571 | QPDFParser::addInt(int count) | 561 | QPDFParser::addInt(int count) |
| 572 | { | 562 | { |
| 573 | auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]); | 563 | auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]); |
| @@ -584,7 +574,8 @@ QPDFParser::addScalar(Args&&... args) | @@ -584,7 +574,8 @@ QPDFParser::addScalar(Args&&... args) | ||
| 584 | // Stop adding scalars. We are going to abort when the close token or a bad token is | 574 | // Stop adding scalars. We are going to abort when the close token or a bad token is |
| 585 | // encountered. | 575 | // encountered. |
| 586 | max_bad_count = 0; | 576 | max_bad_count = 0; |
| 587 | - return; | 577 | + check_too_many_bad_tokens(); |
| 578 | + return; // unreachable | ||
| 588 | } | 579 | } |
| 589 | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 580 | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); |
| 590 | obj->setDescription(context, description, input.getLastOffset()); | 581 | obj->setDescription(context, description, input.getLastOffset()); |
| @@ -634,8 +625,8 @@ QPDFParser::fixMissingKeys() | @@ -634,8 +625,8 @@ QPDFParser::fixMissingKeys() | ||
| 634 | } | 625 | } |
| 635 | } | 626 | } |
| 636 | 627 | ||
| 637 | -bool | ||
| 638 | -QPDFParser::tooManyBadTokens() | 628 | +void |
| 629 | +QPDFParser::check_too_many_bad_tokens() | ||
| 639 | { | 630 | { |
| 640 | auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); | 631 | auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); |
| 641 | if (frame->olist.size() > limit || frame->dict.size() > limit) { | 632 | if (frame->olist.size() > limit || frame->dict.size() > limit) { |
| @@ -643,7 +634,7 @@ QPDFParser::tooManyBadTokens() | @@ -643,7 +634,7 @@ QPDFParser::tooManyBadTokens() | ||
| 643 | warn( | 634 | warn( |
| 644 | "encountered errors while parsing an array or dictionary with more than " + | 635 | "encountered errors while parsing an array or dictionary with more than " + |
| 645 | std::to_string(limit) + " elements; giving up on reading object"); | 636 | std::to_string(limit) + " elements; giving up on reading object"); |
| 646 | - return true; | 637 | + throw Error(); |
| 647 | } | 638 | } |
| 648 | warn( | 639 | warn( |
| 649 | "encountered an array or dictionary with more than " + std::to_string(limit) + | 640 | "encountered an array or dictionary with more than " + std::to_string(limit) + |
| @@ -652,17 +643,16 @@ QPDFParser::tooManyBadTokens() | @@ -652,17 +643,16 @@ QPDFParser::tooManyBadTokens() | ||
| 652 | if (max_bad_count && --max_bad_count > 0 && good_count > 4) { | 643 | if (max_bad_count && --max_bad_count > 0 && good_count > 4) { |
| 653 | good_count = 0; | 644 | good_count = 0; |
| 654 | bad_count = 1; | 645 | bad_count = 1; |
| 655 | - return false; | 646 | + return; |
| 656 | } | 647 | } |
| 657 | if (++bad_count > 5 || | 648 | if (++bad_count > 5 || |
| 658 | (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) { | 649 | (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) { |
| 659 | // Give up after 5 errors in close proximity or if the number of missing dictionary keys | 650 | // Give up after 5 errors in close proximity or if the number of missing dictionary keys |
| 660 | // exceeds the remaining number of allowable total errors. | 651 | // exceeds the remaining number of allowable total errors. |
| 661 | warn("too many errors; giving up on reading object"); | 652 | warn("too many errors; giving up on reading object"); |
| 662 | - return true; | 653 | + throw Error(); |
| 663 | } | 654 | } |
| 664 | good_count = 0; | 655 | good_count = 0; |
| 665 | - return false; | ||
| 666 | } | 656 | } |
| 667 | 657 | ||
| 668 | void | 658 | void |
libqpdf/qpdf/QPDFParser.hh
| @@ -16,6 +16,13 @@ using namespace qpdf::global; | @@ -16,6 +16,13 @@ using namespace qpdf::global; | ||
| 16 | class QPDFParser | 16 | class QPDFParser |
| 17 | { | 17 | { |
| 18 | public: | 18 | public: |
| 19 | + class Error: public std::exception | ||
| 20 | + { | ||
| 21 | + public: | ||
| 22 | + Error() = default; | ||
| 23 | + virtual ~Error() noexcept = default; | ||
| 24 | + }; | ||
| 25 | + | ||
| 19 | static QPDFObjectHandle | 26 | static QPDFObjectHandle |
| 20 | parse(InputSource& input, std::string const& object_description, QPDF* context); | 27 | parse(InputSource& input, std::string const& object_description, QPDF* context); |
| 21 | 28 | ||
| @@ -106,13 +113,15 @@ class QPDFParser | @@ -106,13 +113,15 @@ class QPDFParser | ||
| 106 | }; | 113 | }; |
| 107 | 114 | ||
| 108 | QPDFObjectHandle parse(bool& empty, bool content_stream); | 115 | QPDFObjectHandle parse(bool& empty, bool content_stream); |
| 116 | + QPDFObjectHandle parse_first(bool& empty, bool content_stream); | ||
| 109 | QPDFObjectHandle parseRemainder(bool content_stream); | 117 | QPDFObjectHandle parseRemainder(bool content_stream); |
| 110 | void add(std::shared_ptr<QPDFObject>&& obj); | 118 | void add(std::shared_ptr<QPDFObject>&& obj); |
| 111 | void addNull(); | 119 | void addNull(); |
| 120 | + void add_bad_null(std::string const& msg); | ||
| 112 | void addInt(int count); | 121 | void addInt(int count); |
| 113 | template <typename T, typename... Args> | 122 | template <typename T, typename... Args> |
| 114 | void addScalar(Args&&... args); | 123 | void addScalar(Args&&... args); |
| 115 | - bool tooManyBadTokens(); | 124 | + void check_too_many_bad_tokens(); |
| 116 | void warnDuplicateKey(); | 125 | void warnDuplicateKey(); |
| 117 | void fixMissingKeys(); | 126 | void fixMissingKeys(); |
| 118 | void warn(qpdf_offset_t offset, std::string const& msg) const; | 127 | void warn(qpdf_offset_t offset, std::string const& msg) const; |
qpdf/qtest/qpdf/issue-150.out
| 1 | WARNING: issue-150.pdf: can't find PDF header | 1 | WARNING: issue-150.pdf: can't find PDF header |
| 2 | +WARNING: issue-150.pdf (xref stream: object 8 0, offset 56): treating object as null because of error during parsing : overflow/underflow converting 9900000000000000000 to 64-bit integer | ||
| 3 | +WARNING: issue-150.pdf (xref stream: object 8 0, offset 75): expected endobj | ||
| 2 | WARNING: issue-150.pdf: file is damaged | 4 | WARNING: issue-150.pdf: file is damaged |
| 3 | -WARNING: issue-150.pdf: error reading xref: overflow/underflow converting 9900000000000000000 to 64-bit integer | 5 | +WARNING: issue-150.pdf (offset 4): xref not found |
| 4 | WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table | 6 | WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table |
| 5 | -WARNING: issue-150.pdf (object 8 0): object has offset 0 | ||
| 6 | qpdf: issue-150.pdf: unable to find trailer dictionary while recovering damaged file | 7 | qpdf: issue-150.pdf: unable to find trailer dictionary while recovering damaged file |