Commit 1a1c640aa426d0d4ac8f7a581e548c96ea6e8984

Authored by m-holger
1 parent b841c2d2

Refactor `QPDFParser` error handling by introducing the `QPDFParser::Error` class, replacing redundant logic with centralized functions, and streamlining bad-token handling for improved readability and maintainability.
libqpdf/QPDFParser.cc
@@ -153,6 +153,23 @@ QPDFParser::parse( @@ -153,6 +153,23 @@ QPDFParser::parse(
153 QPDFObjectHandle 153 QPDFObjectHandle
154 QPDFParser::parse(bool& empty, bool content_stream) 154 QPDFParser::parse(bool& empty, bool content_stream)
155 { 155 {
  156 + try {
  157 + return parse_first(empty, content_stream);
  158 + } catch (Error& e) {
  159 + return {QPDFObject::create<QPDF_Null>()};
  160 + } catch (QPDFExc& e) {
  161 + throw e;
  162 + } catch (std::logic_error& e) {
  163 + throw e;
  164 + } catch (std::exception& e) {
  165 + warn("treating object as null because of error during parsing : "s + e.what());
  166 + return {QPDFObject::create<QPDF_Null>()};
  167 + }
  168 +}
  169 +
  170 +QPDFObjectHandle
  171 +QPDFParser::parse_first(bool& empty, bool content_stream)
  172 +{
156 // This method must take care not to resolve any objects. Don't check the type of any object 173 // This method must take care not to resolve any objects. Don't check the type of any object
157 // without first ensuring that it is a direct object. Otherwise, doing so may have the side 174 // without first ensuring that it is a direct object. Otherwise, doing so may have the side
158 // effect of reading the object and changing the file pointer. If you do this, it will cause a 175 // effect of reading the object and changing the file pointer. If you do this, it will cause a
@@ -161,7 +178,6 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -161,7 +178,6 @@ QPDFParser::parse(bool& empty, bool content_stream)
161 ParseGuard pg(context); 178 ParseGuard pg(context);
162 empty = false; 179 empty = false;
163 start = input.tell(); 180 start = input.tell();
164 -  
165 if (!tokenizer.nextToken(input, object_description)) { 181 if (!tokenizer.nextToken(input, object_description)) {
166 warn(tokenizer.getErrorMessage()); 182 warn(tokenizer.getErrorMessage());
167 } 183 }
@@ -315,27 +331,16 @@ QPDFParser::parseRemainder(bool content_stream) @@ -315,27 +331,16 @@ QPDFParser::parseRemainder(bool content_stream)
315 return {QPDFObject::create<QPDF_Null>()}; 331 return {QPDFObject::create<QPDF_Null>()};
316 332
317 case QPDFTokenizer::tt_bad: 333 case QPDFTokenizer::tt_bad:
318 - if (tooManyBadTokens()) {  
319 - return {QPDFObject::create<QPDF_Null>()};  
320 - } 334 + check_too_many_bad_tokens();
321 addNull(); 335 addNull();
322 continue; 336 continue;
323 337
324 case QPDFTokenizer::tt_brace_open: 338 case QPDFTokenizer::tt_brace_open:
325 case QPDFTokenizer::tt_brace_close: 339 case QPDFTokenizer::tt_brace_close:
326 - warn("treating unexpected brace token as null");  
327 - if (tooManyBadTokens()) {  
328 - return {QPDFObject::create<QPDF_Null>()};  
329 - }  
330 - addNull(); 340 + add_bad_null("treating unexpected brace token as null");
331 continue; 341 continue;
332 342
333 case QPDFTokenizer::tt_array_close: 343 case QPDFTokenizer::tt_array_close:
334 - if ((bad_count || sanity_checks) && !max_bad_count) {  
335 - // Trigger warning.  
336 - (void)tooManyBadTokens();  
337 - return {QPDFObject::create<QPDF_Null>()};  
338 - }  
339 if (frame->state == st_array) { 344 if (frame->state == st_array) {
340 auto object = frame->null_count > 100 345 auto object = frame->null_count > 100
341 ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true) 346 ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true)
@@ -358,20 +363,11 @@ QPDFParser::parseRemainder(bool content_stream) @@ -358,20 +363,11 @@ QPDFParser::parseRemainder(bool content_stream)
358 warn("unexpected array close token; giving up on reading object"); 363 warn("unexpected array close token; giving up on reading object");
359 return {QPDFObject::create<QPDF_Null>()}; 364 return {QPDFObject::create<QPDF_Null>()};
360 } 365 }
361 - warn("treating unexpected array close token as null");  
362 - if (tooManyBadTokens()) {  
363 - return {QPDFObject::create<QPDF_Null>()};  
364 - }  
365 - addNull(); 366 + add_bad_null("treating unexpected array close token as null");
366 } 367 }
367 continue; 368 continue;
368 369
369 case QPDFTokenizer::tt_dict_close: 370 case QPDFTokenizer::tt_dict_close:
370 - if ((bad_count || sanity_checks) && !max_bad_count) {  
371 - // Trigger warning.  
372 - (void)tooManyBadTokens();  
373 - return {QPDFObject::create<QPDF_Null>()};  
374 - }  
375 if (frame->state <= st_dictionary_value) { 371 if (frame->state <= st_dictionary_value) {
376 // Attempt to recover more or less gracefully from invalid dictionaries. 372 // Attempt to recover more or less gracefully from invalid dictionaries.
377 auto& dict = frame->dict; 373 auto& dict = frame->dict;
@@ -417,11 +413,7 @@ QPDFParser::parseRemainder(bool content_stream) @@ -417,11 +413,7 @@ QPDFParser::parseRemainder(bool content_stream)
417 warn("unexpected dictionary close token; giving up on reading object"); 413 warn("unexpected dictionary close token; giving up on reading object");
418 return {QPDFObject::create<QPDF_Null>()}; 414 return {QPDFObject::create<QPDF_Null>()};
419 } 415 }
420 - warn("unexpected dictionary close token");  
421 - if (tooManyBadTokens()) {  
422 - return {QPDFObject::create<QPDF_Null>()};  
423 - }  
424 - addNull(); 416 + add_bad_null("unexpected dictionary close token");
425 } 417 }
426 continue; 418 continue;
427 419
@@ -490,18 +482,12 @@ QPDFParser::parseRemainder(bool content_stream) @@ -490,18 +482,12 @@ QPDFParser::parseRemainder(bool content_stream)
490 return {QPDFObject::create<QPDF_Null>()}; 482 return {QPDFObject::create<QPDF_Null>()};
491 } 483 }
492 484
493 - warn("unknown token while reading object; treating as null");  
494 - if (tooManyBadTokens()) {  
495 - return {QPDFObject::create<QPDF_Null>()};  
496 - }  
497 - addNull(); 485 + add_bad_null("unknown token while reading object; treating as null");
498 continue; 486 continue;
499 } 487 }
500 488
501 warn("unknown token while reading object; treating as string"); 489 warn("unknown token while reading object; treating as string");
502 - if (tooManyBadTokens()) {  
503 - return {QPDFObject::create<QPDF_Null>()};  
504 - } 490 + check_too_many_bad_tokens();
505 addScalar<QPDF_String>(tokenizer.getValue()); 491 addScalar<QPDF_String>(tokenizer.getValue());
506 492
507 continue; 493 continue;
@@ -525,11 +511,7 @@ QPDFParser::parseRemainder(bool content_stream) @@ -525,11 +511,7 @@ QPDFParser::parseRemainder(bool content_stream)
525 continue; 511 continue;
526 512
527 default: 513 default:
528 - warn("treating unknown token type as null while reading object");  
529 - if (tooManyBadTokens()) {  
530 - return {QPDFObject::create<QPDF_Null>()};  
531 - }  
532 - addNull(); 514 + add_bad_null("treating unknown token type as null while reading object");
533 } 515 }
534 } 516 }
535 } 517 }
@@ -568,6 +550,14 @@ QPDFParser::addNull() @@ -568,6 +550,14 @@ QPDFParser::addNull()
568 } 550 }
569 551
570 void 552 void
  553 +QPDFParser::add_bad_null(std::string const& msg)
  554 +{
  555 + warn(msg);
  556 + check_too_many_bad_tokens();
  557 + addNull();
  558 +}
  559 +
  560 +void
571 QPDFParser::addInt(int count) 561 QPDFParser::addInt(int count)
572 { 562 {
573 auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]); 563 auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]);
@@ -584,7 +574,8 @@ QPDFParser::addScalar(Args&&... args) @@ -584,7 +574,8 @@ QPDFParser::addScalar(Args&&... args)
584 // Stop adding scalars. We are going to abort when the close token or a bad token is 574 // Stop adding scalars. We are going to abort when the close token or a bad token is
585 // encountered. 575 // encountered.
586 max_bad_count = 0; 576 max_bad_count = 0;
587 - return; 577 + check_too_many_bad_tokens();
  578 + return; // unreachable
588 } 579 }
589 auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); 580 auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
590 obj->setDescription(context, description, input.getLastOffset()); 581 obj->setDescription(context, description, input.getLastOffset());
@@ -634,8 +625,8 @@ QPDFParser::fixMissingKeys() @@ -634,8 +625,8 @@ QPDFParser::fixMissingKeys()
634 } 625 }
635 } 626 }
636 627
637 -bool  
638 -QPDFParser::tooManyBadTokens() 628 +void
  629 +QPDFParser::check_too_many_bad_tokens()
639 { 630 {
640 auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); 631 auto limit = Limits::objects_max_container_size(bad_count || sanity_checks);
641 if (frame->olist.size() > limit || frame->dict.size() > limit) { 632 if (frame->olist.size() > limit || frame->dict.size() > limit) {
@@ -643,7 +634,7 @@ QPDFParser::tooManyBadTokens() @@ -643,7 +634,7 @@ QPDFParser::tooManyBadTokens()
643 warn( 634 warn(
644 "encountered errors while parsing an array or dictionary with more than " + 635 "encountered errors while parsing an array or dictionary with more than " +
645 std::to_string(limit) + " elements; giving up on reading object"); 636 std::to_string(limit) + " elements; giving up on reading object");
646 - return true; 637 + throw Error();
647 } 638 }
648 warn( 639 warn(
649 "encountered an array or dictionary with more than " + std::to_string(limit) + 640 "encountered an array or dictionary with more than " + std::to_string(limit) +
@@ -652,17 +643,16 @@ QPDFParser::tooManyBadTokens() @@ -652,17 +643,16 @@ QPDFParser::tooManyBadTokens()
652 if (max_bad_count && --max_bad_count > 0 && good_count > 4) { 643 if (max_bad_count && --max_bad_count > 0 && good_count > 4) {
653 good_count = 0; 644 good_count = 0;
654 bad_count = 1; 645 bad_count = 1;
655 - return false; 646 + return;
656 } 647 }
657 if (++bad_count > 5 || 648 if (++bad_count > 5 ||
658 (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) { 649 (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) {
659 // Give up after 5 errors in close proximity or if the number of missing dictionary keys 650 // Give up after 5 errors in close proximity or if the number of missing dictionary keys
660 // exceeds the remaining number of allowable total errors. 651 // exceeds the remaining number of allowable total errors.
661 warn("too many errors; giving up on reading object"); 652 warn("too many errors; giving up on reading object");
662 - return true; 653 + throw Error();
663 } 654 }
664 good_count = 0; 655 good_count = 0;
665 - return false;  
666 } 656 }
667 657
668 void 658 void
libqpdf/qpdf/QPDFParser.hh
@@ -16,6 +16,13 @@ using namespace qpdf::global; @@ -16,6 +16,13 @@ using namespace qpdf::global;
16 class QPDFParser 16 class QPDFParser
17 { 17 {
18 public: 18 public:
  19 + class Error: public std::exception
  20 + {
  21 + public:
  22 + Error() = default;
  23 + virtual ~Error() noexcept = default;
  24 + };
  25 +
19 static QPDFObjectHandle 26 static QPDFObjectHandle
20 parse(InputSource& input, std::string const& object_description, QPDF* context); 27 parse(InputSource& input, std::string const& object_description, QPDF* context);
21 28
@@ -106,13 +113,15 @@ class QPDFParser @@ -106,13 +113,15 @@ class QPDFParser
106 }; 113 };
107 114
108 QPDFObjectHandle parse(bool& empty, bool content_stream); 115 QPDFObjectHandle parse(bool& empty, bool content_stream);
  116 + QPDFObjectHandle parse_first(bool& empty, bool content_stream);
109 QPDFObjectHandle parseRemainder(bool content_stream); 117 QPDFObjectHandle parseRemainder(bool content_stream);
110 void add(std::shared_ptr<QPDFObject>&& obj); 118 void add(std::shared_ptr<QPDFObject>&& obj);
111 void addNull(); 119 void addNull();
  120 + void add_bad_null(std::string const& msg);
112 void addInt(int count); 121 void addInt(int count);
113 template <typename T, typename... Args> 122 template <typename T, typename... Args>
114 void addScalar(Args&&... args); 123 void addScalar(Args&&... args);
115 - bool tooManyBadTokens(); 124 + void check_too_many_bad_tokens();
116 void warnDuplicateKey(); 125 void warnDuplicateKey();
117 void fixMissingKeys(); 126 void fixMissingKeys();
118 void warn(qpdf_offset_t offset, std::string const& msg) const; 127 void warn(qpdf_offset_t offset, std::string const& msg) const;
qpdf/qtest/qpdf/issue-150.out
1 WARNING: issue-150.pdf: can't find PDF header 1 WARNING: issue-150.pdf: can't find PDF header
  2 +WARNING: issue-150.pdf (xref stream: object 8 0, offset 56): treating object as null because of error during parsing : overflow/underflow converting 9900000000000000000 to 64-bit integer
  3 +WARNING: issue-150.pdf (xref stream: object 8 0, offset 75): expected endobj
2 WARNING: issue-150.pdf: file is damaged 4 WARNING: issue-150.pdf: file is damaged
3 -WARNING: issue-150.pdf: error reading xref: overflow/underflow converting 9900000000000000000 to 64-bit integer 5 +WARNING: issue-150.pdf (offset 4): xref not found
4 WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table 6 WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table
5 -WARNING: issue-150.pdf (object 8 0): object has offset 0  
6 qpdf: issue-150.pdf: unable to find trailer dictionary while recovering damaged file 7 qpdf: issue-150.pdf: unable to find trailer dictionary while recovering damaged file