Commit e2823965faa00a115ddc189576336d0bd7278ad0

Authored by m-holger
1 parent 27e14333

Refactor `limits_error` handling for improved clarity and consistency

- Introduce `limits_error` method in `QPDFParser` for centralized limit-related error handling.
- Enhance warnings and error messages with detailed limit identifiers (e.g., `parser-max-nesting`).
- Refactor limit checks to improve maintainability and ensure uniformity in error reporting.
- Update tests and output to reflect adjusted error handling approach.
libqpdf/QPDFParser.cc
... ... @@ -437,18 +437,16 @@ QPDFParser::parseRemainder(bool content_stream)
437 437 case QPDFTokenizer::tt_array_open:
438 438 case QPDFTokenizer::tt_dict_open:
439 439 if (stack.size() > max_nesting) {
440   - global::Limits::error();
441   - warn("limits error: ignoring excessively deeply nested data structure");
442   - return {};
443   - } else {
444   - b_contents = false;
445   - stack.emplace_back(
446   - input,
447   - (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
448   - : st_dictionary_key);
449   - frame = &stack.back();
450   - continue;
  440 + limits_error(
  441 + "parser-max-nesting", "ignoring excessively deeply nested data structure");
451 442 }
  443 + b_contents = false;
  444 + stack.emplace_back(
  445 + input,
  446 + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
  447 + : st_dictionary_key);
  448 + frame = &stack.back();
  449 + continue;
452 450  
453 451 case QPDFTokenizer::tt_bool:
454 452 addScalar<QPDF_Bool>(tokenizer.getValue() == "true");
... ... @@ -588,10 +586,10 @@ void
588 586 QPDFParser::addScalar(Args&&... args)
589 587 {
590 588 auto limit = Limits::parser_max_container_size(bad_count || sanity_checks);
591   - if (frame->olist.size() > limit || frame->dict.size() > limit) {
  589 + if (frame->olist.size() >= limit || frame->dict.size() >= limit) {
592 590 // Stop adding scalars. We are going to abort when the close token or a bad token is
593 591 // encountered.
594   - max_bad_count = 0;
  592 + max_bad_count = 1;
595 593 check_too_many_bad_tokens(); // always throws Error()
596 594 }
597 595 auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
... ... @@ -646,26 +644,29 @@ void
646 644 QPDFParser::check_too_many_bad_tokens()
647 645 {
648 646 auto limit = Limits::parser_max_container_size(bad_count || sanity_checks);
649   - if (frame->olist.size() > limit || frame->dict.size() > limit) {
  647 + if (frame->olist.size() >= limit || frame->dict.size() >= limit) {
650 648 if (bad_count) {
651   - Limits::error();
652   - warn(
653   - "limits error: encountered errors while parsing an array or dictionary with more "
654   - "than " +
655   - std::to_string(limit) + " elements; giving up on reading object");
656   - throw Error();
  649 + limits_error(
  650 + "parser-max-container-size-damaged",
  651 + "encountered errors while parsing an array or dictionary with more than " +
  652 + std::to_string(limit) + " elements; giving up on reading object");
657 653 }
658   - warn(
  654 + limits_error(
  655 + "parser-max-container-size",
659 656 "encountered an array or dictionary with more than " + std::to_string(limit) +
660   - " elements during xref recovery; giving up on reading object");
  657 + " elements during xref recovery; giving up on reading object");
661 658 }
662   - if (max_bad_count && --max_bad_count > 0 && good_count > 4) {
  659 + if (max_bad_count && --max_bad_count == 0) {
  660 + limits_error(
  661 + "parser-max-errors", "too many errors during parsing; treating object as null");
  662 + }
  663 + if (good_count > 4) {
663 664 good_count = 0;
664 665 bad_count = 1;
665 666 return;
666 667 }
667 668 if (++bad_count > 5 ||
668   - (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) {
  669 + (frame->state != st_array && std::cmp_less(max_bad_count, frame->olist.size()))) {
669 670 // Give up after 5 errors in close proximity or if the number of missing dictionary keys
670 671 // exceeds the remaining number of allowable total errors.
671 672 warn("too many errors; giving up on reading object");
... ... @@ -675,6 +676,14 @@ QPDFParser::check_too_many_bad_tokens()
675 676 }
676 677  
677 678 void
  679 +QPDFParser::limits_error(std::string const& limit, std::string const& msg)
  680 +{
  681 + Limits::error();
  682 + warn("limits error("s + limit + "): " + msg);
  683 + throw Error();
  684 +}
  685 +
  686 +void
678 687 QPDFParser::warn(QPDFExc const& e) const
679 688 {
680 689 // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
... ...
libqpdf/qpdf/QPDFParser.hh
... ... @@ -124,6 +124,7 @@ class QPDFParser
124 124 void check_too_many_bad_tokens();
125 125 void warnDuplicateKey();
126 126 void fixMissingKeys();
  127 + [[noreturn]] void limits_error(std::string const& limit, std::string const& msg);
127 128 void warn(qpdf_offset_t offset, std::string const& msg) const;
128 129 void warn(std::string const& msg) const;
129 130 void warn(QPDFExc const&) const;
... ...
qpdf/qtest/qpdf/issue-146.out
1 1 WARNING: issue-146.pdf: file is damaged
2 2 WARNING: issue-146.pdf: can't find startxref
3 3 WARNING: issue-146.pdf: Attempting to reconstruct cross-reference table
4   -WARNING: issue-146.pdf (trailer, offset 695): limits error: ignoring excessively deeply nested data structure
  4 +WARNING: issue-146.pdf (trailer, offset 695): limits error(parser-max-nesting): ignoring excessively deeply nested data structure
5 5 WARNING: issue-146.pdf (object 1 0, offset 92): expected endobj
6 6 WARNING: issue-146.pdf (object 7 0, offset 146): unknown token while reading object; treating as null
7 7 WARNING: issue-146.pdf (object 7 0, offset 168): expected endobj
... ...
qpdf/qtest/qpdf/issue-202.out
1   -WARNING: issue-202.pdf (trailer, offset 55770): limits error: ignoring excessively deeply nested data structure
  1 +WARNING: issue-202.pdf (trailer, offset 55770): limits error(parser-max-nesting): ignoring excessively deeply nested data structure
2 2 WARNING: issue-202.pdf: file is damaged
3 3 WARNING: issue-202.pdf (offset 54769): expected trailer dictionary
4 4 WARNING: issue-202.pdf: Attempting to reconstruct cross-reference table
5   -WARNING: issue-202.pdf (trailer, offset 55770): limits error: ignoring excessively deeply nested data structure
  5 +WARNING: issue-202.pdf (trailer, offset 55770): limits error(parser-max-nesting): ignoring excessively deeply nested data structure
6 6 WARNING: issue-202.pdf (object 222 0, offset 50101): dictionary has duplicated key /Creator; last occurrence overrides earlier ones
7 7 WARNING: issue-202.pdf (object 222 0, offset 50101): dictionary has duplicated key /Producer; last occurrence overrides earlier ones
8 8 WARNING: issue-202.pdf: unable to find trailer dictionary while recovering damaged file
... ...