diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index 38d8b01..6edbbad 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -153,6 +153,23 @@ QPDFParser::parse( QPDFObjectHandle QPDFParser::parse(bool& empty, bool content_stream) { + try { + return parse_first(empty, content_stream); + } catch (Error& e) { + return {QPDFObject::create()}; + } catch (QPDFExc& e) { + throw e; + } catch (std::logic_error& e) { + throw e; + } catch (std::exception& e) { + warn("treating object as null because of error during parsing : "s + e.what()); + return {QPDFObject::create()}; + } +} + +QPDFObjectHandle +QPDFParser::parse_first(bool& empty, bool content_stream) +{ // This method must take care not to resolve any objects. Don't check the type of any object // without first ensuring that it is a direct object. Otherwise, doing so may have the side // effect of reading the object and changing the file pointer. If you do this, it will cause a @@ -161,7 +178,6 @@ QPDFParser::parse(bool& empty, bool content_stream) ParseGuard pg(context); empty = false; start = input.tell(); - if (!tokenizer.nextToken(input, object_description)) { warn(tokenizer.getErrorMessage()); } @@ -315,27 +331,16 @@ QPDFParser::parseRemainder(bool content_stream) return {QPDFObject::create()}; case QPDFTokenizer::tt_bad: - if (tooManyBadTokens()) { - return {QPDFObject::create()}; - } + check_too_many_bad_tokens(); addNull(); continue; case QPDFTokenizer::tt_brace_open: case QPDFTokenizer::tt_brace_close: - warn("treating unexpected brace token as null"); - if (tooManyBadTokens()) { - return {QPDFObject::create()}; - } - addNull(); + add_bad_null("treating unexpected brace token as null"); continue; case QPDFTokenizer::tt_array_close: - if ((bad_count || sanity_checks) && !max_bad_count) { - // Trigger warning. - (void)tooManyBadTokens(); - return {QPDFObject::create()}; - } if (frame->state == st_array) { auto object = frame->null_count > 100 ? QPDFObject::create(std::move(frame->olist), true) @@ -358,20 +363,11 @@ QPDFParser::parseRemainder(bool content_stream) warn("unexpected array close token; giving up on reading object"); return {QPDFObject::create()}; } - warn("treating unexpected array close token as null"); - if (tooManyBadTokens()) { - return {QPDFObject::create()}; - } - addNull(); + add_bad_null("treating unexpected array close token as null"); } continue; case QPDFTokenizer::tt_dict_close: - if ((bad_count || sanity_checks) && !max_bad_count) { - // Trigger warning. - (void)tooManyBadTokens(); - return {QPDFObject::create()}; - } if (frame->state <= st_dictionary_value) { // Attempt to recover more or less gracefully from invalid dictionaries. auto& dict = frame->dict; @@ -417,11 +413,7 @@ QPDFParser::parseRemainder(bool content_stream) warn("unexpected dictionary close token; giving up on reading object"); return {QPDFObject::create()}; } - warn("unexpected dictionary close token"); - if (tooManyBadTokens()) { - return {QPDFObject::create()}; - } - addNull(); + add_bad_null("unexpected dictionary close token"); } continue; @@ -490,18 +482,12 @@ QPDFParser::parseRemainder(bool content_stream) return {QPDFObject::create()}; } - warn("unknown token while reading object; treating as null"); - if (tooManyBadTokens()) { - return {QPDFObject::create()}; - } - addNull(); + add_bad_null("unknown token while reading object; treating as null"); continue; } warn("unknown token while reading object; treating as string"); - if (tooManyBadTokens()) { - return {QPDFObject::create()}; - } + check_too_many_bad_tokens(); addScalar(tokenizer.getValue()); continue; @@ -525,11 +511,7 @@ QPDFParser::parseRemainder(bool content_stream) continue; default: - warn("treating unknown token type as null while reading object"); - if (tooManyBadTokens()) { - return {QPDFObject::create()}; - } - addNull(); + add_bad_null("treating unknown token type as null while reading object"); } } } @@ -568,6 +550,14 @@ QPDFParser::addNull() } void +QPDFParser::add_bad_null(std::string const& msg) +{ + warn(msg); + check_too_many_bad_tokens(); + addNull(); +} + +void QPDFParser::addInt(int count) { auto obj = QPDFObject::create(int_buffer[count % 2]); @@ -584,7 +574,8 @@ QPDFParser::addScalar(Args&&... args) // Stop adding scalars. We are going to abort when the close token or a bad token is // encountered. max_bad_count = 0; - return; + check_too_many_bad_tokens(); + return; // unreachable } auto obj = QPDFObject::create(std::forward(args)...); obj->setDescription(context, description, input.getLastOffset()); @@ -634,8 +625,8 @@ QPDFParser::fixMissingKeys() } } -bool -QPDFParser::tooManyBadTokens() +void +QPDFParser::check_too_many_bad_tokens() { auto limit = Limits::objects_max_container_size(bad_count || sanity_checks); if (frame->olist.size() > limit || frame->dict.size() > limit) { @@ -643,7 +634,7 @@ QPDFParser::tooManyBadTokens() warn( "encountered errors while parsing an array or dictionary with more than " + std::to_string(limit) + " elements; giving up on reading object"); - return true; + throw Error(); } warn( "encountered an array or dictionary with more than " + std::to_string(limit) + @@ -652,17 +643,16 @@ QPDFParser::tooManyBadTokens() if (max_bad_count && --max_bad_count > 0 && good_count > 4) { good_count = 0; bad_count = 1; - return false; + return; } if (++bad_count > 5 || (frame->state != st_array && QIntC::to_size(max_bad_count) < frame->olist.size())) { // Give up after 5 errors in close proximity or if the number of missing dictionary keys // exceeds the remaining number of allowable total errors. warn("too many errors; giving up on reading object"); - return true; + throw Error(); } good_count = 0; - return false; } void diff --git a/libqpdf/qpdf/QPDFParser.hh b/libqpdf/qpdf/QPDFParser.hh index 22d24db..195a9a7 100644 --- a/libqpdf/qpdf/QPDFParser.hh +++ b/libqpdf/qpdf/QPDFParser.hh @@ -16,6 +16,13 @@ using namespace qpdf::global; class QPDFParser { public: + class Error: public std::exception + { + public: + Error() = default; + virtual ~Error() noexcept = default; + }; + static QPDFObjectHandle parse(InputSource& input, std::string const& object_description, QPDF* context); @@ -106,13 +113,15 @@ class QPDFParser }; QPDFObjectHandle parse(bool& empty, bool content_stream); + QPDFObjectHandle parse_first(bool& empty, bool content_stream); QPDFObjectHandle parseRemainder(bool content_stream); void add(std::shared_ptr&& obj); void addNull(); + void add_bad_null(std::string const& msg); void addInt(int count); template void addScalar(Args&&... args); - bool tooManyBadTokens(); + void check_too_many_bad_tokens(); void warnDuplicateKey(); void fixMissingKeys(); void warn(qpdf_offset_t offset, std::string const& msg) const; diff --git a/qpdf/qtest/qpdf/issue-150.out b/qpdf/qtest/qpdf/issue-150.out index 9fe8b5a..0dc8338 100644 --- a/qpdf/qtest/qpdf/issue-150.out +++ b/qpdf/qtest/qpdf/issue-150.out @@ -1,6 +1,7 @@ WARNING: issue-150.pdf: can't find PDF header +WARNING: issue-150.pdf (xref stream: object 8 0, offset 56): treating object as null because of error during parsing : overflow/underflow converting 9900000000000000000 to 64-bit integer +WARNING: issue-150.pdf (xref stream: object 8 0, offset 75): expected endobj WARNING: issue-150.pdf: file is damaged -WARNING: issue-150.pdf: error reading xref: overflow/underflow converting 9900000000000000000 to 64-bit integer +WARNING: issue-150.pdf (offset 4): xref not found WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table -WARNING: issue-150.pdf (object 8 0): object has offset 0 qpdf: issue-150.pdf: unable to find trailer dictionary while recovering damaged file