From 2b94c75535b5af1c27276f343005af0219fbe90a Mon Sep 17 00:00:00 2001 From: m-holger Date: Wed, 23 Apr 2025 16:10:52 +0100 Subject: [PATCH] During xref reconstruction reject unreasonably large objects --- fuzz/CMakeLists.txt | 1 + fuzz/qpdf_extra/5109284021272576.fuzz | Bin 0 -> 506766 bytes fuzz/qtest/fuzz.test | 2 +- libqpdf/QPDFParser.cc | 26 ++++++++++++++++++-------- libqpdf/QPDF_objects.cc | 15 ++++++++++++--- libqpdf/qpdf/QPDFParser.hh | 10 +++++++--- libqpdf/qpdf/QPDF_private.hh | 1 + 7 files changed, 40 insertions(+), 15 deletions(-) create mode 100644 fuzz/qpdf_extra/5109284021272576.fuzz diff --git a/fuzz/CMakeLists.txt b/fuzz/CMakeLists.txt index 37c8f48..7377833 100644 --- a/fuzz/CMakeLists.txt +++ b/fuzz/CMakeLists.txt @@ -158,6 +158,7 @@ set(CORPUS_OTHER 398060137.fuzz 409905355.fuzz 411312393.fuzz + 5109284021272576.fuzz ) set(CORPUS_DIR ${CMAKE_CURRENT_BINARY_DIR}/qpdf_corpus) diff --git a/fuzz/qpdf_extra/5109284021272576.fuzz b/fuzz/qpdf_extra/5109284021272576.fuzz new file mode 100644 index 0000000..42dfa26 Binary files /dev/null and b/fuzz/qpdf_extra/5109284021272576.fuzz differ diff --git a/fuzz/qtest/fuzz.test b/fuzz/qtest/fuzz.test index f29a798..fe7a043 100644 --- a/fuzz/qtest/fuzz.test +++ b/fuzz/qtest/fuzz.test @@ -11,7 +11,7 @@ my $td = new TestDriver('fuzz'); my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS"; -my $n_qpdf_files = 95; # increment when adding new files +my $n_qpdf_files = 96; # increment when adding new files my @fuzzers = ( ['ascii85' => 1], diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index 91334b1..1690f39 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -71,7 +71,8 @@ QPDFParser::parse( std::string const& object_description, qpdf::Tokenizer& tokenizer, QPDFObjectHandle::StringDecrypter* decrypter, - QPDF& context) + QPDF& context, + bool sanity_checks) { bool empty{false}; auto result = QPDFParser( @@ -81,7 +82,10 @@ QPDFParser::parse( tokenizer, decrypter, &context, - true) + true, + 0, + 0, + sanity_checks) .parse(empty, false); return {result, empty}; } @@ -298,7 +302,7 @@ QPDFParser::parseRemainder(bool content_stream) continue; case QPDFTokenizer::tt_array_close: - if (bad_count && !max_bad_count) { + if ((bad_count || sanity_checks) && !max_bad_count) { // Trigger warning. (void)tooManyBadTokens(); return {QPDFObject::create()}; @@ -329,7 +333,7 @@ QPDFParser::parseRemainder(bool content_stream) continue; case QPDFTokenizer::tt_dict_close: - if (bad_count && !max_bad_count) { + if ((bad_count || sanity_checks) && !max_bad_count) { // Trigger warning. (void)tooManyBadTokens(); return {QPDFObject::create()}; @@ -514,7 +518,8 @@ template void QPDFParser::addScalar(Args&&... args) { - if (bad_count && (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { + if ((bad_count || sanity_checks) && + (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { // Stop adding scalars. We are going to abort when the close token or a bad token is // encountered. max_bad_count = 0; @@ -572,10 +577,15 @@ bool QPDFParser::tooManyBadTokens() { if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) { + if (bad_count) { + warn( + "encountered errors while parsing an array or dictionary with more than 5000 " + "elements; giving up on reading object"); + return true; + } warn( - "encountered errors while parsing an array or dictionary with more than 5000 " - "elements; giving up on reading object"); - return true; + "encountered an array or dictionary with more than 5000 elements during xref recovery; " + "giving up on reading object"); } if (--max_bad_count > 0 && good_count > 4) { good_count = 0; diff --git a/libqpdf/QPDF_objects.cc b/libqpdf/QPDF_objects.cc index 888b4df..33ba3ac 100644 --- a/libqpdf/QPDF_objects.cc +++ b/libqpdf/QPDF_objects.cc @@ -200,6 +200,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) }; m->reconstructed_xref = true; + m->in_xref_reconstruction = true; // We may find more objects, which may contain dangling references. m->fixed_dangling_refs = false; @@ -377,6 +378,8 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) throw damagedPDF("", -1, "unable to find any pages while recovering damaged file"); } } + + m->in_xref_reconstruction = false; // We could iterate through the objects looking for streams and try to find objects inside of // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything @@ -1154,7 +1157,8 @@ QPDFObjectHandle QPDF::readTrailer() { qpdf_offset_t offset = m->file->tell(); - auto [object, empty] = QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, *this); + auto [object, empty] = QPDFParser::parse( + *m->file, "trailer", m->tokenizer, nullptr, *this, m->in_xref_reconstruction); if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // actual PDF files and Adobe Reader appears to ignore them. @@ -1175,8 +1179,13 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) StringDecrypter decrypter{this, og}; StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; - auto [object, empty] = - QPDFParser::parse(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, *this); + auto [object, empty] = QPDFParser::parse( + *m->file, + m->last_object_description, + m->tokenizer, + decrypter_ptr, + *this, + m->in_xref_reconstruction); if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // actual PDF files and Adobe Reader appears to ignore them. diff --git a/libqpdf/qpdf/QPDFParser.hh b/libqpdf/qpdf/QPDFParser.hh index fa0de4a..bd8d6dd 100644 --- a/libqpdf/qpdf/QPDFParser.hh +++ b/libqpdf/qpdf/QPDFParser.hh @@ -36,7 +36,8 @@ class QPDFParser std::string const& object_description, qpdf::Tokenizer& tokenizer, QPDFObjectHandle::StringDecrypter* decrypter, - QPDF& context); + QPDF& context, + bool sanity_checks); static std::pair parse( qpdf::is::OffsetBuffer& input, @@ -63,7 +64,8 @@ class QPDFParser QPDF* context, bool parse_pdf, int stream_id = 0, - int obj_id = 0) : + int obj_id = 0, + bool sanity_checks = false) : input(input), object_description(object_description), tokenizer(tokenizer), @@ -72,7 +74,8 @@ class QPDFParser description(std::move(sp_description)), parse_pdf(parse_pdf), stream_id(stream_id), - obj_id(obj_id) + obj_id(obj_id), + sanity_checks(sanity_checks) { } @@ -125,6 +128,7 @@ class QPDFParser bool parse_pdf{false}; int stream_id{0}; int obj_id{0}; + bool sanity_checks{false}; std::vector stack; StackFrame* frame{nullptr}; diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index eae4199..ca315f5 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -490,6 +490,7 @@ class QPDF::Members // copied_stream_data_provider is owned by copied_streams CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; bool reconstructed_xref{false}; + bool in_xref_reconstruction{false}; bool fixed_dangling_refs{false}; bool immediate_copy_from{false}; bool in_parse{false}; -- libgit2 0.21.4