Commit 6e580e2453988e170f0cc41942bfa22877c44d1f
Committed by
GitHub
Merge pull request #1444 from m-holger/fuzz
During xref reconstruction reject unreasonably large objects
Showing
7 changed files
with
40 additions
and
15 deletions
fuzz/CMakeLists.txt
| @@ -158,6 +158,7 @@ set(CORPUS_OTHER | @@ -158,6 +158,7 @@ set(CORPUS_OTHER | ||
| 158 | 398060137.fuzz | 158 | 398060137.fuzz |
| 159 | 409905355.fuzz | 159 | 409905355.fuzz |
| 160 | 411312393.fuzz | 160 | 411312393.fuzz |
| 161 | + 5109284021272576.fuzz | ||
| 161 | ) | 162 | ) |
| 162 | 163 | ||
| 163 | set(CORPUS_DIR ${CMAKE_CURRENT_BINARY_DIR}/qpdf_corpus) | 164 | set(CORPUS_DIR ${CMAKE_CURRENT_BINARY_DIR}/qpdf_corpus) |
fuzz/qpdf_extra/5109284021272576.fuzz
0 → 100644
No preview for this file type
fuzz/qtest/fuzz.test
| @@ -11,7 +11,7 @@ my $td = new TestDriver('fuzz'); | @@ -11,7 +11,7 @@ my $td = new TestDriver('fuzz'); | ||
| 11 | 11 | ||
| 12 | my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS"; | 12 | my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS"; |
| 13 | 13 | ||
| 14 | -my $n_qpdf_files = 95; # increment when adding new files | 14 | +my $n_qpdf_files = 96; # increment when adding new files |
| 15 | 15 | ||
| 16 | my @fuzzers = ( | 16 | my @fuzzers = ( |
| 17 | ['ascii85' => 1], | 17 | ['ascii85' => 1], |
libqpdf/QPDFParser.cc
| @@ -71,7 +71,8 @@ QPDFParser::parse( | @@ -71,7 +71,8 @@ QPDFParser::parse( | ||
| 71 | std::string const& object_description, | 71 | std::string const& object_description, |
| 72 | qpdf::Tokenizer& tokenizer, | 72 | qpdf::Tokenizer& tokenizer, |
| 73 | QPDFObjectHandle::StringDecrypter* decrypter, | 73 | QPDFObjectHandle::StringDecrypter* decrypter, |
| 74 | - QPDF& context) | 74 | + QPDF& context, |
| 75 | + bool sanity_checks) | ||
| 75 | { | 76 | { |
| 76 | bool empty{false}; | 77 | bool empty{false}; |
| 77 | auto result = QPDFParser( | 78 | auto result = QPDFParser( |
| @@ -81,7 +82,10 @@ QPDFParser::parse( | @@ -81,7 +82,10 @@ QPDFParser::parse( | ||
| 81 | tokenizer, | 82 | tokenizer, |
| 82 | decrypter, | 83 | decrypter, |
| 83 | &context, | 84 | &context, |
| 84 | - true) | 85 | + true, |
| 86 | + 0, | ||
| 87 | + 0, | ||
| 88 | + sanity_checks) | ||
| 85 | .parse(empty, false); | 89 | .parse(empty, false); |
| 86 | return {result, empty}; | 90 | return {result, empty}; |
| 87 | } | 91 | } |
| @@ -298,7 +302,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -298,7 +302,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 298 | continue; | 302 | continue; |
| 299 | 303 | ||
| 300 | case QPDFTokenizer::tt_array_close: | 304 | case QPDFTokenizer::tt_array_close: |
| 301 | - if (bad_count && !max_bad_count) { | 305 | + if ((bad_count || sanity_checks) && !max_bad_count) { |
| 302 | // Trigger warning. | 306 | // Trigger warning. |
| 303 | (void)tooManyBadTokens(); | 307 | (void)tooManyBadTokens(); |
| 304 | return {QPDFObject::create<QPDF_Null>()}; | 308 | return {QPDFObject::create<QPDF_Null>()}; |
| @@ -329,7 +333,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -329,7 +333,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 329 | continue; | 333 | continue; |
| 330 | 334 | ||
| 331 | case QPDFTokenizer::tt_dict_close: | 335 | case QPDFTokenizer::tt_dict_close: |
| 332 | - if (bad_count && !max_bad_count) { | 336 | + if ((bad_count || sanity_checks) && !max_bad_count) { |
| 333 | // Trigger warning. | 337 | // Trigger warning. |
| 334 | (void)tooManyBadTokens(); | 338 | (void)tooManyBadTokens(); |
| 335 | return {QPDFObject::create<QPDF_Null>()}; | 339 | return {QPDFObject::create<QPDF_Null>()}; |
| @@ -514,7 +518,8 @@ template <typename T, typename... Args> | @@ -514,7 +518,8 @@ template <typename T, typename... Args> | ||
| 514 | void | 518 | void |
| 515 | QPDFParser::addScalar(Args&&... args) | 519 | QPDFParser::addScalar(Args&&... args) |
| 516 | { | 520 | { |
| 517 | - if (bad_count && (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | 521 | + if ((bad_count || sanity_checks) && |
| 522 | + (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | ||
| 518 | // Stop adding scalars. We are going to abort when the close token or a bad token is | 523 | // Stop adding scalars. We are going to abort when the close token or a bad token is |
| 519 | // encountered. | 524 | // encountered. |
| 520 | max_bad_count = 0; | 525 | max_bad_count = 0; |
| @@ -572,10 +577,15 @@ bool | @@ -572,10 +577,15 @@ bool | ||
| 572 | QPDFParser::tooManyBadTokens() | 577 | QPDFParser::tooManyBadTokens() |
| 573 | { | 578 | { |
| 574 | if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) { | 579 | if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) { |
| 580 | + if (bad_count) { | ||
| 581 | + warn( | ||
| 582 | + "encountered errors while parsing an array or dictionary with more than 5000 " | ||
| 583 | + "elements; giving up on reading object"); | ||
| 584 | + return true; | ||
| 585 | + } | ||
| 575 | warn( | 586 | warn( |
| 576 | - "encountered errors while parsing an array or dictionary with more than 5000 " | ||
| 577 | - "elements; giving up on reading object"); | ||
| 578 | - return true; | 587 | + "encountered an array or dictionary with more than 5000 elements during xref recovery; " |
| 588 | + "giving up on reading object"); | ||
| 579 | } | 589 | } |
| 580 | if (--max_bad_count > 0 && good_count > 4) { | 590 | if (--max_bad_count > 0 && good_count > 4) { |
| 581 | good_count = 0; | 591 | good_count = 0; |
libqpdf/QPDF_objects.cc
| @@ -200,6 +200,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) | @@ -200,6 +200,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) | ||
| 200 | }; | 200 | }; |
| 201 | 201 | ||
| 202 | m->reconstructed_xref = true; | 202 | m->reconstructed_xref = true; |
| 203 | + m->in_xref_reconstruction = true; | ||
| 203 | // We may find more objects, which may contain dangling references. | 204 | // We may find more objects, which may contain dangling references. |
| 204 | m->fixed_dangling_refs = false; | 205 | m->fixed_dangling_refs = false; |
| 205 | 206 | ||
| @@ -377,6 +378,8 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) | @@ -377,6 +378,8 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) | ||
| 377 | throw damagedPDF("", -1, "unable to find any pages while recovering damaged file"); | 378 | throw damagedPDF("", -1, "unable to find any pages while recovering damaged file"); |
| 378 | } | 379 | } |
| 379 | } | 380 | } |
| 381 | + | ||
| 382 | + m->in_xref_reconstruction = false; | ||
| 380 | // We could iterate through the objects looking for streams and try to find objects inside of | 383 | // We could iterate through the objects looking for streams and try to find objects inside of |
| 381 | // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors | 384 | // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors |
| 382 | // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything | 385 | // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything |
| @@ -1154,7 +1157,8 @@ QPDFObjectHandle | @@ -1154,7 +1157,8 @@ QPDFObjectHandle | ||
| 1154 | QPDF::readTrailer() | 1157 | QPDF::readTrailer() |
| 1155 | { | 1158 | { |
| 1156 | qpdf_offset_t offset = m->file->tell(); | 1159 | qpdf_offset_t offset = m->file->tell(); |
| 1157 | - auto [object, empty] = QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, *this); | 1160 | + auto [object, empty] = QPDFParser::parse( |
| 1161 | + *m->file, "trailer", m->tokenizer, nullptr, *this, m->in_xref_reconstruction); | ||
| 1158 | if (empty) { | 1162 | if (empty) { |
| 1159 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in | 1163 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in |
| 1160 | // actual PDF files and Adobe Reader appears to ignore them. | 1164 | // actual PDF files and Adobe Reader appears to ignore them. |
| @@ -1175,8 +1179,13 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) | @@ -1175,8 +1179,13 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) | ||
| 1175 | 1179 | ||
| 1176 | StringDecrypter decrypter{this, og}; | 1180 | StringDecrypter decrypter{this, og}; |
| 1177 | StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; | 1181 | StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; |
| 1178 | - auto [object, empty] = | ||
| 1179 | - QPDFParser::parse(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, *this); | 1182 | + auto [object, empty] = QPDFParser::parse( |
| 1183 | + *m->file, | ||
| 1184 | + m->last_object_description, | ||
| 1185 | + m->tokenizer, | ||
| 1186 | + decrypter_ptr, | ||
| 1187 | + *this, | ||
| 1188 | + m->in_xref_reconstruction); | ||
| 1180 | if (empty) { | 1189 | if (empty) { |
| 1181 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in | 1190 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in |
| 1182 | // actual PDF files and Adobe Reader appears to ignore them. | 1191 | // actual PDF files and Adobe Reader appears to ignore them. |
libqpdf/qpdf/QPDFParser.hh
| @@ -36,7 +36,8 @@ class QPDFParser | @@ -36,7 +36,8 @@ class QPDFParser | ||
| 36 | std::string const& object_description, | 36 | std::string const& object_description, |
| 37 | qpdf::Tokenizer& tokenizer, | 37 | qpdf::Tokenizer& tokenizer, |
| 38 | QPDFObjectHandle::StringDecrypter* decrypter, | 38 | QPDFObjectHandle::StringDecrypter* decrypter, |
| 39 | - QPDF& context); | 39 | + QPDF& context, |
| 40 | + bool sanity_checks); | ||
| 40 | 41 | ||
| 41 | static std::pair<QPDFObjectHandle, bool> parse( | 42 | static std::pair<QPDFObjectHandle, bool> parse( |
| 42 | qpdf::is::OffsetBuffer& input, | 43 | qpdf::is::OffsetBuffer& input, |
| @@ -63,7 +64,8 @@ class QPDFParser | @@ -63,7 +64,8 @@ class QPDFParser | ||
| 63 | QPDF* context, | 64 | QPDF* context, |
| 64 | bool parse_pdf, | 65 | bool parse_pdf, |
| 65 | int stream_id = 0, | 66 | int stream_id = 0, |
| 66 | - int obj_id = 0) : | 67 | + int obj_id = 0, |
| 68 | + bool sanity_checks = false) : | ||
| 67 | input(input), | 69 | input(input), |
| 68 | object_description(object_description), | 70 | object_description(object_description), |
| 69 | tokenizer(tokenizer), | 71 | tokenizer(tokenizer), |
| @@ -72,7 +74,8 @@ class QPDFParser | @@ -72,7 +74,8 @@ class QPDFParser | ||
| 72 | description(std::move(sp_description)), | 74 | description(std::move(sp_description)), |
| 73 | parse_pdf(parse_pdf), | 75 | parse_pdf(parse_pdf), |
| 74 | stream_id(stream_id), | 76 | stream_id(stream_id), |
| 75 | - obj_id(obj_id) | 77 | + obj_id(obj_id), |
| 78 | + sanity_checks(sanity_checks) | ||
| 76 | { | 79 | { |
| 77 | } | 80 | } |
| 78 | 81 | ||
| @@ -125,6 +128,7 @@ class QPDFParser | @@ -125,6 +128,7 @@ class QPDFParser | ||
| 125 | bool parse_pdf{false}; | 128 | bool parse_pdf{false}; |
| 126 | int stream_id{0}; | 129 | int stream_id{0}; |
| 127 | int obj_id{0}; | 130 | int obj_id{0}; |
| 131 | + bool sanity_checks{false}; | ||
| 128 | 132 | ||
| 129 | std::vector<StackFrame> stack; | 133 | std::vector<StackFrame> stack; |
| 130 | StackFrame* frame{nullptr}; | 134 | StackFrame* frame{nullptr}; |
libqpdf/qpdf/QPDF_private.hh
| @@ -490,6 +490,7 @@ class QPDF::Members | @@ -490,6 +490,7 @@ class QPDF::Members | ||
| 490 | // copied_stream_data_provider is owned by copied_streams | 490 | // copied_stream_data_provider is owned by copied_streams |
| 491 | CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; | 491 | CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; |
| 492 | bool reconstructed_xref{false}; | 492 | bool reconstructed_xref{false}; |
| 493 | + bool in_xref_reconstruction{false}; | ||
| 493 | bool fixed_dangling_refs{false}; | 494 | bool fixed_dangling_refs{false}; |
| 494 | bool immediate_copy_from{false}; | 495 | bool immediate_copy_from{false}; |
| 495 | bool in_parse{false}; | 496 | bool in_parse{false}; |