Commit 6e580e2453988e170f0cc41942bfa22877c44d1f
Committed by GitHub
Merge pull request #1444 from m-holger/fuzz
During xref reconstruction reject unreasonably large objects
Showing 7 changed files with 40 additions and 15 deletions
fuzz/CMakeLists.txt
fuzz/qpdf_extra/5109284021272576.fuzz
0 → 100644
No preview for this file type
fuzz/qtest/fuzz.test
| ... | ... | @@ -11,7 +11,7 @@ my $td = new TestDriver('fuzz'); |
| 11 | 11 | |
| 12 | 12 | my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS"; |
| 13 | 13 | |
| 14 | -my $n_qpdf_files = 95; # increment when adding new files | |
| 14 | +my $n_qpdf_files = 96; # increment when adding new files | |
| 15 | 15 | |
| 16 | 16 | my @fuzzers = ( |
| 17 | 17 | ['ascii85' => 1], | ... | ... |
libqpdf/QPDFParser.cc
| ... | ... | @@ -71,7 +71,8 @@ QPDFParser::parse( |
| 71 | 71 | std::string const& object_description, |
| 72 | 72 | qpdf::Tokenizer& tokenizer, |
| 73 | 73 | QPDFObjectHandle::StringDecrypter* decrypter, |
| 74 | - QPDF& context) | |
| 74 | + QPDF& context, | |
| 75 | + bool sanity_checks) | |
| 75 | 76 | { |
| 76 | 77 | bool empty{false}; |
| 77 | 78 | auto result = QPDFParser( |
| ... | ... | @@ -81,7 +82,10 @@ QPDFParser::parse( |
| 81 | 82 | tokenizer, |
| 82 | 83 | decrypter, |
| 83 | 84 | &context, |
| 84 | - true) | |
| 85 | + true, | |
| 86 | + 0, | |
| 87 | + 0, | |
| 88 | + sanity_checks) | |
| 85 | 89 | .parse(empty, false); |
| 86 | 90 | return {result, empty}; |
| 87 | 91 | } |
| ... | ... | @@ -298,7 +302,7 @@ QPDFParser::parseRemainder(bool content_stream) |
| 298 | 302 | continue; |
| 299 | 303 | |
| 300 | 304 | case QPDFTokenizer::tt_array_close: |
| 301 | - if (bad_count && !max_bad_count) { | |
| 305 | + if ((bad_count || sanity_checks) && !max_bad_count) { | |
| 302 | 306 | // Trigger warning. |
| 303 | 307 | (void)tooManyBadTokens(); |
| 304 | 308 | return {QPDFObject::create<QPDF_Null>()}; |
| ... | ... | @@ -329,7 +333,7 @@ QPDFParser::parseRemainder(bool content_stream) |
| 329 | 333 | continue; |
| 330 | 334 | |
| 331 | 335 | case QPDFTokenizer::tt_dict_close: |
| 332 | - if (bad_count && !max_bad_count) { | |
| 336 | + if ((bad_count || sanity_checks) && !max_bad_count) { | |
| 333 | 337 | // Trigger warning. |
| 334 | 338 | (void)tooManyBadTokens(); |
| 335 | 339 | return {QPDFObject::create<QPDF_Null>()}; |
| ... | ... | @@ -514,7 +518,8 @@ template <typename T, typename... Args> |
| 514 | 518 | void |
| 515 | 519 | QPDFParser::addScalar(Args&&... args) |
| 516 | 520 | { |
| 517 | - if (bad_count && (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | |
| 521 | + if ((bad_count || sanity_checks) && | |
| 522 | + (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | |
| 518 | 523 | // Stop adding scalars. We are going to abort when the close token or a bad token is |
| 519 | 524 | // encountered. |
| 520 | 525 | max_bad_count = 0; |
| ... | ... | @@ -572,10 +577,15 @@ bool |
| 572 | 577 | QPDFParser::tooManyBadTokens() |
| 573 | 578 | { |
| 574 | 579 | if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) { |
| 580 | + if (bad_count) { | |
| 581 | + warn( | |
| 582 | + "encountered errors while parsing an array or dictionary with more than 5000 " | |
| 583 | + "elements; giving up on reading object"); | |
| 584 | + return true; | |
| 585 | + } | |
| 575 | 586 | warn( |
| 576 | - "encountered errors while parsing an array or dictionary with more than 5000 " | |
| 577 | - "elements; giving up on reading object"); | |
| 578 | - return true; | |
| 587 | + "encountered an array or dictionary with more than 5000 elements during xref recovery; " | |
| 588 | + "giving up on reading object"); | |
| 579 | 589 | } |
| 580 | 590 | if (--max_bad_count > 0 && good_count > 4) { |
| 581 | 591 | good_count = 0; | ... | ... |
libqpdf/QPDF_objects.cc
| ... | ... | @@ -200,6 +200,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) |
| 200 | 200 | }; |
| 201 | 201 | |
| 202 | 202 | m->reconstructed_xref = true; |
| 203 | + m->in_xref_reconstruction = true; | |
| 203 | 204 | // We may find more objects, which may contain dangling references. |
| 204 | 205 | m->fixed_dangling_refs = false; |
| 205 | 206 | |
| ... | ... | @@ -377,6 +378,8 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) |
| 377 | 378 | throw damagedPDF("", -1, "unable to find any pages while recovering damaged file"); |
| 378 | 379 | } |
| 379 | 380 | } |
| 381 | + | |
| 382 | + m->in_xref_reconstruction = false; | |
| 380 | 383 | // We could iterate through the objects looking for streams and try to find objects inside of |
| 381 | 384 | // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors |
| 382 | 385 | // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything |
| ... | ... | @@ -1154,7 +1157,8 @@ QPDFObjectHandle |
| 1154 | 1157 | QPDF::readTrailer() |
| 1155 | 1158 | { |
| 1156 | 1159 | qpdf_offset_t offset = m->file->tell(); |
| 1157 | - auto [object, empty] = QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, *this); | |
| 1160 | + auto [object, empty] = QPDFParser::parse( | |
| 1161 | + *m->file, "trailer", m->tokenizer, nullptr, *this, m->in_xref_reconstruction); | |
| 1158 | 1162 | if (empty) { |
| 1159 | 1163 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in |
| 1160 | 1164 | // actual PDF files and Adobe Reader appears to ignore them. |
| ... | ... | @@ -1175,8 +1179,13 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) |
| 1175 | 1179 | |
| 1176 | 1180 | StringDecrypter decrypter{this, og}; |
| 1177 | 1181 | StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; |
| 1178 | - auto [object, empty] = | |
| 1179 | - QPDFParser::parse(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, *this); | |
| 1182 | + auto [object, empty] = QPDFParser::parse( | |
| 1183 | + *m->file, | |
| 1184 | + m->last_object_description, | |
| 1185 | + m->tokenizer, | |
| 1186 | + decrypter_ptr, | |
| 1187 | + *this, | |
| 1188 | + m->in_xref_reconstruction); | |
| 1180 | 1189 | if (empty) { |
| 1181 | 1190 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in |
| 1182 | 1191 | // actual PDF files and Adobe Reader appears to ignore them. | ... | ... |
libqpdf/qpdf/QPDFParser.hh
| ... | ... | @@ -36,7 +36,8 @@ class QPDFParser |
| 36 | 36 | std::string const& object_description, |
| 37 | 37 | qpdf::Tokenizer& tokenizer, |
| 38 | 38 | QPDFObjectHandle::StringDecrypter* decrypter, |
| 39 | - QPDF& context); | |
| 39 | + QPDF& context, | |
| 40 | + bool sanity_checks); | |
| 40 | 41 | |
| 41 | 42 | static std::pair<QPDFObjectHandle, bool> parse( |
| 42 | 43 | qpdf::is::OffsetBuffer& input, |
| ... | ... | @@ -63,7 +64,8 @@ class QPDFParser |
| 63 | 64 | QPDF* context, |
| 64 | 65 | bool parse_pdf, |
| 65 | 66 | int stream_id = 0, |
| 66 | - int obj_id = 0) : | |
| 67 | + int obj_id = 0, | |
| 68 | + bool sanity_checks = false) : | |
| 67 | 69 | input(input), |
| 68 | 70 | object_description(object_description), |
| 69 | 71 | tokenizer(tokenizer), |
| ... | ... | @@ -72,7 +74,8 @@ class QPDFParser |
| 72 | 74 | description(std::move(sp_description)), |
| 73 | 75 | parse_pdf(parse_pdf), |
| 74 | 76 | stream_id(stream_id), |
| 75 | - obj_id(obj_id) | |
| 77 | + obj_id(obj_id), | |
| 78 | + sanity_checks(sanity_checks) | |
| 76 | 79 | { |
| 77 | 80 | } |
| 78 | 81 | |
| ... | ... | @@ -125,6 +128,7 @@ class QPDFParser |
| 125 | 128 | bool parse_pdf{false}; |
| 126 | 129 | int stream_id{0}; |
| 127 | 130 | int obj_id{0}; |
| 131 | + bool sanity_checks{false}; | |
| 128 | 132 | |
| 129 | 133 | std::vector<StackFrame> stack; |
| 130 | 134 | StackFrame* frame{nullptr}; | ... | ... |
libqpdf/qpdf/QPDF_private.hh
| ... | ... | @@ -490,6 +490,7 @@ class QPDF::Members |
| 490 | 490 | // copied_stream_data_provider is owned by copied_streams |
| 491 | 491 | CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; |
| 492 | 492 | bool reconstructed_xref{false}; |
| 493 | + bool in_xref_reconstruction{false}; | |
| 493 | 494 | bool fixed_dangling_refs{false}; |
| 494 | 495 | bool immediate_copy_from{false}; |
| 495 | 496 | bool in_parse{false}; | ... | ... |