Commit 2b94c75535b5af1c27276f343005af0219fbe90a
1 parent
8ef5cfad
During xref reconstruction, reject unreasonably large objects
Reject objects containing arrays or dictionaries with more than 5000 elements. Xref reconstruction only runs when recovering a damaged file, so we are by definition dealing with damaged files, and such objects are extremely likely to be invalid or malicious.
Showing 7 changed files with 40 additions and 15 deletions
fuzz/CMakeLists.txt
fuzz/qpdf_extra/5109284021272576.fuzz
0 → 100644
No preview for this file type
fuzz/qtest/fuzz.test
| ... | ... | @@ -11,7 +11,7 @@ my $td = new TestDriver('fuzz'); |
| 11 | 11 | |
| 12 | 12 | my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS"; |
| 13 | 13 | |
| 14 | -my $n_qpdf_files = 95; # increment when adding new files | |
| 14 | +my $n_qpdf_files = 96; # increment when adding new files | |
| 15 | 15 | |
| 16 | 16 | my @fuzzers = ( |
| 17 | 17 | ['ascii85' => 1], | ... | ... |
libqpdf/QPDFParser.cc
| ... | ... | @@ -71,7 +71,8 @@ QPDFParser::parse( |
| 71 | 71 | std::string const& object_description, |
| 72 | 72 | qpdf::Tokenizer& tokenizer, |
| 73 | 73 | QPDFObjectHandle::StringDecrypter* decrypter, |
| 74 | - QPDF& context) | |
| 74 | + QPDF& context, | |
| 75 | + bool sanity_checks) | |
| 75 | 76 | { |
| 76 | 77 | bool empty{false}; |
| 77 | 78 | auto result = QPDFParser( |
| ... | ... | @@ -81,7 +82,10 @@ QPDFParser::parse( |
| 81 | 82 | tokenizer, |
| 82 | 83 | decrypter, |
| 83 | 84 | &context, |
| 84 | - true) | |
| 85 | + true, | |
| 86 | + 0, | |
| 87 | + 0, | |
| 88 | + sanity_checks) | |
| 85 | 89 | .parse(empty, false); |
| 86 | 90 | return {result, empty}; |
| 87 | 91 | } |
| ... | ... | @@ -298,7 +302,7 @@ QPDFParser::parseRemainder(bool content_stream) |
| 298 | 302 | continue; |
| 299 | 303 | |
| 300 | 304 | case QPDFTokenizer::tt_array_close: |
| 301 | - if (bad_count && !max_bad_count) { | |
| 305 | + if ((bad_count || sanity_checks) && !max_bad_count) { | |
| 302 | 306 | // Trigger warning. |
| 303 | 307 | (void)tooManyBadTokens(); |
| 304 | 308 | return {QPDFObject::create<QPDF_Null>()}; |
| ... | ... | @@ -329,7 +333,7 @@ QPDFParser::parseRemainder(bool content_stream) |
| 329 | 333 | continue; |
| 330 | 334 | |
| 331 | 335 | case QPDFTokenizer::tt_dict_close: |
| 332 | - if (bad_count && !max_bad_count) { | |
| 336 | + if ((bad_count || sanity_checks) && !max_bad_count) { | |
| 333 | 337 | // Trigger warning. |
| 334 | 338 | (void)tooManyBadTokens(); |
| 335 | 339 | return {QPDFObject::create<QPDF_Null>()}; |
| ... | ... | @@ -514,7 +518,8 @@ template <typename T, typename... Args> |
| 514 | 518 | void |
| 515 | 519 | QPDFParser::addScalar(Args&&... args) |
| 516 | 520 | { |
| 517 | - if (bad_count && (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | |
| 521 | + if ((bad_count || sanity_checks) && | |
| 522 | + (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { | |
| 518 | 523 | // Stop adding scalars. We are going to abort when the close token or a bad token is |
| 519 | 524 | // encountered. |
| 520 | 525 | max_bad_count = 0; |
| ... | ... | @@ -572,10 +577,15 @@ bool |
| 572 | 577 | QPDFParser::tooManyBadTokens() |
| 573 | 578 | { |
| 574 | 579 | if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) { |
| 580 | + if (bad_count) { | |
| 581 | + warn( | |
| 582 | + "encountered errors while parsing an array or dictionary with more than 5000 " | |
| 583 | + "elements; giving up on reading object"); | |
| 584 | + return true; | |
| 585 | + } | |
| 575 | 586 | warn( |
| 576 | - "encountered errors while parsing an array or dictionary with more than 5000 " | |
| 577 | - "elements; giving up on reading object"); | |
| 578 | - return true; | |
| 587 | + "encountered an array or dictionary with more than 5000 elements during xref recovery; " | |
| 588 | + "giving up on reading object"); | |
| 579 | 589 | } |
| 580 | 590 | if (--max_bad_count > 0 && good_count > 4) { |
| 581 | 591 | good_count = 0; | ... | ... |
libqpdf/QPDF_objects.cc
| ... | ... | @@ -200,6 +200,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) |
| 200 | 200 | }; |
| 201 | 201 | |
| 202 | 202 | m->reconstructed_xref = true; |
| 203 | + m->in_xref_reconstruction = true; | |
| 203 | 204 | // We may find more objects, which may contain dangling references. |
| 204 | 205 | m->fixed_dangling_refs = false; |
| 205 | 206 | |
| ... | ... | @@ -377,6 +378,8 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) |
| 377 | 378 | throw damagedPDF("", -1, "unable to find any pages while recovering damaged file"); |
| 378 | 379 | } |
| 379 | 380 | } |
| 381 | + | |
| 382 | + m->in_xref_reconstruction = false; | |
| 380 | 383 | // We could iterate through the objects looking for streams and try to find objects inside of |
| 381 | 384 | // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors |
| 382 | 385 | // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything |
| ... | ... | @@ -1154,7 +1157,8 @@ QPDFObjectHandle |
| 1154 | 1157 | QPDF::readTrailer() |
| 1155 | 1158 | { |
| 1156 | 1159 | qpdf_offset_t offset = m->file->tell(); |
| 1157 | - auto [object, empty] = QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, *this); | |
| 1160 | + auto [object, empty] = QPDFParser::parse( | |
| 1161 | + *m->file, "trailer", m->tokenizer, nullptr, *this, m->in_xref_reconstruction); | |
| 1158 | 1162 | if (empty) { |
| 1159 | 1163 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in |
| 1160 | 1164 | // actual PDF files and Adobe Reader appears to ignore them. |
| ... | ... | @@ -1175,8 +1179,13 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) |
| 1175 | 1179 | |
| 1176 | 1180 | StringDecrypter decrypter{this, og}; |
| 1177 | 1181 | StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; |
| 1178 | - auto [object, empty] = | |
| 1179 | - QPDFParser::parse(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, *this); | |
| 1182 | + auto [object, empty] = QPDFParser::parse( | |
| 1183 | + *m->file, | |
| 1184 | + m->last_object_description, | |
| 1185 | + m->tokenizer, | |
| 1186 | + decrypter_ptr, | |
| 1187 | + *this, | |
| 1188 | + m->in_xref_reconstruction); | |
| 1180 | 1189 | if (empty) { |
| 1181 | 1190 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in |
| 1182 | 1191 | // actual PDF files and Adobe Reader appears to ignore them. | ... | ... |
libqpdf/qpdf/QPDFParser.hh
| ... | ... | @@ -36,7 +36,8 @@ class QPDFParser |
| 36 | 36 | std::string const& object_description, |
| 37 | 37 | qpdf::Tokenizer& tokenizer, |
| 38 | 38 | QPDFObjectHandle::StringDecrypter* decrypter, |
| 39 | - QPDF& context); | |
| 39 | + QPDF& context, | |
| 40 | + bool sanity_checks); | |
| 40 | 41 | |
| 41 | 42 | static std::pair<QPDFObjectHandle, bool> parse( |
| 42 | 43 | qpdf::is::OffsetBuffer& input, |
| ... | ... | @@ -63,7 +64,8 @@ class QPDFParser |
| 63 | 64 | QPDF* context, |
| 64 | 65 | bool parse_pdf, |
| 65 | 66 | int stream_id = 0, |
| 66 | - int obj_id = 0) : | |
| 67 | + int obj_id = 0, | |
| 68 | + bool sanity_checks = false) : | |
| 67 | 69 | input(input), |
| 68 | 70 | object_description(object_description), |
| 69 | 71 | tokenizer(tokenizer), |
| ... | ... | @@ -72,7 +74,8 @@ class QPDFParser |
| 72 | 74 | description(std::move(sp_description)), |
| 73 | 75 | parse_pdf(parse_pdf), |
| 74 | 76 | stream_id(stream_id), |
| 75 | - obj_id(obj_id) | |
| 77 | + obj_id(obj_id), | |
| 78 | + sanity_checks(sanity_checks) | |
| 76 | 79 | { |
| 77 | 80 | } |
| 78 | 81 | |
| ... | ... | @@ -125,6 +128,7 @@ class QPDFParser |
| 125 | 128 | bool parse_pdf{false}; |
| 126 | 129 | int stream_id{0}; |
| 127 | 130 | int obj_id{0}; |
| 131 | + bool sanity_checks{false}; | |
| 128 | 132 | |
| 129 | 133 | std::vector<StackFrame> stack; |
| 130 | 134 | StackFrame* frame{nullptr}; | ... | ... |
libqpdf/qpdf/QPDF_private.hh
| ... | ... | @@ -490,6 +490,7 @@ class QPDF::Members |
| 490 | 490 | // copied_stream_data_provider is owned by copied_streams |
| 491 | 491 | CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; |
| 492 | 492 | bool reconstructed_xref{false}; |
| 493 | + bool in_xref_reconstruction{false}; | |
| 493 | 494 | bool fixed_dangling_refs{false}; |
| 494 | 495 | bool immediate_copy_from{false}; |
| 495 | 496 | bool in_parse{false}; | ... | ... |