Commit f4e468b18081d93e57ea14a670135666fed6a484
Committed by
GitHub
Merge pull request #1239 from m-holger/fuzz
Add further sanity check to QPDF::reconstruct_xref
Showing
7 changed files
with
22 additions
and
25 deletions
fuzz/dct_fuzzer_seed_corpus/25297ce5437bb882fbbd5073334d2eb64fb9c40b
0 → 100644
No preview for this file type
fuzz/dct_fuzzer_seed_corpus/f48621948bc5b8c7debabe2fbec04cad56927e12
0 → 100644
No preview for this file type
fuzz/qtest/fuzz.test
| @@ -13,7 +13,7 @@ my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS"; | @@ -13,7 +13,7 @@ my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS"; | ||
| 13 | 13 | ||
| 14 | my @fuzzers = ( | 14 | my @fuzzers = ( |
| 15 | ['ascii85' => 1], | 15 | ['ascii85' => 1], |
| 16 | - ['dct' => 2], | 16 | + ['dct' => 4], |
| 17 | ['flate' => 1], | 17 | ['flate' => 1], |
| 18 | ['hex' => 1], | 18 | ['hex' => 1], |
| 19 | ['json' => 40], | 19 | ['json' => 40], |
libqpdf/Pl_DCT.cc
| @@ -335,10 +335,11 @@ Pl_DCT::decompress(void* cinfo_p, Buffer* b) | @@ -335,10 +335,11 @@ Pl_DCT::decompress(void* cinfo_p, Buffer* b) | ||
| 335 | (void)jpeg_calc_output_dimensions(cinfo); | 335 | (void)jpeg_calc_output_dimensions(cinfo); |
| 336 | unsigned int width = cinfo->output_width * QIntC::to_uint(cinfo->output_components); | 336 | unsigned int width = cinfo->output_width * QIntC::to_uint(cinfo->output_components); |
| 337 | if (memory_limit > 0 && | 337 | if (memory_limit > 0 && |
| 338 | - width > (static_cast<unsigned long>(memory_limit) / (2U * cinfo->output_height))) { | ||
| 339 | - // Even if jpeglib does not run out of memory, qpdf will while buffering thye data before | ||
| 340 | - // writing it. | ||
| 341 | - throw std::runtime_error("Pl_DCT::decompress: JPEG data exceeds memory limit"); | 338 | + width > (static_cast<unsigned long>(memory_limit) / (20U * cinfo->output_height))) { |
| 339 | + // Even if jpeglib does not run out of memory, qpdf will while buffering the data before | ||
| 340 | + // writing it. Furthermore, for very large images runtime can be significant before the | ||
| 341 | + // first warning is encountered causing a timeout in oss-fuzz. | ||
| 342 | + throw std::runtime_error("Pl_DCT::decompress: JPEG data large - may be too slow"); | ||
| 342 | } | 343 | } |
| 343 | JSAMPARRAY buffer = | 344 | JSAMPARRAY buffer = |
| 344 | (*cinfo->mem->alloc_sarray)(reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, width, 1); | 345 | (*cinfo->mem->alloc_sarray)(reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, width, 1); |
libqpdf/QPDF.cc
| @@ -543,6 +543,10 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -543,6 +543,10 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 543 | throw e; | 543 | throw e; |
| 544 | } | 544 | } |
| 545 | 545 | ||
| 546 | + // If recovery generates more than 1000 warnings, the file is so severely damaged that there | ||
| 547 | + // probably is no point trying to continue. | ||
| 548 | + const auto max_warnings = m->warnings.size() + 1000U; | ||
| 549 | + | ||
| 546 | m->reconstructed_xref = true; | 550 | m->reconstructed_xref = true; |
| 547 | // We may find more objects, which may contain dangling references. | 551 | // We may find more objects, which may contain dangling references. |
| 548 | m->fixed_dangling_refs = false; | 552 | m->fixed_dangling_refs = false; |
| @@ -596,6 +600,9 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -596,6 +600,9 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 596 | setTrailer(t); | 600 | setTrailer(t); |
| 597 | } | 601 | } |
| 598 | } | 602 | } |
| 603 | + if (m->warnings.size() > max_warnings) { | ||
| 604 | + throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table"); | ||
| 605 | + } | ||
| 599 | m->file->seek(next_line_start, SEEK_SET); | 606 | m->file->seek(next_line_start, SEEK_SET); |
| 600 | line_start = next_line_start; | 607 | line_start = next_line_start; |
| 601 | } | 608 | } |
| @@ -622,6 +629,10 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -622,6 +629,10 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 622 | max_offset = offset; | 629 | max_offset = offset; |
| 623 | setTrailer(oh.getDict()); | 630 | setTrailer(oh.getDict()); |
| 624 | } | 631 | } |
| 632 | + if (m->warnings.size() > max_warnings) { | ||
| 633 | + throw damagedPDF( | ||
| 634 | + "", 0, "too many errors while reconstructing cross-reference table"); | ||
| 635 | + } | ||
| 625 | } | 636 | } |
| 626 | if (max_offset > 0) { | 637 | if (max_offset > 0) { |
| 627 | try { | 638 | try { |
| @@ -646,7 +657,9 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -646,7 +657,9 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 646 | // creating QPDF objects from JSON. | 657 | // creating QPDF objects from JSON. |
| 647 | throw damagedPDF("", 0, "unable to find objects while recovering damaged file"); | 658 | throw damagedPDF("", 0, "unable to find objects while recovering damaged file"); |
| 648 | } | 659 | } |
| 649 | - | 660 | + if (m->warnings.size() > max_warnings) { |
| 661 | + throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table"); | ||
| 662 | + } | ||
| 650 | // We could iterate through the objects looking for streams and try to find objects inside of | 663 | // We could iterate through the objects looking for streams and try to find objects inside of |
| 651 | // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors | 664 | // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors |
| 652 | // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything | 665 | // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything |
libqpdf/QPDF_pages.cc
| @@ -99,7 +99,7 @@ QPDF::getAllPagesInternal( | @@ -99,7 +99,7 @@ QPDF::getAllPagesInternal( | ||
| 99 | for (int i = 0; i < n; ++i) { | 99 | for (int i = 0; i < n; ++i) { |
| 100 | auto kid = kids.getArrayItem(i); | 100 | auto kid = kids.getArrayItem(i); |
| 101 | if (!kid.isDictionary()) { | 101 | if (!kid.isDictionary()) { |
| 102 | - kid.warnIfPossible("Pages tree includes non-dictionary object; removing"); | 102 | + kid.warnIfPossible("Pages tree includes non-dictionary object; ignoring"); |
| 103 | continue; | 103 | continue; |
| 104 | } | 104 | } |
| 105 | if (kid.hasKey("/Kids")) { | 105 | if (kid.hasKey("/Kids")) { |
qpdf/qtest/qpdf/issue-335a.out
| @@ -1003,21 +1003,4 @@ WARNING: issue-335a.pdf (trailer, offset 20601): unexpected ) | @@ -1003,21 +1003,4 @@ WARNING: issue-335a.pdf (trailer, offset 20601): unexpected ) | ||
| 1003 | WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string | 1003 | WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string |
| 1004 | WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring | 1004 | WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring |
| 1005 | WARNING: issue-335a.pdf (trailer, offset 20604): too many errors; giving up on reading object | 1005 | WARNING: issue-335a.pdf (trailer, offset 20604): too many errors; giving up on reading object |
| 1006 | -WARNING: issue-335a.pdf (trailer, offset 20446): unknown token while reading object; treating as string | ||
| 1007 | -WARNING: issue-335a.pdf (trailer, offset 20601): unexpected ) | ||
| 1008 | -WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string | ||
| 1009 | -WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring | ||
| 1010 | -WARNING: issue-335a.pdf (trailer, offset 20606): treating unexpected brace token as null | ||
| 1011 | -WARNING: issue-335a.pdf (trailer, offset 20607): treating unexpected brace token as null | ||
| 1012 | -WARNING: issue-335a.pdf (trailer, offset 20607): too many errors; giving up on reading object | ||
| 1013 | -WARNING: issue-335a.pdf (trailer, offset 20598): unknown token while reading object; treating as string | ||
| 1014 | -WARNING: issue-335a.pdf (trailer, offset 20600): unexpected ) | ||
| 1015 | -WARNING: issue-335a.pdf (trailer, offset 20601): unexpected ) | ||
| 1016 | -WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string | ||
| 1017 | -WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring | ||
| 1018 | -WARNING: issue-335a.pdf (trailer, offset 20606): treating unexpected brace token as null | ||
| 1019 | -WARNING: issue-335a.pdf (trailer, offset 20606): too many errors; giving up on reading object | ||
| 1020 | -WARNING: issue-335a.pdf (trailer, offset 20684): unknown token while reading object; treating as string | ||
| 1021 | -WARNING: issue-335a.pdf (trailer, offset 20683): expected dictionary key but found non-name object; inserting key /QPDFFake1 | ||
| 1022 | -WARNING: issue-335a.pdf (trailer, offset 20747): stream keyword found in trailer | ||
| 1023 | -qpdf: issue-335a.pdf: unable to find /Root dictionary | 1006 | +qpdf: issue-335a.pdf: too many errors while reconstructing cross-reference table |