Merge pull request #1236 from m-holger/fuzz

Add additional xref reconstruction sanity checks and fuzz test cases

Merge pull request #1236 from m-holger/fuzz
Add additional xref reconstruction sanity checks and fuzz test cases
m-holger · GitHub
2 parents 9ffa2014 2b6500ea
Showing 7 changed files with 24 additions and 14 deletions
fuzz/CMakeLists.txt
fuzz/dct_fuzzer_seed_corpus/e0b87af81384c81c7f5c3d71dfe525daeddc1d19
fuzz/qpdf_extra/69977a.fuzz
fuzz/qtest/fuzz.test
libqpdf/Pl_DCT.cc
libqpdf/QPDF.cc
qpdf/qtest/qpdf/issue-147.out
@@ -123,6 +123,7 @@ set(CORPUS_OTHER
   69913.fuzz
   69969.fuzz
   69977.fuzz
+  69977a.fuzz
   70055.fuzz
 )
@@ -13,7 +13,7 @@ my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS";
 my @fuzzers = (
     ['ascii85' => 1],
-    ['dct' => 1],
+    ['dct' => 2],
     ['flate' => 1],
     ['hex' => 1],
     ['json' => 40],
@@ -21,7 +21,7 @@ my @fuzzers = (
     ['pngpredictor' => 1],
     ['runlength' => 6],
     ['tiffpredictor' => 2],
-    ['qpdf' => 66],             # increment when adding new files
+    ['qpdf' => 67],             # increment when adding new files
     );
 my $n_tests = 0;
@@ -35,6 +35,16 @@ error_handler(j_common_ptr cinfo)
     longjmp(jerr->jmpbuf, 1);
 }
+static void
+emit_message(j_common_ptr cinfo, int msg_level)
+{
+    if (msg_level == -1) {
+        auto* jerr = reinterpret_cast<qpdf_jpeg_error_mgr*>(cinfo->err);
+        jerr->msg = "Pl_DCT::decompress: JPEG data is corrupt";
+        longjmp(jerr->jmpbuf, 1);
+    }
+}
+
 Pl_DCT::Members::Members() :
     action(a_decompress),
     buf("DCT compressed image")
@@ -116,6 +126,9 @@ Pl_DCT::finish()
     cinfo_compress.err = jpeg_std_error(&(jerr.pub));
     cinfo_decompress.err = jpeg_std_error(&(jerr.pub));
     jerr.pub.error_exit = error_handler;
+    if (m->action == a_decompress && throw_on_corrupt_data) {
+        jerr.pub.emit_message = emit_message;
+    }
     bool error = false;
     // The jpeg library is a "C" library, so we use setjmp and longjmp for exception handling.
@@ -319,11 +332,6 @@ Pl_DCT::decompress(void* cinfo_p, Buffer* b)
     jpeg_buffer_src(cinfo, b);
     (void)jpeg_read_header(cinfo, TRUE);
-    if (throw_on_corrupt_data && cinfo->err->num_warnings > 0) {
-        // err->num_warnings is the number of corrupt data warnings emitted.
-        // err->msg_code could also be the code of an informational message.
-        throw std::runtime_error("Pl_DCT::decompress: JPEG data is corrupt");
-    }
     (void)jpeg_calc_output_dimensions(cinfo);
     unsigned int width = cinfo->output_width * QIntC::to_uint(cinfo->output_components);
     if (memory_limit > 0 &&
@@ -336,14 +344,10 @@ Pl_DCT::decompress(void* cinfo_p, Buffer* b)
         (*cinfo->mem->alloc_sarray)(reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, width, 1);
     (void)jpeg_start_decompress(cinfo);
-    while (cinfo->output_scanline < cinfo->output_height &&
-           (!throw_on_corrupt_data || cinfo->err->num_warnings == 0)) {
+    while (cinfo->output_scanline < cinfo->output_height) {
         (void)jpeg_read_scanlines(cinfo, buffer, 1);
         getNext()->write(buffer[0], width * sizeof(buffer[0][0]));
     }
     (void)jpeg_finish_decompress(cinfo);
-    if (throw_on_corrupt_data && cinfo->err->num_warnings > 0) {
-        throw std::runtime_error("Pl_DCT::decompress: JPEG data is corrupt");
-    }
     getNext()->finish();
 }
@@ -334,7 +334,7 @@ QPDF::setSuppressWarnings(bool val)
 void
 QPDF::setMaxWarnings(int val)
 {
-    m->suppress_warnings = val;
+    m->max_warnings = val;
 }
 void
@@ -641,6 +641,11 @@ QPDF::reconstruct_xref(QPDFExc& e)
         throw damagedPDF("", 0, "unable to find trailer dictionary while recovering damaged file");
     }
+    if (m->xref_table.empty()) {
+        // We cannot check for an empty xref table in parse because empty tables are valid when
+        // creating QPDF objects from JSON.
+        throw damagedPDF("", 0, "unable to find objects while recovering damaged file");
+    }
     // We could iterate through the objects looking for streams and try to find objects inside of
     // them, but it's probably not worth the trouble.  Acrobat can't recover files with any errors
@@ -4,4 +4,4 @@ WARNING: issue-147.pdf: can't find startxref
 WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table
 WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1
 WARNING: issue-147.pdf: ignoring object with impossibly large id 62
-qpdf: issue-147.pdf: unable to find /Root dictionary
+qpdf: issue-147.pdf: unable to find objects while recovering damaged file