Commit ce2deaf1850f11501016b62f5a9a124c0ab30ced

Authored by m-holger
Committed by GitHub
2 parents 7d478651 f0ded6bc

Merge pull request #1230 from m-holger/clean-dct-fuzz-changes

Alternative clean dct fuzz changes
CMakeLists.txt
... ... @@ -131,10 +131,6 @@ if(FUTURE)
131 131 add_compile_definitions(QPDF_FUTURE=1)
132 132 endif()
133 133  
134   -if(OSS_FUZZ)
135   - add_compile_definitions(QPDF_OSS_FUZZ=1)
136   -endif()
137   -
138 134 enable_testing()
139 135 set(RUN_QTEST perl ${qpdf_SOURCE_DIR}/run-qtest ${ENABLE_QTC_ARG})
140 136  
... ...
ChangeLog
  1 +2024-07-04 M Holger <m.holger@qpdf.org>
  2 +
  3 + * Treat corrupt JPEG streams as unfilterable. This avoids them
  4 + getting uncompressed when writing PDF files with decode level all.
  5 +
  6 +2024-07-02 Jay Berkenbilt <ejb@ql.org>
  7 +
  8 + * Add QPDF::setMaxWarnings to set the maximum of warnings before
  9 + warning suppression.
  10 +
  11 + * Add static option to Pl_DCT to limit memory usage of
  12 + decompression. The option is generally exposed but is primarily
  13 + intended to support fuzz tests, which have explicit memory limits
  14 + that are smaller than what is commonly seen in the wild with PDF
  15 + files.
  16 +
  17 + * Add static option to Pl_DCT to control whether decompression of
  18 + corrupt JPEG data is attempted.
  19 +
  20 +2024-07-01 M Holger <m.holger@qpdf.org>
  21 +
  22 + * Bug fix: certain invalid object streams caused the insertion of
  23 + invalid entries into in the xref table.
  24 +
  25 +2024-06-29 M Holger <m.holger@qpdf.org>
  26 +
  27 + * Bug fix: in QPDFOutlineObjectHelper detect loops in the list of
  28 + direct children of an outline item.
  29 +
  30 +2024-06-27 M Holger <m.holger@qpdf.org>
  31 +
  32 + * Add sanity check in QPDF xref table reconstruction to reject
  33 + objects with impossibly large object id in order to improve
  34 + handling of severely damaged PDF files.
  35 +
  36 +2024-06-25 M Holger <m.holger@qpdf.org>
  37 +
  38 + * Detect severely damaged PDF files early. After parsing the xref
  39 + table in QPDF throw a damagedPDF exception if the root of the pages
  40 + tree is not a dictionary.
  41 +
1 42 2024-06-07 Jay Berkenbilt <ejb@ql.org>
2 43  
3 44 * 11.9.1: release
... ...
fuzz/dct_fuzzer.cc
... ... @@ -26,8 +26,18 @@ FuzzHelper::FuzzHelper(unsigned char const* data, size_t size) :
26 26 void
27 27 FuzzHelper::doChecks()
28 28 {
  29 + // Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during
  30 + // fuzzing is due to corrupt JPEG data which sometimes cannot be detected before
  31 + // jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally
  32 + // occur legitimately and therefore must be allowed during normal operations.
  33 + Pl_DCT::setMemoryLimit(1'000'000'000);
  34 +
  35 + // Do not decompress corrupt data. This may cause extended runtime within jpeglib without
  36 + // exercising additional code paths in qpdf.
  37 + Pl_DCT::setThrowOnCorruptData(true);
  38 +
29 39 Pl_Discard discard;
30   - Pl_DCT p("decode", &discard, 20'000'000);
  40 + Pl_DCT p("decode", &discard);
31 41 p.write(const_cast<unsigned char*>(data), size);
32 42 p.finish();
33 43 }
... ...
fuzz/qpdf_fuzzer.cc
1 1 #include <qpdf/Buffer.hh>
2 2 #include <qpdf/BufferInputSource.hh>
  3 +#include <qpdf/Pl_DCT.hh>
3 4 #include <qpdf/Pl_Discard.hh>
4 5 #include <qpdf/QPDF.hh>
5 6 #include <qpdf/QPDFAcroFormDocumentHelper.hh>
... ... @@ -56,6 +57,7 @@ FuzzHelper::getQpdf()
56 57 auto is =
57 58 std::shared_ptr<InputSource>(new BufferInputSource("fuzz input", &this->input_buffer));
58 59 auto qpdf = QPDF::create();
  60 + qpdf->setMaxWarnings(20);
59 61 qpdf->processInputSource(is);
60 62 return qpdf;
61 63 }
... ... @@ -171,6 +173,16 @@ FuzzHelper::testOutlines()
171 173 void
172 174 FuzzHelper::doChecks()
173 175 {
  176 + // Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during
  177 + // fuzzing is due to corrupt JPEG data which sometimes cannot be detected before
  178 + // jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally
  179 + // occur legitimately and therefore must be allowed during normal operations.
  180 + Pl_DCT::setMemoryLimit(1'000'000'000);
  181 +
  182 + // Do not decompress corrupt data. This may cause extended runtime within jpeglib without
  183 + // exercising additional code paths in qpdf, and potentially causing counterproductive timeouts.
  184 + Pl_DCT::setThrowOnCorruptData(true);
  185 +
174 186 // Get as much coverage as possible in parts of the library that
175 187 // might benefit from fuzzing.
176 188 std::cerr << "\ninfo: starting testWrite\n";
... ...
include/qpdf/Pl_DCT.hh
... ... @@ -34,20 +34,17 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline
34 34 QPDF_DLL
35 35 Pl_DCT(char const* identifier, Pipeline* next);
36 36  
37   - // Constructor for decompressing image data. If corrupt_data_limit is non-zero and the data is
38   - // corrupt, only attempt to uncompress if the uncompressed size is less than corrupt_data_limit.
  37 + // Limit the memory used by jpeglib when decompressing data.
  38 + // NB This is a static option affecting all Pl_DCT instances.
39 39 QPDF_DLL
40   - Pl_DCT(char const* identifier, Pipeline* next, size_t corrupt_data_limit);
  40 + static void setMemoryLimit(long limit);
41 41  
42   - class QPDF_DLL_CLASS CompressConfig
43   - {
44   - public:
45   - QPDF_DLL
46   - CompressConfig() = default;
47   - QPDF_DLL
48   - virtual ~CompressConfig() = default;
49   - virtual void apply(jpeg_compress_struct*) = 0;
50   - };
  42 + // Treat corrupt data as a runtime error rather than attempting to decompress regardless. This
  43 + // is the qpdf default behaviour. To attempt to decompress corrupt data set 'treat_as_error' to
  44 + // false.
  45 + // NB This is a static option affecting all Pl_DCT instances.
  46 + QPDF_DLL
  47 + static void setThrowOnCorruptData(bool treat_as_error);
51 48  
52 49 // Constructor for compressing image data
53 50 QPDF_DLL
... ... @@ -57,8 +54,7 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline
57 54 JDIMENSION image_width,
58 55 JDIMENSION image_height,
59 56 int components,
60   - J_COLOR_SPACE color_space,
61   - CompressConfig* config_callback = nullptr);
  57 + J_COLOR_SPACE color_space);
62 58  
63 59 QPDF_DLL
64 60 ~Pl_DCT() override;
... ... @@ -83,6 +79,7 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline
83 79 public:
84 80 QPDF_DLL
85 81 ~Members() = default;
  82 + Members(Members const&) = delete;
86 83  
87 84 private:
88 85 // For compression
... ... @@ -90,25 +87,18 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline
90 87 JDIMENSION image_width,
91 88 JDIMENSION image_height,
92 89 int components,
93   - J_COLOR_SPACE color_space,
94   - CompressConfig* config_callback);
  90 + J_COLOR_SPACE color_space);
95 91 // For decompression
96   - Members(size_t corrupt_data_limit);
97   - Members(Members const&) = delete;
  92 + Members();
98 93  
99 94 action_e action;
100 95 Pl_Buffer buf;
101 96  
102   - // Used for decompression
103   - size_t corrupt_data_limit{0};
104   -
105 97 // Used for compression
106 98 JDIMENSION image_width{0};
107 99 JDIMENSION image_height{0};
108 100 int components{1};
109 101 J_COLOR_SPACE color_space{JCS_GRAYSCALE};
110   -
111   - CompressConfig* config_callback{nullptr};
112 102 };
113 103  
114 104 std::shared_ptr<Members> m;
... ...
include/qpdf/QPDF.hh
... ... @@ -228,6 +228,10 @@ class QPDF
228 228 QPDF_DLL
229 229 void setSuppressWarnings(bool);
230 230  
  231 + // Set the maximum number of warnings to output. Subsequent warnings are suppressed.
  232 + QPDF_DLL
  233 + void setMaxWarnings(int);
  234 +
231 235 // By default, QPDF will try to recover if it finds certain types of errors in PDF files. If
232 236 // turned off, it will throw an exception on the first such problem it finds without attempting
233 237 // recovery.
... ... @@ -1497,6 +1501,7 @@ class QPDF
1497 1501 bool provided_password_is_hex_key{false};
1498 1502 bool ignore_xref_streams{false};
1499 1503 bool suppress_warnings{false};
  1504 + int max_warnings{0};
1500 1505 bool attempt_recovery{true};
1501 1506 bool check_mode{false};
1502 1507 std::shared_ptr<EncryptionParameters> encp;
... ...
job.sums
1 1 # Generated by generate_auto_job
2   -CMakeLists.txt 456938b9debc4997f142ccfb13f3baf2517ae5855e1fe9b2ada1a0b8f7e4facf
  2 +CMakeLists.txt 47752f33b17fa526d46fc608a25ad6b8c61feba9deb1bd659fddf93e6e08b102
3 3 generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a86
4 4 include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4
5 5 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42
... ...
libqpdf/Pl_DCT.cc
... ... @@ -20,6 +20,9 @@ namespace
20 20 jmp_buf jmpbuf;
21 21 std::string msg;
22 22 };
  23 +
  24 + long memory_limit{0};
  25 + bool throw_on_corrupt_data{true};
23 26 } // namespace
24 27  
25 28 static void
... ... @@ -32,38 +35,39 @@ error_handler(j_common_ptr cinfo)
32 35 longjmp(jerr->jmpbuf, 1);
33 36 }
34 37  
35   -Pl_DCT::Members::Members(size_t corrupt_data_limit) :
  38 +Pl_DCT::Members::Members() :
36 39 action(a_decompress),
37   - buf("DCT compressed image"),
38   - corrupt_data_limit(corrupt_data_limit)
  40 + buf("DCT compressed image")
39 41 {
40 42 }
41 43  
42 44 Pl_DCT::Members::Members(
43   - JDIMENSION image_width,
44   - JDIMENSION image_height,
45   - int components,
46   - J_COLOR_SPACE color_space,
47   - CompressConfig* config_callback) :
  45 + JDIMENSION image_width, JDIMENSION image_height, int components, J_COLOR_SPACE color_space) :
48 46 action(a_compress),
49 47 buf("DCT uncompressed image"),
50 48 image_width(image_width),
51 49 image_height(image_height),
52 50 components(components),
53   - color_space(color_space),
54   - config_callback(config_callback)
  51 + color_space(color_space)
55 52 {
56 53 }
57 54  
58 55 Pl_DCT::Pl_DCT(char const* identifier, Pipeline* next) :
59   - Pl_DCT(identifier, next, 0)
  56 + Pipeline(identifier, next),
  57 + m(new Members())
60 58 {
61 59 }
62 60  
63   -Pl_DCT::Pl_DCT(char const* identifier, Pipeline* next, size_t corrupt_data_limit) :
64   - Pipeline(identifier, next),
65   - m(new Members(corrupt_data_limit))
  61 +void
  62 +Pl_DCT::setMemoryLimit(long limit)
66 63 {
  64 + memory_limit = limit;
  65 +}
  66 +
  67 +void
  68 +Pl_DCT::setThrowOnCorruptData(bool treat_as_error)
  69 +{
  70 + throw_on_corrupt_data = treat_as_error;
67 71 }
68 72  
69 73 Pl_DCT::Pl_DCT(
... ... @@ -72,10 +76,9 @@ Pl_DCT::Pl_DCT(
72 76 JDIMENSION image_width,
73 77 JDIMENSION image_height,
74 78 int components,
75   - J_COLOR_SPACE color_space,
76   - CompressConfig* config_callback) :
  79 + J_COLOR_SPACE color_space) :
77 80 Pipeline(identifier, next),
78   - m(new Members(image_width, image_height, components, color_space, config_callback))
  81 + m(new Members(image_width, image_height, components, color_space))
79 82 {
80 83 }
81 84  
... ... @@ -273,9 +276,6 @@ Pl_DCT::compress(void* cinfo_p, Buffer* b)
273 276 cinfo->input_components = m->components;
274 277 cinfo->in_color_space = m->color_space;
275 278 jpeg_set_defaults(cinfo);
276   - if (m->config_callback) {
277   - m->config_callback->apply(cinfo);
278   - }
279 279  
280 280 jpeg_start_compress(cinfo, TRUE);
281 281  
... ... @@ -312,36 +312,32 @@ Pl_DCT::decompress(void* cinfo_p, Buffer* b)
312 312 # pragma GCC diagnostic pop
313 313 #endif
314 314  
315   -#ifdef QPDF_OSS_FUZZ
316   - // Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during
317   - // fuzzing is due to corrupt JPEG data which sometimes cannot be detected before
318   - // jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally
319   - // occur legitimately and therefore must be allowed during normal operations.
320   - cinfo->mem->max_memory_to_use = 1'000'000'000;
321   - // For some corrupt files the memory used internally by libjpeg stays within the above limits
322   - // even though the size written to the next pipeline is significantly larger.
323   - m->corrupt_data_limit = 10'000'000;
324   -#endif
  315 + if (memory_limit > 0) {
  316 + cinfo->mem->max_memory_to_use = memory_limit;
  317 + }
  318 +
325 319 jpeg_buffer_src(cinfo, b);
326 320  
327 321 (void)jpeg_read_header(cinfo, TRUE);
  322 + if (throw_on_corrupt_data && cinfo->err->num_warnings > 0) {
  323 + throw std::runtime_error("Pl_DCT::decompress: JPEG data is corrupt");
  324 + }
328 325 (void)jpeg_calc_output_dimensions(cinfo);
329 326 unsigned int width = cinfo->output_width * QIntC::to_uint(cinfo->output_components);
330   - if (cinfo->err->num_warnings == 0 || m->corrupt_data_limit == 0 ||
331   - (width * QIntC::to_uint(cinfo->output_height)) < m->corrupt_data_limit) {
332   - // err->num_warnings is the number of corrupt data warnings emitted.
333   - // err->msg_code could also be the code of an informational message.
334   - JSAMPARRAY buffer = (*cinfo->mem->alloc_sarray)(
335   - reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, width, 1);
336   -
337   - (void)jpeg_start_decompress(cinfo);
338   - while (cinfo->output_scanline < cinfo->output_height) {
339   - (void)jpeg_read_scanlines(cinfo, buffer, 1);
340   - getNext()->write(buffer[0], width * sizeof(buffer[0][0]));
341   - }
342   - (void)jpeg_finish_decompress(cinfo);
343   - } else {
344   - *QPDFLogger::defaultLogger()->getError() << "corrupt JPEG data ignored" << "\n";
  327 + // err->num_warnings is the number of corrupt data warnings emitted.
  328 + // err->msg_code could also be the code of an informational message.
  329 + JSAMPARRAY buffer =
  330 + (*cinfo->mem->alloc_sarray)(reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, width, 1);
  331 +
  332 + (void)jpeg_start_decompress(cinfo);
  333 + while (cinfo->output_scanline < cinfo->output_height &&
  334 + (!throw_on_corrupt_data || cinfo->err->num_warnings == 0)) {
  335 + (void)jpeg_read_scanlines(cinfo, buffer, 1);
  336 + getNext()->write(buffer[0], width * sizeof(buffer[0][0]));
  337 + }
  338 + (void)jpeg_finish_decompress(cinfo);
  339 + if (throw_on_corrupt_data && cinfo->err->num_warnings > 0) {
  340 + throw std::runtime_error("Pl_DCT::decompress: JPEG data is corrupt");
345 341 }
346 342 getNext()->finish();
347 343 }
... ...
libqpdf/QPDF.cc
... ... @@ -332,6 +332,12 @@ QPDF::setSuppressWarnings(bool val)
332 332 }
333 333  
334 334 void
  335 +QPDF::setMaxWarnings(int val)
  336 +{
  337 + m->suppress_warnings = val;
  338 +}
  339 +
  340 +void
335 341 QPDF::setAttemptRecovery(bool val)
336 342 {
337 343 m->attempt_recovery = val;
... ... @@ -500,13 +506,11 @@ QPDF::warn(QPDFExc const&amp; e)
500 506 {
501 507 m->warnings.push_back(e);
502 508 if (!m->suppress_warnings) {
503   -#ifdef QPDF_OSS_FUZZ
504   - if (m->warnings.size() > 20) {
505   - *m->log->getWarn() << "WARNING: too many warnings - additional warnings surpressed\n";
  509 + if (m->max_warnings > 0 && m->warnings.size() > 20) {
  510 + *m->log->getWarn() << "WARNING: too many warnings - additional warnings suppressed\n";
506 511 m->suppress_warnings = true;
507 512 return;
508 513 }
509   -#endif
510 514 *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";
511 515 }
512 516 }
... ... @@ -1934,6 +1938,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1934 1938 continue;
1935 1939 }
1936 1940 if (num == obj_stream_number) {
  1941 + QTC::TC("qpdf", "QPDF ignore self-referential object stream");
1937 1942 warn(damagedPDF(
1938 1943 input,
1939 1944 m->last_object_description,
... ...
libtests/dct_compress.cc
... ... @@ -14,21 +14,6 @@ usage()
14 14 exit(2);
15 15 }
16 16  
17   -class Callback: public Pl_DCT::CompressConfig
18   -{
19   - public:
20   - Callback() = default;
21   - ~Callback() override = default;
22   - void apply(jpeg_compress_struct*) override;
23   - bool called{false};
24   -};
25   -
26   -void
27   -Callback::apply(jpeg_compress_struct*)
28   -{
29   - this->called = true;
30   -}
31   -
32 17 int
33 18 main(int argc, char* argv[])
34 19 {
... ... @@ -66,21 +51,12 @@ main(int argc, char* argv[])
66 51 FILE* outfile = QUtil::safe_fopen(outfilename, "wb");
67 52 Pl_StdioFile out("stdout", outfile);
68 53 unsigned char buf[100];
69   - bool done = false;
70   - Callback callback;
71   - Pl_DCT dct("dct", &out, width, height, components, cs, &callback);
72   - while (!done) {
73   - size_t len = fread(buf, 1, sizeof(buf), infile);
74   - if (len <= 0) {
75   - done = true;
76   - } else {
77   - dct.write(buf, len);
78   - }
  54 + Pl_DCT dct("dct", &out, width, height, components, cs);
  55 + while (size_t len = fread(buf, 1, sizeof(buf), infile)) {
  56 + dct.write(buf, len);
79 57 }
80 58 dct.finish();
81   - if (!callback.called) {
82   - std::cout << "Callback was not called" << std::endl;
83   - }
  59 +
84 60 fclose(infile);
85 61 fclose(outfile);
86 62 return 0;
... ...
qpdf/qpdf.testcov
... ... @@ -4,6 +4,7 @@ QPDF err wrong objid/generation 0
4 4 QPDF check objid 1
5 5 QPDF check generation 1
6 6 QPDF check obj 1
  7 +QPDF ignore self-referential object stream 0
7 8 QPDF hint table length indirect 0
8 9 QPDF hint table length direct 0
9 10 QPDF P absent in lindict 0
... ...
qpdf/qtest/object-stream.test
... ... @@ -16,7 +16,7 @@ cleanup();
16 16  
17 17 my $td = new TestDriver('object-stream');
18 18  
19   -my $n_tests = 7 + (36 * 4) + (12 * 2);
  19 +my $n_tests = 9 + (36 * 4) + (12 * 2);
20 20 my $n_compare_pdfs = 36;
21 21  
22 22 for (my $n = 16; $n <= 19; ++$n)
... ... @@ -107,5 +107,16 @@ $td-&gt;runtest(&quot;check file&quot;,
107 107 {$td->FILE => "a.pdf"},
108 108 {$td->FILE => "recover-xref-stream-recovered.pdf"});
109 109  
  110 +# Self-referential object stream
  111 +$td->runtest("self-referential object stream",
  112 + {$td->COMMAND => "qpdf --static-id --qdf" .
  113 + " object-stream-self-ref.pdf a.pdf"},
  114 + {$td->FILE => "object-stream-self-ref.out", $td->EXIT_STATUS => 3},
  115 + $td->NORMALIZE_NEWLINES);
  116 +$td->runtest("check file",
  117 + {$td->FILE => "a.pdf"},
  118 + {$td->FILE => "object-stream-self-ref.out.pdf"});
  119 +
  120 +
110 121 cleanup();
111 122 $td->report(calc_ntests($n_tests, $n_compare_pdfs));
... ...
qpdf/qtest/qpdf/object-stream-self-ref.out 0 โ†’ 100644
  1 +WARNING: object-stream-self-ref.pdf object stream 1 (object 1 0, offset 2): object stream claims to contain itself
  2 +qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/qpdf/object-stream-self-ref.out.pdf 0 โ†’ 100644
No preview for this file type
qpdf/qtest/qpdf/object-stream-self-ref.pdf 0 โ†’ 100644
No preview for this file type