Commit 6e580e2453988e170f0cc41942bfa22877c44d1f

Authored by m-holger
Committed by GitHub
2 parents 8ef5cfad 2b94c755

Merge pull request #1444 from m-holger/fuzz

During xref reconstruction reject unreasonably large objects
fuzz/CMakeLists.txt
@@ -158,6 +158,7 @@ set(CORPUS_OTHER @@ -158,6 +158,7 @@ set(CORPUS_OTHER
158 398060137.fuzz 158 398060137.fuzz
159 409905355.fuzz 159 409905355.fuzz
160 411312393.fuzz 160 411312393.fuzz
  161 + 5109284021272576.fuzz
161 ) 162 )
162 163
163 set(CORPUS_DIR ${CMAKE_CURRENT_BINARY_DIR}/qpdf_corpus) 164 set(CORPUS_DIR ${CMAKE_CURRENT_BINARY_DIR}/qpdf_corpus)
fuzz/qpdf_extra/5109284021272576.fuzz 0 → 100644
No preview for this file type
fuzz/qtest/fuzz.test
@@ -11,7 +11,7 @@ my $td = new TestDriver('fuzz'); @@ -11,7 +11,7 @@ my $td = new TestDriver('fuzz');
11 11
12 my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS"; 12 my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS";
13 13
14 -my $n_qpdf_files = 95; # increment when adding new files 14 +my $n_qpdf_files = 96; # increment when adding new files
15 15
16 my @fuzzers = ( 16 my @fuzzers = (
17 ['ascii85' => 1], 17 ['ascii85' => 1],
libqpdf/QPDFParser.cc
@@ -71,7 +71,8 @@ QPDFParser::parse( @@ -71,7 +71,8 @@ QPDFParser::parse(
71 std::string const& object_description, 71 std::string const& object_description,
72 qpdf::Tokenizer& tokenizer, 72 qpdf::Tokenizer& tokenizer,
73 QPDFObjectHandle::StringDecrypter* decrypter, 73 QPDFObjectHandle::StringDecrypter* decrypter,
74 - QPDF& context) 74 + QPDF& context,
  75 + bool sanity_checks)
75 { 76 {
76 bool empty{false}; 77 bool empty{false};
77 auto result = QPDFParser( 78 auto result = QPDFParser(
@@ -81,7 +82,10 @@ QPDFParser::parse( @@ -81,7 +82,10 @@ QPDFParser::parse(
81 tokenizer, 82 tokenizer,
82 decrypter, 83 decrypter,
83 &context, 84 &context,
84 - true) 85 + true,
  86 + 0,
  87 + 0,
  88 + sanity_checks)
85 .parse(empty, false); 89 .parse(empty, false);
86 return {result, empty}; 90 return {result, empty};
87 } 91 }
@@ -298,7 +302,7 @@ QPDFParser::parseRemainder(bool content_stream) @@ -298,7 +302,7 @@ QPDFParser::parseRemainder(bool content_stream)
298 continue; 302 continue;
299 303
300 case QPDFTokenizer::tt_array_close: 304 case QPDFTokenizer::tt_array_close:
301 - if (bad_count && !max_bad_count) { 305 + if ((bad_count || sanity_checks) && !max_bad_count) {
302 // Trigger warning. 306 // Trigger warning.
303 (void)tooManyBadTokens(); 307 (void)tooManyBadTokens();
304 return {QPDFObject::create<QPDF_Null>()}; 308 return {QPDFObject::create<QPDF_Null>()};
@@ -329,7 +333,7 @@ QPDFParser::parseRemainder(bool content_stream) @@ -329,7 +333,7 @@ QPDFParser::parseRemainder(bool content_stream)
329 continue; 333 continue;
330 334
331 case QPDFTokenizer::tt_dict_close: 335 case QPDFTokenizer::tt_dict_close:
332 - if (bad_count && !max_bad_count) { 336 + if ((bad_count || sanity_checks) && !max_bad_count) {
333 // Trigger warning. 337 // Trigger warning.
334 (void)tooManyBadTokens(); 338 (void)tooManyBadTokens();
335 return {QPDFObject::create<QPDF_Null>()}; 339 return {QPDFObject::create<QPDF_Null>()};
@@ -514,7 +518,8 @@ template &lt;typename T, typename... Args&gt; @@ -514,7 +518,8 @@ template &lt;typename T, typename... Args&gt;
514 void 518 void
515 QPDFParser::addScalar(Args&&... args) 519 QPDFParser::addScalar(Args&&... args)
516 { 520 {
517 - if (bad_count && (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { 521 + if ((bad_count || sanity_checks) &&
  522 + (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
518 // Stop adding scalars. We are going to abort when the close token or a bad token is 523 // Stop adding scalars. We are going to abort when the close token or a bad token is
519 // encountered. 524 // encountered.
520 max_bad_count = 0; 525 max_bad_count = 0;
@@ -572,10 +577,15 @@ bool @@ -572,10 +577,15 @@ bool
572 QPDFParser::tooManyBadTokens() 577 QPDFParser::tooManyBadTokens()
573 { 578 {
574 if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) { 579 if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) {
  580 + if (bad_count) {
  581 + warn(
  582 + "encountered errors while parsing an array or dictionary with more than 5000 "
  583 + "elements; giving up on reading object");
  584 + return true;
  585 + }
575 warn( 586 warn(
576 - "encountered errors while parsing an array or dictionary with more than 5000 "  
577 - "elements; giving up on reading object");  
578 - return true; 587 + "encountered an array or dictionary with more than 5000 elements during xref recovery; "
  588 + "giving up on reading object");
579 } 589 }
580 if (--max_bad_count > 0 && good_count > 4) { 590 if (--max_bad_count > 0 && good_count > 4) {
581 good_count = 0; 591 good_count = 0;
libqpdf/QPDF_objects.cc
@@ -200,6 +200,7 @@ QPDF::reconstruct_xref(QPDFExc&amp; e, bool found_startxref) @@ -200,6 +200,7 @@ QPDF::reconstruct_xref(QPDFExc&amp; e, bool found_startxref)
200 }; 200 };
201 201
202 m->reconstructed_xref = true; 202 m->reconstructed_xref = true;
  203 + m->in_xref_reconstruction = true;
203 // We may find more objects, which may contain dangling references. 204 // We may find more objects, which may contain dangling references.
204 m->fixed_dangling_refs = false; 205 m->fixed_dangling_refs = false;
205 206
@@ -377,6 +378,8 @@ QPDF::reconstruct_xref(QPDFExc&amp; e, bool found_startxref) @@ -377,6 +378,8 @@ QPDF::reconstruct_xref(QPDFExc&amp; e, bool found_startxref)
377 throw damagedPDF("", -1, "unable to find any pages while recovering damaged file"); 378 throw damagedPDF("", -1, "unable to find any pages while recovering damaged file");
378 } 379 }
379 } 380 }
  381 +
  382 + m->in_xref_reconstruction = false;
380 // We could iterate through the objects looking for streams and try to find objects inside of 383 // We could iterate through the objects looking for streams and try to find objects inside of
381 // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors 384 // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors
382 // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything 385 // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything
@@ -1154,7 +1157,8 @@ QPDFObjectHandle @@ -1154,7 +1157,8 @@ QPDFObjectHandle
1154 QPDF::readTrailer() 1157 QPDF::readTrailer()
1155 { 1158 {
1156 qpdf_offset_t offset = m->file->tell(); 1159 qpdf_offset_t offset = m->file->tell();
1157 - auto [object, empty] = QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, *this); 1160 + auto [object, empty] = QPDFParser::parse(
  1161 + *m->file, "trailer", m->tokenizer, nullptr, *this, m->in_xref_reconstruction);
1158 if (empty) { 1162 if (empty) {
1159 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in 1163 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1160 // actual PDF files and Adobe Reader appears to ignore them. 1164 // actual PDF files and Adobe Reader appears to ignore them.
@@ -1175,8 +1179,13 @@ QPDF::readObject(std::string const&amp; description, QPDFObjGen og) @@ -1175,8 +1179,13 @@ QPDF::readObject(std::string const&amp; description, QPDFObjGen og)
1175 1179
1176 StringDecrypter decrypter{this, og}; 1180 StringDecrypter decrypter{this, og};
1177 StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; 1181 StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr;
1178 - auto [object, empty] =  
1179 - QPDFParser::parse(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, *this); 1182 + auto [object, empty] = QPDFParser::parse(
  1183 + *m->file,
  1184 + m->last_object_description,
  1185 + m->tokenizer,
  1186 + decrypter_ptr,
  1187 + *this,
  1188 + m->in_xref_reconstruction);
1180 if (empty) { 1189 if (empty) {
1181 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in 1190 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1182 // actual PDF files and Adobe Reader appears to ignore them. 1191 // actual PDF files and Adobe Reader appears to ignore them.
libqpdf/qpdf/QPDFParser.hh
@@ -36,7 +36,8 @@ class QPDFParser @@ -36,7 +36,8 @@ class QPDFParser
36 std::string const& object_description, 36 std::string const& object_description,
37 qpdf::Tokenizer& tokenizer, 37 qpdf::Tokenizer& tokenizer,
38 QPDFObjectHandle::StringDecrypter* decrypter, 38 QPDFObjectHandle::StringDecrypter* decrypter,
39 - QPDF& context); 39 + QPDF& context,
  40 + bool sanity_checks);
40 41
41 static std::pair<QPDFObjectHandle, bool> parse( 42 static std::pair<QPDFObjectHandle, bool> parse(
42 qpdf::is::OffsetBuffer& input, 43 qpdf::is::OffsetBuffer& input,
@@ -63,7 +64,8 @@ class QPDFParser @@ -63,7 +64,8 @@ class QPDFParser
63 QPDF* context, 64 QPDF* context,
64 bool parse_pdf, 65 bool parse_pdf,
65 int stream_id = 0, 66 int stream_id = 0,
66 - int obj_id = 0) : 67 + int obj_id = 0,
  68 + bool sanity_checks = false) :
67 input(input), 69 input(input),
68 object_description(object_description), 70 object_description(object_description),
69 tokenizer(tokenizer), 71 tokenizer(tokenizer),
@@ -72,7 +74,8 @@ class QPDFParser @@ -72,7 +74,8 @@ class QPDFParser
72 description(std::move(sp_description)), 74 description(std::move(sp_description)),
73 parse_pdf(parse_pdf), 75 parse_pdf(parse_pdf),
74 stream_id(stream_id), 76 stream_id(stream_id),
75 - obj_id(obj_id) 77 + obj_id(obj_id),
  78 + sanity_checks(sanity_checks)
76 { 79 {
77 } 80 }
78 81
@@ -125,6 +128,7 @@ class QPDFParser @@ -125,6 +128,7 @@ class QPDFParser
125 bool parse_pdf{false}; 128 bool parse_pdf{false};
126 int stream_id{0}; 129 int stream_id{0};
127 int obj_id{0}; 130 int obj_id{0};
  131 + bool sanity_checks{false};
128 132
129 std::vector<StackFrame> stack; 133 std::vector<StackFrame> stack;
130 StackFrame* frame{nullptr}; 134 StackFrame* frame{nullptr};
libqpdf/qpdf/QPDF_private.hh
@@ -490,6 +490,7 @@ class QPDF::Members @@ -490,6 +490,7 @@ class QPDF::Members
490 // copied_stream_data_provider is owned by copied_streams 490 // copied_stream_data_provider is owned by copied_streams
491 CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; 491 CopiedStreamDataProvider* copied_stream_data_provider{nullptr};
492 bool reconstructed_xref{false}; 492 bool reconstructed_xref{false};
  493 + bool in_xref_reconstruction{false};
493 bool fixed_dangling_refs{false}; 494 bool fixed_dangling_refs{false};
494 bool immediate_copy_from{false}; 495 bool immediate_copy_from{false};
495 bool in_parse{false}; 496 bool in_parse{false};