Commit 2b94c75535b5af1c27276f343005af0219fbe90a

Authored by m-holger
1 parent 8ef5cfad

During xref reconstruction reject unreasonably large objects

Reject objects containing arrays or dictionaries with more than 5000
elements. We are by definition dealing with damaged files, and such
objects are extremely likely to be invalid or malicious.
fuzz/CMakeLists.txt
@@ -158,6 +158,7 @@ set(CORPUS_OTHER @@ -158,6 +158,7 @@ set(CORPUS_OTHER
158 398060137.fuzz 158 398060137.fuzz
159 409905355.fuzz 159 409905355.fuzz
160 411312393.fuzz 160 411312393.fuzz
  161 + 5109284021272576.fuzz
161 ) 162 )
162 163
163 set(CORPUS_DIR ${CMAKE_CURRENT_BINARY_DIR}/qpdf_corpus) 164 set(CORPUS_DIR ${CMAKE_CURRENT_BINARY_DIR}/qpdf_corpus)
fuzz/qpdf_extra/5109284021272576.fuzz 0 → 100644
No preview for this file type
fuzz/qtest/fuzz.test
@@ -11,7 +11,7 @@ my $td = new TestDriver('fuzz'); @@ -11,7 +11,7 @@ my $td = new TestDriver('fuzz');
11 11
12 my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS"; 12 my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS";
13 13
14 -my $n_qpdf_files = 95; # increment when adding new files 14 +my $n_qpdf_files = 96; # increment when adding new files
15 15
16 my @fuzzers = ( 16 my @fuzzers = (
17 ['ascii85' => 1], 17 ['ascii85' => 1],
libqpdf/QPDFParser.cc
@@ -71,7 +71,8 @@ QPDFParser::parse( @@ -71,7 +71,8 @@ QPDFParser::parse(
71 std::string const& object_description, 71 std::string const& object_description,
72 qpdf::Tokenizer& tokenizer, 72 qpdf::Tokenizer& tokenizer,
73 QPDFObjectHandle::StringDecrypter* decrypter, 73 QPDFObjectHandle::StringDecrypter* decrypter,
74 - QPDF& context) 74 + QPDF& context,
  75 + bool sanity_checks)
75 { 76 {
76 bool empty{false}; 77 bool empty{false};
77 auto result = QPDFParser( 78 auto result = QPDFParser(
@@ -81,7 +82,10 @@ QPDFParser::parse( @@ -81,7 +82,10 @@ QPDFParser::parse(
81 tokenizer, 82 tokenizer,
82 decrypter, 83 decrypter,
83 &context, 84 &context,
84 - true) 85 + true,
  86 + 0,
  87 + 0,
  88 + sanity_checks)
85 .parse(empty, false); 89 .parse(empty, false);
86 return {result, empty}; 90 return {result, empty};
87 } 91 }
@@ -298,7 +302,7 @@ QPDFParser::parseRemainder(bool content_stream) @@ -298,7 +302,7 @@ QPDFParser::parseRemainder(bool content_stream)
298 continue; 302 continue;
299 303
300 case QPDFTokenizer::tt_array_close: 304 case QPDFTokenizer::tt_array_close:
301 - if (bad_count && !max_bad_count) { 305 + if ((bad_count || sanity_checks) && !max_bad_count) {
302 // Trigger warning. 306 // Trigger warning.
303 (void)tooManyBadTokens(); 307 (void)tooManyBadTokens();
304 return {QPDFObject::create<QPDF_Null>()}; 308 return {QPDFObject::create<QPDF_Null>()};
@@ -329,7 +333,7 @@ QPDFParser::parseRemainder(bool content_stream) @@ -329,7 +333,7 @@ QPDFParser::parseRemainder(bool content_stream)
329 continue; 333 continue;
330 334
331 case QPDFTokenizer::tt_dict_close: 335 case QPDFTokenizer::tt_dict_close:
332 - if (bad_count && !max_bad_count) { 336 + if ((bad_count || sanity_checks) && !max_bad_count) {
333 // Trigger warning. 337 // Trigger warning.
334 (void)tooManyBadTokens(); 338 (void)tooManyBadTokens();
335 return {QPDFObject::create<QPDF_Null>()}; 339 return {QPDFObject::create<QPDF_Null>()};
@@ -514,7 +518,8 @@ template &lt;typename T, typename... Args&gt; @@ -514,7 +518,8 @@ template &lt;typename T, typename... Args&gt;
514 void 518 void
515 QPDFParser::addScalar(Args&&... args) 519 QPDFParser::addScalar(Args&&... args)
516 { 520 {
517 - if (bad_count && (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) { 521 + if ((bad_count || sanity_checks) &&
  522 + (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
518 // Stop adding scalars. We are going to abort when the close token or a bad token is 523 // Stop adding scalars. We are going to abort when the close token or a bad token is
519 // encountered. 524 // encountered.
520 max_bad_count = 0; 525 max_bad_count = 0;
@@ -572,10 +577,15 @@ bool @@ -572,10 +577,15 @@ bool
572 QPDFParser::tooManyBadTokens() 577 QPDFParser::tooManyBadTokens()
573 { 578 {
574 if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) { 579 if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) {
  580 + if (bad_count) {
  581 + warn(
  582 + "encountered errors while parsing an array or dictionary with more than 5000 "
  583 + "elements; giving up on reading object");
  584 + return true;
  585 + }
575 warn( 586 warn(
576 - "encountered errors while parsing an array or dictionary with more than 5000 "  
577 - "elements; giving up on reading object");  
578 - return true; 587 + "encountered an array or dictionary with more than 5000 elements during xref recovery; "
  588 + "giving up on reading object");
579 } 589 }
580 if (--max_bad_count > 0 && good_count > 4) { 590 if (--max_bad_count > 0 && good_count > 4) {
581 good_count = 0; 591 good_count = 0;
libqpdf/QPDF_objects.cc
@@ -200,6 +200,7 @@ QPDF::reconstruct_xref(QPDFExc&amp; e, bool found_startxref) @@ -200,6 +200,7 @@ QPDF::reconstruct_xref(QPDFExc&amp; e, bool found_startxref)
200 }; 200 };
201 201
202 m->reconstructed_xref = true; 202 m->reconstructed_xref = true;
  203 + m->in_xref_reconstruction = true;
203 // We may find more objects, which may contain dangling references. 204 // We may find more objects, which may contain dangling references.
204 m->fixed_dangling_refs = false; 205 m->fixed_dangling_refs = false;
205 206
@@ -377,6 +378,8 @@ QPDF::reconstruct_xref(QPDFExc&amp; e, bool found_startxref) @@ -377,6 +378,8 @@ QPDF::reconstruct_xref(QPDFExc&amp; e, bool found_startxref)
377 throw damagedPDF("", -1, "unable to find any pages while recovering damaged file"); 378 throw damagedPDF("", -1, "unable to find any pages while recovering damaged file");
378 } 379 }
379 } 380 }
  381 +
  382 + m->in_xref_reconstruction = false;
380 // We could iterate through the objects looking for streams and try to find objects inside of 383 // We could iterate through the objects looking for streams and try to find objects inside of
381 // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors 384 // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors
382 // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything 385 // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything
@@ -1154,7 +1157,8 @@ QPDFObjectHandle @@ -1154,7 +1157,8 @@ QPDFObjectHandle
1154 QPDF::readTrailer() 1157 QPDF::readTrailer()
1155 { 1158 {
1156 qpdf_offset_t offset = m->file->tell(); 1159 qpdf_offset_t offset = m->file->tell();
1157 - auto [object, empty] = QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, *this); 1160 + auto [object, empty] = QPDFParser::parse(
  1161 + *m->file, "trailer", m->tokenizer, nullptr, *this, m->in_xref_reconstruction);
1158 if (empty) { 1162 if (empty) {
1159 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in 1163 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1160 // actual PDF files and Adobe Reader appears to ignore them. 1164 // actual PDF files and Adobe Reader appears to ignore them.
@@ -1175,8 +1179,13 @@ QPDF::readObject(std::string const&amp; description, QPDFObjGen og) @@ -1175,8 +1179,13 @@ QPDF::readObject(std::string const&amp; description, QPDFObjGen og)
1175 1179
1176 StringDecrypter decrypter{this, og}; 1180 StringDecrypter decrypter{this, og};
1177 StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; 1181 StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr;
1178 - auto [object, empty] =  
1179 - QPDFParser::parse(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, *this); 1182 + auto [object, empty] = QPDFParser::parse(
  1183 + *m->file,
  1184 + m->last_object_description,
  1185 + m->tokenizer,
  1186 + decrypter_ptr,
  1187 + *this,
  1188 + m->in_xref_reconstruction);
1180 if (empty) { 1189 if (empty) {
1181 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in 1190 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1182 // actual PDF files and Adobe Reader appears to ignore them. 1191 // actual PDF files and Adobe Reader appears to ignore them.
libqpdf/qpdf/QPDFParser.hh
@@ -36,7 +36,8 @@ class QPDFParser @@ -36,7 +36,8 @@ class QPDFParser
36 std::string const& object_description, 36 std::string const& object_description,
37 qpdf::Tokenizer& tokenizer, 37 qpdf::Tokenizer& tokenizer,
38 QPDFObjectHandle::StringDecrypter* decrypter, 38 QPDFObjectHandle::StringDecrypter* decrypter,
39 - QPDF& context); 39 + QPDF& context,
  40 + bool sanity_checks);
40 41
41 static std::pair<QPDFObjectHandle, bool> parse( 42 static std::pair<QPDFObjectHandle, bool> parse(
42 qpdf::is::OffsetBuffer& input, 43 qpdf::is::OffsetBuffer& input,
@@ -63,7 +64,8 @@ class QPDFParser @@ -63,7 +64,8 @@ class QPDFParser
63 QPDF* context, 64 QPDF* context,
64 bool parse_pdf, 65 bool parse_pdf,
65 int stream_id = 0, 66 int stream_id = 0,
66 - int obj_id = 0) : 67 + int obj_id = 0,
  68 + bool sanity_checks = false) :
67 input(input), 69 input(input),
68 object_description(object_description), 70 object_description(object_description),
69 tokenizer(tokenizer), 71 tokenizer(tokenizer),
@@ -72,7 +74,8 @@ class QPDFParser @@ -72,7 +74,8 @@ class QPDFParser
72 description(std::move(sp_description)), 74 description(std::move(sp_description)),
73 parse_pdf(parse_pdf), 75 parse_pdf(parse_pdf),
74 stream_id(stream_id), 76 stream_id(stream_id),
75 - obj_id(obj_id) 77 + obj_id(obj_id),
  78 + sanity_checks(sanity_checks)
76 { 79 {
77 } 80 }
78 81
@@ -125,6 +128,7 @@ class QPDFParser @@ -125,6 +128,7 @@ class QPDFParser
125 bool parse_pdf{false}; 128 bool parse_pdf{false};
126 int stream_id{0}; 129 int stream_id{0};
127 int obj_id{0}; 130 int obj_id{0};
  131 + bool sanity_checks{false};
128 132
129 std::vector<StackFrame> stack; 133 std::vector<StackFrame> stack;
130 StackFrame* frame{nullptr}; 134 StackFrame* frame{nullptr};
libqpdf/qpdf/QPDF_private.hh
@@ -490,6 +490,7 @@ class QPDF::Members @@ -490,6 +490,7 @@ class QPDF::Members
490 // copied_stream_data_provider is owned by copied_streams 490 // copied_stream_data_provider is owned by copied_streams
491 CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; 491 CopiedStreamDataProvider* copied_stream_data_provider{nullptr};
492 bool reconstructed_xref{false}; 492 bool reconstructed_xref{false};
  493 + bool in_xref_reconstruction{false};
493 bool fixed_dangling_refs{false}; 494 bool fixed_dangling_refs{false};
494 bool immediate_copy_from{false}; 495 bool immediate_copy_from{false};
495 bool in_parse{false}; 496 bool in_parse{false};