Commit 6e580e2453988e170f0cc41942bfa22877c44d1f

Authored by m-holger
Committed by GitHub
2 parents 8ef5cfad 2b94c755

Merge pull request #1444 from m-holger/fuzz

During xref reconstruction reject unreasonably large objects
fuzz/CMakeLists.txt
... ... @@ -158,6 +158,7 @@ set(CORPUS_OTHER
158 158 398060137.fuzz
159 159 409905355.fuzz
160 160 411312393.fuzz
  161 + 5109284021272576.fuzz
161 162 )
162 163  
163 164 set(CORPUS_DIR ${CMAKE_CURRENT_BINARY_DIR}/qpdf_corpus)
... ...
fuzz/qpdf_extra/5109284021272576.fuzz 0 → 100644
No preview for this file type
fuzz/qtest/fuzz.test
... ... @@ -11,7 +11,7 @@ my $td = new TestDriver('fuzz');
11 11  
12 12 my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS";
13 13  
14   -my $n_qpdf_files = 95; # increment when adding new files
  14 +my $n_qpdf_files = 96; # increment when adding new files
15 15  
16 16 my @fuzzers = (
17 17 ['ascii85' => 1],
... ...
libqpdf/QPDFParser.cc
... ... @@ -71,7 +71,8 @@ QPDFParser::parse(
71 71 std::string const& object_description,
72 72 qpdf::Tokenizer& tokenizer,
73 73 QPDFObjectHandle::StringDecrypter* decrypter,
74   - QPDF& context)
  74 + QPDF& context,
  75 + bool sanity_checks)
75 76 {
76 77 bool empty{false};
77 78 auto result = QPDFParser(
... ... @@ -81,7 +82,10 @@ QPDFParser::parse(
81 82 tokenizer,
82 83 decrypter,
83 84 &context,
84   - true)
  85 + true,
  86 + 0,
  87 + 0,
  88 + sanity_checks)
85 89 .parse(empty, false);
86 90 return {result, empty};
87 91 }
... ... @@ -298,7 +302,7 @@ QPDFParser::parseRemainder(bool content_stream)
298 302 continue;
299 303  
300 304 case QPDFTokenizer::tt_array_close:
301   - if (bad_count && !max_bad_count) {
  305 + if ((bad_count || sanity_checks) && !max_bad_count) {
302 306 // Trigger warning.
303 307 (void)tooManyBadTokens();
304 308 return {QPDFObject::create<QPDF_Null>()};
... ... @@ -329,7 +333,7 @@ QPDFParser::parseRemainder(bool content_stream)
329 333 continue;
330 334  
331 335 case QPDFTokenizer::tt_dict_close:
332   - if (bad_count && !max_bad_count) {
  336 + if ((bad_count || sanity_checks) && !max_bad_count) {
333 337 // Trigger warning.
334 338 (void)tooManyBadTokens();
335 339 return {QPDFObject::create<QPDF_Null>()};
... ... @@ -514,7 +518,8 @@ template <typename T, typename... Args>
514 518 void
515 519 QPDFParser::addScalar(Args&&... args)
516 520 {
517   - if (bad_count && (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
  521 + if ((bad_count || sanity_checks) &&
  522 + (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
518 523 // Stop adding scalars. We are going to abort when the close token or a bad token is
519 524 // encountered.
520 525 max_bad_count = 0;
... ... @@ -572,10 +577,15 @@ bool
572 577 QPDFParser::tooManyBadTokens()
573 578 {
574 579 if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) {
  580 + if (bad_count) {
  581 + warn(
  582 + "encountered errors while parsing an array or dictionary with more than 5000 "
  583 + "elements; giving up on reading object");
  584 + return true;
  585 + }
575 586 warn(
576   - "encountered errors while parsing an array or dictionary with more than 5000 "
577   - "elements; giving up on reading object");
578   - return true;
  587 + "encountered an array or dictionary with more than 5000 elements during xref recovery; "
  588 + "giving up on reading object");
579 589 }
580 590 if (--max_bad_count > 0 && good_count > 4) {
581 591 good_count = 0;
... ...
libqpdf/QPDF_objects.cc
... ... @@ -200,6 +200,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
200 200 };
201 201  
202 202 m->reconstructed_xref = true;
  203 + m->in_xref_reconstruction = true;
203 204 // We may find more objects, which may contain dangling references.
204 205 m->fixed_dangling_refs = false;
205 206  
... ... @@ -377,6 +378,8 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
377 378 throw damagedPDF("", -1, "unable to find any pages while recovering damaged file");
378 379 }
379 380 }
  381 +
  382 + m->in_xref_reconstruction = false;
380 383 // We could iterate through the objects looking for streams and try to find objects inside of
381 384 // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors
382 385 // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything
... ... @@ -1154,7 +1157,8 @@ QPDFObjectHandle
1154 1157 QPDF::readTrailer()
1155 1158 {
1156 1159 qpdf_offset_t offset = m->file->tell();
1157   - auto [object, empty] = QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, *this);
  1160 + auto [object, empty] = QPDFParser::parse(
  1161 + *m->file, "trailer", m->tokenizer, nullptr, *this, m->in_xref_reconstruction);
1158 1162 if (empty) {
1159 1163 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1160 1164 // actual PDF files and Adobe Reader appears to ignore them.
... ... @@ -1175,8 +1179,13 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
1175 1179  
1176 1180 StringDecrypter decrypter{this, og};
1177 1181 StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr;
1178   - auto [object, empty] =
1179   - QPDFParser::parse(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, *this);
  1182 + auto [object, empty] = QPDFParser::parse(
  1183 + *m->file,
  1184 + m->last_object_description,
  1185 + m->tokenizer,
  1186 + decrypter_ptr,
  1187 + *this,
  1188 + m->in_xref_reconstruction);
1180 1189 if (empty) {
1181 1190 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1182 1191 // actual PDF files and Adobe Reader appears to ignore them.
... ...
libqpdf/qpdf/QPDFParser.hh
... ... @@ -36,7 +36,8 @@ class QPDFParser
36 36 std::string const& object_description,
37 37 qpdf::Tokenizer& tokenizer,
38 38 QPDFObjectHandle::StringDecrypter* decrypter,
39   - QPDF& context);
  39 + QPDF& context,
  40 + bool sanity_checks);
40 41  
41 42 static std::pair<QPDFObjectHandle, bool> parse(
42 43 qpdf::is::OffsetBuffer& input,
... ... @@ -63,7 +64,8 @@ class QPDFParser
63 64 QPDF* context,
64 65 bool parse_pdf,
65 66 int stream_id = 0,
66   - int obj_id = 0) :
  67 + int obj_id = 0,
  68 + bool sanity_checks = false) :
67 69 input(input),
68 70 object_description(object_description),
69 71 tokenizer(tokenizer),
... ... @@ -72,7 +74,8 @@ class QPDFParser
72 74 description(std::move(sp_description)),
73 75 parse_pdf(parse_pdf),
74 76 stream_id(stream_id),
75   - obj_id(obj_id)
  77 + obj_id(obj_id),
  78 + sanity_checks(sanity_checks)
76 79 {
77 80 }
78 81  
... ... @@ -125,6 +128,7 @@ class QPDFParser
125 128 bool parse_pdf{false};
126 129 int stream_id{0};
127 130 int obj_id{0};
  131 + bool sanity_checks{false};
128 132  
129 133 std::vector<StackFrame> stack;
130 134 StackFrame* frame{nullptr};
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -490,6 +490,7 @@ class QPDF::Members
490 490 // copied_stream_data_provider is owned by copied_streams
491 491 CopiedStreamDataProvider* copied_stream_data_provider{nullptr};
492 492 bool reconstructed_xref{false};
  493 + bool in_xref_reconstruction{false};
493 494 bool fixed_dangling_refs{false};
494 495 bool immediate_copy_from{false};
495 496 bool in_parse{false};
... ...