Commit 2b94c75535b5af1c27276f343005af0219fbe90a

Authored by m-holger
1 parent 8ef5cfad

During xref reconstruction reject unreasonably large objects

Reject objects containing arrays or dictionaries with more than 5000
elements. We are by definition dealing with damaged files, and such
objects are extremely likely to be invalid or malicious.
fuzz/CMakeLists.txt
... ... @@ -158,6 +158,7 @@ set(CORPUS_OTHER
158 158 398060137.fuzz
159 159 409905355.fuzz
160 160 411312393.fuzz
  161 + 5109284021272576.fuzz
161 162 )
162 163  
163 164 set(CORPUS_DIR ${CMAKE_CURRENT_BINARY_DIR}/qpdf_corpus)
... ...
fuzz/qpdf_extra/5109284021272576.fuzz 0 → 100644
No preview for this file type
fuzz/qtest/fuzz.test
... ... @@ -11,7 +11,7 @@ my $td = new TestDriver('fuzz');
11 11  
12 12 my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS";
13 13  
14   -my $n_qpdf_files = 95; # increment when adding new files
  14 +my $n_qpdf_files = 96; # increment when adding new files
15 15  
16 16 my @fuzzers = (
17 17 ['ascii85' => 1],
... ...
libqpdf/QPDFParser.cc
... ... @@ -71,7 +71,8 @@ QPDFParser::parse(
71 71 std::string const& object_description,
72 72 qpdf::Tokenizer& tokenizer,
73 73 QPDFObjectHandle::StringDecrypter* decrypter,
74   - QPDF& context)
  74 + QPDF& context,
  75 + bool sanity_checks)
75 76 {
76 77 bool empty{false};
77 78 auto result = QPDFParser(
... ... @@ -81,7 +82,10 @@ QPDFParser::parse(
81 82 tokenizer,
82 83 decrypter,
83 84 &context,
84   - true)
  85 + true,
  86 + 0,
  87 + 0,
  88 + sanity_checks)
85 89 .parse(empty, false);
86 90 return {result, empty};
87 91 }
... ... @@ -298,7 +302,7 @@ QPDFParser::parseRemainder(bool content_stream)
298 302 continue;
299 303  
300 304 case QPDFTokenizer::tt_array_close:
301   - if (bad_count && !max_bad_count) {
  305 + if ((bad_count || sanity_checks) && !max_bad_count) {
302 306 // Trigger warning.
303 307 (void)tooManyBadTokens();
304 308 return {QPDFObject::create<QPDF_Null>()};
... ... @@ -329,7 +333,7 @@ QPDFParser::parseRemainder(bool content_stream)
329 333 continue;
330 334  
331 335 case QPDFTokenizer::tt_dict_close:
332   - if (bad_count && !max_bad_count) {
  336 + if ((bad_count || sanity_checks) && !max_bad_count) {
333 337 // Trigger warning.
334 338 (void)tooManyBadTokens();
335 339 return {QPDFObject::create<QPDF_Null>()};
... ... @@ -514,7 +518,8 @@ template <typename T, typename... Args>
514 518 void
515 519 QPDFParser::addScalar(Args&&... args)
516 520 {
517   - if (bad_count && (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
  521 + if ((bad_count || sanity_checks) &&
  522 + (frame->olist.size() > 5'000 || frame->dict.size() > 5'000)) {
518 523 // Stop adding scalars. We are going to abort when the close token or a bad token is
519 524 // encountered.
520 525 max_bad_count = 0;
... ... @@ -572,10 +577,15 @@ bool
572 577 QPDFParser::tooManyBadTokens()
573 578 {
574 579 if (frame->olist.size() > 5'000 || frame->dict.size() > 5'000) {
  580 + if (bad_count) {
  581 + warn(
  582 + "encountered errors while parsing an array or dictionary with more than 5000 "
  583 + "elements; giving up on reading object");
  584 + return true;
  585 + }
575 586 warn(
576   - "encountered errors while parsing an array or dictionary with more than 5000 "
577   - "elements; giving up on reading object");
578   - return true;
  587 + "encountered an array or dictionary with more than 5000 elements during xref recovery; "
  588 + "giving up on reading object");
579 589 }
580 590 if (--max_bad_count > 0 && good_count > 4) {
581 591 good_count = 0;
... ...
libqpdf/QPDF_objects.cc
... ... @@ -200,6 +200,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
200 200 };
201 201  
202 202 m->reconstructed_xref = true;
  203 + m->in_xref_reconstruction = true;
203 204 // We may find more objects, which may contain dangling references.
204 205 m->fixed_dangling_refs = false;
205 206  
... ... @@ -377,6 +378,8 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
377 378 throw damagedPDF("", -1, "unable to find any pages while recovering damaged file");
378 379 }
379 380 }
  381 +
  382 + m->in_xref_reconstruction = false;
380 383 // We could iterate through the objects looking for streams and try to find objects inside of
381 384 // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors
382 385 // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything
... ... @@ -1154,7 +1157,8 @@ QPDFObjectHandle
1154 1157 QPDF::readTrailer()
1155 1158 {
1156 1159 qpdf_offset_t offset = m->file->tell();
1157   - auto [object, empty] = QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, *this);
  1160 + auto [object, empty] = QPDFParser::parse(
  1161 + *m->file, "trailer", m->tokenizer, nullptr, *this, m->in_xref_reconstruction);
1158 1162 if (empty) {
1159 1163 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1160 1164 // actual PDF files and Adobe Reader appears to ignore them.
... ... @@ -1175,8 +1179,13 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
1175 1179  
1176 1180 StringDecrypter decrypter{this, og};
1177 1181 StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr;
1178   - auto [object, empty] =
1179   - QPDFParser::parse(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, *this);
  1182 + auto [object, empty] = QPDFParser::parse(
  1183 + *m->file,
  1184 + m->last_object_description,
  1185 + m->tokenizer,
  1186 + decrypter_ptr,
  1187 + *this,
  1188 + m->in_xref_reconstruction);
1180 1189 if (empty) {
1181 1190 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1182 1191 // actual PDF files and Adobe Reader appears to ignore them.
... ...
libqpdf/qpdf/QPDFParser.hh
... ... @@ -36,7 +36,8 @@ class QPDFParser
36 36 std::string const& object_description,
37 37 qpdf::Tokenizer& tokenizer,
38 38 QPDFObjectHandle::StringDecrypter* decrypter,
39   - QPDF& context);
  39 + QPDF& context,
  40 + bool sanity_checks);
40 41  
41 42 static std::pair<QPDFObjectHandle, bool> parse(
42 43 qpdf::is::OffsetBuffer& input,
... ... @@ -63,7 +64,8 @@ class QPDFParser
63 64 QPDF* context,
64 65 bool parse_pdf,
65 66 int stream_id = 0,
66   - int obj_id = 0) :
  67 + int obj_id = 0,
  68 + bool sanity_checks = false) :
67 69 input(input),
68 70 object_description(object_description),
69 71 tokenizer(tokenizer),
... ... @@ -72,7 +74,8 @@ class QPDFParser
72 74 description(std::move(sp_description)),
73 75 parse_pdf(parse_pdf),
74 76 stream_id(stream_id),
75   - obj_id(obj_id)
  77 + obj_id(obj_id),
  78 + sanity_checks(sanity_checks)
76 79 {
77 80 }
78 81  
... ... @@ -125,6 +128,7 @@ class QPDFParser
125 128 bool parse_pdf{false};
126 129 int stream_id{0};
127 130 int obj_id{0};
  131 + bool sanity_checks{false};
128 132  
129 133 std::vector<StackFrame> stack;
130 134 StackFrame* frame{nullptr};
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -490,6 +490,7 @@ class QPDF::Members
490 490 // copied_stream_data_provider is owned by copied_streams
491 491 CopiedStreamDataProvider* copied_stream_data_provider{nullptr};
492 492 bool reconstructed_xref{false};
  493 + bool in_xref_reconstruction{false};
493 494 bool fixed_dangling_refs{false};
494 495 bool immediate_copy_from{false};
495 496 bool in_parse{false};
... ...