Commit fabff0f3ec571b8dc423946e252c060e229c49d7

Authored by Jay Berkenbilt
1 parent caf5e39c

Limit token length during xref recovery

While scanning the file looking for objects, limit the length of
tokens we allow. This prevents us from getting caught up in reading a
file character by character while digging through large streams.
include/qpdf/QPDF.hh
@@ -671,7 +671,8 @@ class QPDF @@ -671,7 +671,8 @@ class QPDF
671 PointerHolder<InputSource> input, int objid, int generation, 671 PointerHolder<InputSource> input, int objid, int generation,
672 qpdf_offset_t stream_offset); 672 qpdf_offset_t stream_offset);
673 QPDFTokenizer::Token readToken(PointerHolder<InputSource>, 673 QPDFTokenizer::Token readToken(PointerHolder<InputSource>,
674 - bool allow_bad = false); 674 + bool allow_bad = false,
  675 + size_t max_len = 0);
675 676
676 QPDFObjectHandle readObjectAtOffset( 677 QPDFObjectHandle readObjectAtOffset(
677 bool attempt_recovery, 678 bool attempt_recovery,
include/qpdf/QPDFTokenizer.hh
@@ -139,7 +139,8 @@ class QPDFTokenizer @@ -139,7 +139,8 @@ class QPDFTokenizer
139 QPDF_DLL 139 QPDF_DLL
140 Token readToken(PointerHolder<InputSource> input, 140 Token readToken(PointerHolder<InputSource> input,
141 std::string const& context, 141 std::string const& context,
142 - bool allow_bad = false); 142 + bool allow_bad = false,
  143 + size_t max_len = 0);
143 144
144 private: 145 private:
145 void reset(); 146 void reset();
libqpdf/QPDF.cc
@@ -407,12 +407,14 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -407,12 +407,14 @@ QPDF::reconstruct_xref(QPDFExc& e)
407 this->m->file->seek(0, SEEK_SET); 407 this->m->file->seek(0, SEEK_SET);
408 bool in_obj = false; 408 bool in_obj = false;
409 qpdf_offset_t line_start = 0; 409 qpdf_offset_t line_start = 0;
  410 + // Don't allow very long tokens here during recovery.
  411 + static size_t const MAX_LEN = 100;
410 while (this->m->file->tell() < eof) 412 while (this->m->file->tell() < eof)
411 { 413 {
412 this->m->file->findAndSkipNextEOL(); 414 this->m->file->findAndSkipNextEOL();
413 qpdf_offset_t next_line_start = this->m->file->tell(); 415 qpdf_offset_t next_line_start = this->m->file->tell();
414 this->m->file->seek(line_start, SEEK_SET); 416 this->m->file->seek(line_start, SEEK_SET);
415 - QPDFTokenizer::Token t1 = readToken(this->m->file, true); 417 + QPDFTokenizer::Token t1 = readToken(this->m->file, true, MAX_LEN);
416 qpdf_offset_t token_start = 418 qpdf_offset_t token_start =
417 this->m->file->tell() - t1.getValue().length(); 419 this->m->file->tell() - t1.getValue().length();
418 if (token_start >= next_line_start) 420 if (token_start >= next_line_start)
@@ -430,8 +432,10 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -430,8 +432,10 @@ QPDF::reconstruct_xref(QPDFExc& e)
430 { 432 {
431 if (t1.getType() == QPDFTokenizer::tt_integer) 433 if (t1.getType() == QPDFTokenizer::tt_integer)
432 { 434 {
433 - QPDFTokenizer::Token t2 = readToken(this->m->file, true);  
434 - QPDFTokenizer::Token t3 = readToken(this->m->file, true); 435 + QPDFTokenizer::Token t2 =
  436 + readToken(this->m->file, true, MAX_LEN);
  437 + QPDFTokenizer::Token t3 =
  438 + readToken(this->m->file, true, MAX_LEN);
435 if ((t2.getType() == QPDFTokenizer::tt_integer) && 439 if ((t2.getType() == QPDFTokenizer::tt_integer) &&
436 (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"))) 440 (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")))
437 { 441 {
@@ -1411,7 +1415,7 @@ bool @@ -1411,7 +1415,7 @@ bool
1411 QPDF::findEndstream() 1415 QPDF::findEndstream()
1412 { 1416 {
1413 // Find endstream or endobj. Position the input at that token. 1417 // Find endstream or endobj. Position the input at that token.
1414 - QPDFTokenizer::Token t = readToken(this->m->file, true); 1418 + QPDFTokenizer::Token t = readToken(this->m->file, true, 20);
1415 if ((t.getType() == QPDFTokenizer::tt_word) && 1419 if ((t.getType() == QPDFTokenizer::tt_word) &&
1416 ((t.getValue() == "endobj") || 1420 ((t.getValue() == "endobj") ||
1417 (t.getValue() == "endstream"))) 1421 (t.getValue() == "endstream")))
@@ -1504,10 +1508,11 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input, @@ -1504,10 +1508,11 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input,
1504 } 1508 }
1505 1509
1506 QPDFTokenizer::Token 1510 QPDFTokenizer::Token
1507 -QPDF::readToken(PointerHolder<InputSource> input, bool allow_bad) 1511 +QPDF::readToken(PointerHolder<InputSource> input,
  1512 + bool allow_bad, size_t max_len)
1508 { 1513 {
1509 return this->m->tokenizer.readToken( 1514 return this->m->tokenizer.readToken(
1510 - input, this->m->last_object_description, allow_bad); 1515 + input, this->m->last_object_description, allow_bad, max_len);
1511 } 1516 }
1512 1517
1513 QPDFObjectHandle 1518 QPDFObjectHandle
libqpdf/QPDFTokenizer.cc
@@ -476,7 +476,8 @@ QPDFTokenizer::betweenTokens() @@ -476,7 +476,8 @@ QPDFTokenizer::betweenTokens()
476 QPDFTokenizer::Token 476 QPDFTokenizer::Token
477 QPDFTokenizer::readToken(PointerHolder<InputSource> input, 477 QPDFTokenizer::readToken(PointerHolder<InputSource> input,
478 std::string const& context, 478 std::string const& context,
479 - bool allow_bad) 479 + bool allow_bad,
  480 + size_t max_len)
480 { 481 {
481 qpdf_offset_t offset = input->tell(); 482 qpdf_offset_t offset = input->tell();
482 Token token; 483 Token token;
@@ -507,6 +508,14 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, @@ -507,6 +508,14 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
507 ++offset; 508 ++offset;
508 } 509 }
509 presentCharacter(ch); 510 presentCharacter(ch);
  511 + if (max_len && (raw_val.length() >= max_len) &&
  512 + (this->state != st_token_ready))
  513 + {
  514 + // terminate this token now
  515 + QTC::TC("qpdf", "QPDFTokenizer block long token");
  516 + this->type = tt_bad;
  517 + this->state = st_token_ready;
  518 + }
510 } 519 }
511 } 520 }
512 521
qpdf/qpdf.testcov
@@ -296,3 +296,4 @@ QPDF_encryption pad short parameter 0 @@ -296,3 +296,4 @@ QPDF_encryption pad short parameter 0
296 QPDFWriter ignore self-referential object stream 0 296 QPDFWriter ignore self-referential object stream 0
297 QPDFObjectHandle found old angle 1 297 QPDFObjectHandle found old angle 1
298 QPDF_Stream special filters 3 298 QPDF_Stream special filters 3
  299 +QPDFTokenizer block long token 0