Commit fabff0f3ec571b8dc423946e252c060e229c49d7

Authored by Jay Berkenbilt
1 parent caf5e39c

Limit token length during xref recovery

While scanning the file looking for objects, limit the length of
tokens we allow. This prevents us from getting caught up in reading a
file character by character while digging through large streams.
include/qpdf/QPDF.hh
... ... @@ -671,7 +671,8 @@ class QPDF
671 671 PointerHolder<InputSource> input, int objid, int generation,
672 672 qpdf_offset_t stream_offset);
673 673 QPDFTokenizer::Token readToken(PointerHolder<InputSource>,
674   - bool allow_bad = false);
  674 + bool allow_bad = false,
  675 + size_t max_len = 0);
675 676  
676 677 QPDFObjectHandle readObjectAtOffset(
677 678 bool attempt_recovery,
... ...
include/qpdf/QPDFTokenizer.hh
... ... @@ -139,7 +139,8 @@ class QPDFTokenizer
139 139 QPDF_DLL
140 140 Token readToken(PointerHolder<InputSource> input,
141 141 std::string const& context,
142   - bool allow_bad = false);
  142 + bool allow_bad = false,
  143 + size_t max_len = 0);
143 144  
144 145 private:
145 146 void reset();
... ...
libqpdf/QPDF.cc
... ... @@ -407,12 +407,14 @@ QPDF::reconstruct_xref(QPDFExc& e)
407 407 this->m->file->seek(0, SEEK_SET);
408 408 bool in_obj = false;
409 409 qpdf_offset_t line_start = 0;
  410 + // Don't allow very long tokens here during recovery.
  411 + static size_t const MAX_LEN = 100;
410 412 while (this->m->file->tell() < eof)
411 413 {
412 414 this->m->file->findAndSkipNextEOL();
413 415 qpdf_offset_t next_line_start = this->m->file->tell();
414 416 this->m->file->seek(line_start, SEEK_SET);
415   - QPDFTokenizer::Token t1 = readToken(this->m->file, true);
  417 + QPDFTokenizer::Token t1 = readToken(this->m->file, true, MAX_LEN);
416 418 qpdf_offset_t token_start =
417 419 this->m->file->tell() - t1.getValue().length();
418 420 if (token_start >= next_line_start)
... ... @@ -430,8 +432,10 @@ QPDF::reconstruct_xref(QPDFExc& e)
430 432 {
431 433 if (t1.getType() == QPDFTokenizer::tt_integer)
432 434 {
433   - QPDFTokenizer::Token t2 = readToken(this->m->file, true);
434   - QPDFTokenizer::Token t3 = readToken(this->m->file, true);
  435 + QPDFTokenizer::Token t2 =
  436 + readToken(this->m->file, true, MAX_LEN);
  437 + QPDFTokenizer::Token t3 =
  438 + readToken(this->m->file, true, MAX_LEN);
435 439 if ((t2.getType() == QPDFTokenizer::tt_integer) &&
436 440 (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")))
437 441 {
... ... @@ -1411,7 +1415,7 @@ bool
1411 1415 QPDF::findEndstream()
1412 1416 {
1413 1417 // Find endstream or endobj. Position the input at that token.
1414   - QPDFTokenizer::Token t = readToken(this->m->file, true);
  1418 + QPDFTokenizer::Token t = readToken(this->m->file, true, 20);
1415 1419 if ((t.getType() == QPDFTokenizer::tt_word) &&
1416 1420 ((t.getValue() == "endobj") ||
1417 1421 (t.getValue() == "endstream")))
... ... @@ -1504,10 +1508,11 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input,
1504 1508 }
1505 1509  
1506 1510 QPDFTokenizer::Token
1507   -QPDF::readToken(PointerHolder<InputSource> input, bool allow_bad)
  1511 +QPDF::readToken(PointerHolder<InputSource> input,
  1512 + bool allow_bad, size_t max_len)
1508 1513 {
1509 1514 return this->m->tokenizer.readToken(
1510   - input, this->m->last_object_description, allow_bad);
  1515 + input, this->m->last_object_description, allow_bad, max_len);
1511 1516 }
1512 1517  
1513 1518 QPDFObjectHandle
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -476,7 +476,8 @@ QPDFTokenizer::betweenTokens()
476 476 QPDFTokenizer::Token
477 477 QPDFTokenizer::readToken(PointerHolder<InputSource> input,
478 478 std::string const& context,
479   - bool allow_bad)
  479 + bool allow_bad,
  480 + size_t max_len)
480 481 {
481 482 qpdf_offset_t offset = input->tell();
482 483 Token token;
... ... @@ -507,6 +508,14 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
507 508 ++offset;
508 509 }
509 510 presentCharacter(ch);
  511 + if (max_len && (raw_val.length() >= max_len) &&
  512 + (this->state != st_token_ready))
  513 + {
  514 + // terminate this token now
  515 + QTC::TC("qpdf", "QPDFTokenizer block long token");
  516 + this->type = tt_bad;
  517 + this->state = st_token_ready;
  518 + }
510 519 }
511 520 }
512 521  
... ...
qpdf/qpdf.testcov
... ... @@ -296,3 +296,4 @@ QPDF_encryption pad short parameter 0
296 296 QPDFWriter ignore self-referential object stream 0
297 297 QPDFObjectHandle found old angle 1
298 298 QPDF_Stream special filters 3
  299 +QPDFTokenizer block long token 0
... ...