Commit fabff0f3ec571b8dc423946e252c060e229c49d7
1 parent
caf5e39c
Limit token length during xref recovery
While scanning the file for objects during xref recovery, limit the length of the tokens we allow; a token that reaches the limit is cut off and returned as a bad token. This keeps the scan from reading the file character by character as one ever-growing token when it passes through large streams.
Showing 5 changed files with 26 additions and 9 deletions
include/qpdf/QPDF.hh
| ... | ... | @@ -671,7 +671,8 @@ class QPDF |
| 671 | 671 | PointerHolder<InputSource> input, int objid, int generation, |
| 672 | 672 | qpdf_offset_t stream_offset); |
| 673 | 673 | QPDFTokenizer::Token readToken(PointerHolder<InputSource>, |
| 674 | - bool allow_bad = false); | |
| 674 | + bool allow_bad = false, | |
| 675 | + size_t max_len = 0); | |
| 675 | 676 | |
| 676 | 677 | QPDFObjectHandle readObjectAtOffset( |
| 677 | 678 | bool attempt_recovery, | ... | ... |
include/qpdf/QPDFTokenizer.hh
| ... | ... | @@ -139,7 +139,8 @@ class QPDFTokenizer |
| 139 | 139 | QPDF_DLL |
| 140 | 140 | Token readToken(PointerHolder<InputSource> input, |
| 141 | 141 | std::string const& context, |
| 142 | - bool allow_bad = false); | |
| 142 | + bool allow_bad = false, | |
| 143 | + size_t max_len = 0); | |
| 143 | 144 | |
| 144 | 145 | private: |
| 145 | 146 | void reset(); | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -407,12 +407,14 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 407 | 407 | this->m->file->seek(0, SEEK_SET); |
| 408 | 408 | bool in_obj = false; |
| 409 | 409 | qpdf_offset_t line_start = 0; |
| 410 | + // Don't allow very long tokens here during recovery. | |
| 411 | + static size_t const MAX_LEN = 100; | |
| 410 | 412 | while (this->m->file->tell() < eof) |
| 411 | 413 | { |
| 412 | 414 | this->m->file->findAndSkipNextEOL(); |
| 413 | 415 | qpdf_offset_t next_line_start = this->m->file->tell(); |
| 414 | 416 | this->m->file->seek(line_start, SEEK_SET); |
| 415 | - QPDFTokenizer::Token t1 = readToken(this->m->file, true); | |
| 417 | + QPDFTokenizer::Token t1 = readToken(this->m->file, true, MAX_LEN); | |
| 416 | 418 | qpdf_offset_t token_start = |
| 417 | 419 | this->m->file->tell() - t1.getValue().length(); |
| 418 | 420 | if (token_start >= next_line_start) |
| ... | ... | @@ -430,8 +432,10 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 430 | 432 | { |
| 431 | 433 | if (t1.getType() == QPDFTokenizer::tt_integer) |
| 432 | 434 | { |
| 433 | - QPDFTokenizer::Token t2 = readToken(this->m->file, true); | |
| 434 | - QPDFTokenizer::Token t3 = readToken(this->m->file, true); | |
| 435 | + QPDFTokenizer::Token t2 = | |
| 436 | + readToken(this->m->file, true, MAX_LEN); | |
| 437 | + QPDFTokenizer::Token t3 = | |
| 438 | + readToken(this->m->file, true, MAX_LEN); | |
| 435 | 439 | if ((t2.getType() == QPDFTokenizer::tt_integer) && |
| 436 | 440 | (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"))) |
| 437 | 441 | { |
| ... | ... | @@ -1411,7 +1415,7 @@ bool |
| 1411 | 1415 | QPDF::findEndstream() |
| 1412 | 1416 | { |
| 1413 | 1417 | // Find endstream or endobj. Position the input at that token. |
| 1414 | - QPDFTokenizer::Token t = readToken(this->m->file, true); | |
| 1418 | + QPDFTokenizer::Token t = readToken(this->m->file, true, 20); | |
| 1415 | 1419 | if ((t.getType() == QPDFTokenizer::tt_word) && |
| 1416 | 1420 | ((t.getValue() == "endobj") || |
| 1417 | 1421 | (t.getValue() == "endstream"))) |
| ... | ... | @@ -1504,10 +1508,11 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input, |
| 1504 | 1508 | } |
| 1505 | 1509 | |
| 1506 | 1510 | QPDFTokenizer::Token |
| 1507 | -QPDF::readToken(PointerHolder<InputSource> input, bool allow_bad) | |
| 1511 | +QPDF::readToken(PointerHolder<InputSource> input, | |
| 1512 | + bool allow_bad, size_t max_len) | |
| 1508 | 1513 | { |
| 1509 | 1514 | return this->m->tokenizer.readToken( |
| 1510 | - input, this->m->last_object_description, allow_bad); | |
| 1515 | + input, this->m->last_object_description, allow_bad, max_len); | |
| 1511 | 1516 | } |
| 1512 | 1517 | |
| 1513 | 1518 | QPDFObjectHandle | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -476,7 +476,8 @@ QPDFTokenizer::betweenTokens() |
| 476 | 476 | QPDFTokenizer::Token |
| 477 | 477 | QPDFTokenizer::readToken(PointerHolder<InputSource> input, |
| 478 | 478 | std::string const& context, |
| 479 | - bool allow_bad) | |
| 479 | + bool allow_bad, | |
| 480 | + size_t max_len) | |
| 480 | 481 | { |
| 481 | 482 | qpdf_offset_t offset = input->tell(); |
| 482 | 483 | Token token; |
| ... | ... | @@ -507,6 +508,14 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, |
| 507 | 508 | ++offset; |
| 508 | 509 | } |
| 509 | 510 | presentCharacter(ch); |
| 511 | + if (max_len && (raw_val.length() >= max_len) && | |
| 512 | + (this->state != st_token_ready)) | |
| 513 | + { | |
| 514 | + // terminate this token now | |
| 515 | + QTC::TC("qpdf", "QPDFTokenizer block long token"); | |
| 516 | + this->type = tt_bad; | |
| 517 | + this->state = st_token_ready; | |
| 518 | + } | |
| 510 | 519 | } |
| 511 | 520 | } |
| 512 | 521 | ... | ... |