Commit fabff0f3ec571b8dc423946e252c060e229c49d7
1 parent caf5e39c
Limit token length during xref recovery
While scanning the file looking for objects, limit the length of tokens we allow. This prevents us from getting caught up in reading a file character by character while digging through large streams.
Showing 5 changed files with 26 additions and 9 deletions
include/qpdf/QPDF.hh
| @@ -671,7 +671,8 @@ class QPDF | @@ -671,7 +671,8 @@ class QPDF | ||
| 671 | PointerHolder<InputSource> input, int objid, int generation, | 671 | PointerHolder<InputSource> input, int objid, int generation, |
| 672 | qpdf_offset_t stream_offset); | 672 | qpdf_offset_t stream_offset); |
| 673 | QPDFTokenizer::Token readToken(PointerHolder<InputSource>, | 673 | QPDFTokenizer::Token readToken(PointerHolder<InputSource>, |
| 674 | - bool allow_bad = false); | 674 | + bool allow_bad = false, |
| 675 | + size_t max_len = 0); | ||
| 675 | 676 | ||
| 676 | QPDFObjectHandle readObjectAtOffset( | 677 | QPDFObjectHandle readObjectAtOffset( |
| 677 | bool attempt_recovery, | 678 | bool attempt_recovery, |
include/qpdf/QPDFTokenizer.hh
| @@ -139,7 +139,8 @@ class QPDFTokenizer | @@ -139,7 +139,8 @@ class QPDFTokenizer | ||
| 139 | QPDF_DLL | 139 | QPDF_DLL |
| 140 | Token readToken(PointerHolder<InputSource> input, | 140 | Token readToken(PointerHolder<InputSource> input, |
| 141 | std::string const& context, | 141 | std::string const& context, |
| 142 | - bool allow_bad = false); | 142 | + bool allow_bad = false, |
| 143 | + size_t max_len = 0); | ||
| 143 | 144 | ||
| 144 | private: | 145 | private: |
| 145 | void reset(); | 146 | void reset(); |
libqpdf/QPDF.cc
| @@ -407,12 +407,14 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -407,12 +407,14 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 407 | this->m->file->seek(0, SEEK_SET); | 407 | this->m->file->seek(0, SEEK_SET); |
| 408 | bool in_obj = false; | 408 | bool in_obj = false; |
| 409 | qpdf_offset_t line_start = 0; | 409 | qpdf_offset_t line_start = 0; |
| 410 | + // Don't allow very long tokens here during recovery. | ||
| 411 | + static size_t const MAX_LEN = 100; | ||
| 410 | while (this->m->file->tell() < eof) | 412 | while (this->m->file->tell() < eof) |
| 411 | { | 413 | { |
| 412 | this->m->file->findAndSkipNextEOL(); | 414 | this->m->file->findAndSkipNextEOL(); |
| 413 | qpdf_offset_t next_line_start = this->m->file->tell(); | 415 | qpdf_offset_t next_line_start = this->m->file->tell(); |
| 414 | this->m->file->seek(line_start, SEEK_SET); | 416 | this->m->file->seek(line_start, SEEK_SET); |
| 415 | - QPDFTokenizer::Token t1 = readToken(this->m->file, true); | 417 | + QPDFTokenizer::Token t1 = readToken(this->m->file, true, MAX_LEN); |
| 416 | qpdf_offset_t token_start = | 418 | qpdf_offset_t token_start = |
| 417 | this->m->file->tell() - t1.getValue().length(); | 419 | this->m->file->tell() - t1.getValue().length(); |
| 418 | if (token_start >= next_line_start) | 420 | if (token_start >= next_line_start) |
| @@ -430,8 +432,10 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -430,8 +432,10 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 430 | { | 432 | { |
| 431 | if (t1.getType() == QPDFTokenizer::tt_integer) | 433 | if (t1.getType() == QPDFTokenizer::tt_integer) |
| 432 | { | 434 | { |
| 433 | - QPDFTokenizer::Token t2 = readToken(this->m->file, true); | ||
| 434 | - QPDFTokenizer::Token t3 = readToken(this->m->file, true); | 435 | + QPDFTokenizer::Token t2 = |
| 436 | + readToken(this->m->file, true, MAX_LEN); | ||
| 437 | + QPDFTokenizer::Token t3 = | ||
| 438 | + readToken(this->m->file, true, MAX_LEN); | ||
| 435 | if ((t2.getType() == QPDFTokenizer::tt_integer) && | 439 | if ((t2.getType() == QPDFTokenizer::tt_integer) && |
| 436 | (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"))) | 440 | (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"))) |
| 437 | { | 441 | { |
| @@ -1411,7 +1415,7 @@ bool | @@ -1411,7 +1415,7 @@ bool | ||
| 1411 | QPDF::findEndstream() | 1415 | QPDF::findEndstream() |
| 1412 | { | 1416 | { |
| 1413 | // Find endstream or endobj. Position the input at that token. | 1417 | // Find endstream or endobj. Position the input at that token. |
| 1414 | - QPDFTokenizer::Token t = readToken(this->m->file, true); | 1418 | + QPDFTokenizer::Token t = readToken(this->m->file, true, 20); |
| 1415 | if ((t.getType() == QPDFTokenizer::tt_word) && | 1419 | if ((t.getType() == QPDFTokenizer::tt_word) && |
| 1416 | ((t.getValue() == "endobj") || | 1420 | ((t.getValue() == "endobj") || |
| 1417 | (t.getValue() == "endstream"))) | 1421 | (t.getValue() == "endstream"))) |
| @@ -1504,10 +1508,11 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input, | @@ -1504,10 +1508,11 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input, | ||
| 1504 | } | 1508 | } |
| 1505 | 1509 | ||
| 1506 | QPDFTokenizer::Token | 1510 | QPDFTokenizer::Token |
| 1507 | -QPDF::readToken(PointerHolder<InputSource> input, bool allow_bad) | 1511 | +QPDF::readToken(PointerHolder<InputSource> input, |
| 1512 | + bool allow_bad, size_t max_len) | ||
| 1508 | { | 1513 | { |
| 1509 | return this->m->tokenizer.readToken( | 1514 | return this->m->tokenizer.readToken( |
| 1510 | - input, this->m->last_object_description, allow_bad); | 1515 | + input, this->m->last_object_description, allow_bad, max_len); |
| 1511 | } | 1516 | } |
| 1512 | 1517 | ||
| 1513 | QPDFObjectHandle | 1518 | QPDFObjectHandle |
libqpdf/QPDFTokenizer.cc
| @@ -476,7 +476,8 @@ QPDFTokenizer::betweenTokens() | @@ -476,7 +476,8 @@ QPDFTokenizer::betweenTokens() | ||
| 476 | QPDFTokenizer::Token | 476 | QPDFTokenizer::Token |
| 477 | QPDFTokenizer::readToken(PointerHolder<InputSource> input, | 477 | QPDFTokenizer::readToken(PointerHolder<InputSource> input, |
| 478 | std::string const& context, | 478 | std::string const& context, |
| 479 | - bool allow_bad) | 479 | + bool allow_bad, |
| 480 | + size_t max_len) | ||
| 480 | { | 481 | { |
| 481 | qpdf_offset_t offset = input->tell(); | 482 | qpdf_offset_t offset = input->tell(); |
| 482 | Token token; | 483 | Token token; |
| @@ -507,6 +508,14 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, | @@ -507,6 +508,14 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, | ||
| 507 | ++offset; | 508 | ++offset; |
| 508 | } | 509 | } |
| 509 | presentCharacter(ch); | 510 | presentCharacter(ch); |
| 511 | + if (max_len && (raw_val.length() >= max_len) && | ||
| 512 | + (this->state != st_token_ready)) | ||
| 513 | + { | ||
| 514 | + // terminate this token now | ||
| 515 | + QTC::TC("qpdf", "QPDFTokenizer block long token"); | ||
| 516 | + this->type = tt_bad; | ||
| 517 | + this->state = st_token_ready; | ||
| 518 | + } | ||
| 510 | } | 519 | } |
| 511 | } | 520 | } |
| 512 | 521 |
qpdf/qpdf.testcov
| @@ -296,3 +296,4 @@ QPDF_encryption pad short parameter 0 | @@ -296,3 +296,4 @@ QPDF_encryption pad short parameter 0 | ||
| 296 | QPDFWriter ignore self-referential object stream 0 | 296 | QPDFWriter ignore self-referential object stream 0 |
| 297 | QPDFObjectHandle found old angle 1 | 297 | QPDFObjectHandle found old angle 1 |
| 298 | QPDF_Stream special filters 3 | 298 | QPDF_Stream special filters 3 |
| 299 | +QPDFTokenizer block long token 0 |