Commit aa4f288291ffca73c40ced0050cf4dcd2a25f3fa
1 parent: bbe732c0
Refactor xref reconstruction
Avoid unnecessary rescanning of lines and repositioning of input file. Limit max size of tokens.
Showing 1 changed file with 8 additions and 11 deletions
libqpdf/QPDF.cc
| @@ -572,18 +572,13 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -572,18 +572,13 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 572 | m->file->seek(0, SEEK_END); | 572 | m->file->seek(0, SEEK_END); |
| 573 | qpdf_offset_t eof = m->file->tell(); | 573 | qpdf_offset_t eof = m->file->tell(); |
| 574 | m->file->seek(0, SEEK_SET); | 574 | m->file->seek(0, SEEK_SET); |
| 575 | - qpdf_offset_t line_start = 0; | ||
| 576 | - // Don't allow very long tokens here during recovery. | ||
| 577 | - static size_t const MAX_LEN = 100; | 575 | + // Don't allow very long tokens here during recovery. All the interesting tokens are covered. |
| 576 | + static size_t const MAX_LEN = 10; | ||
| 578 | while (m->file->tell() < eof) { | 577 | while (m->file->tell() < eof) { |
| 579 | - m->file->findAndSkipNextEOL(); | ||
| 580 | - qpdf_offset_t next_line_start = m->file->tell(); | ||
| 581 | - m->file->seek(line_start, SEEK_SET); | ||
| 582 | QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN); | 578 | QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN); |
| 583 | qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length()); | 579 | qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length()); |
| 584 | - if (token_start >= next_line_start) { | ||
| 585 | - // don't process yet -- wait until we get to the line containing this token | ||
| 586 | - } else if (t1.isInteger()) { | 580 | + if (t1.isInteger()) { |
| 581 | + auto pos = m->file->tell(); | ||
| 587 | QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN); | 582 | QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN); |
| 588 | if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { | 583 | if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { |
| 589 | int obj = QUtil::string_to_int(t1.getValue().c_str()); | 584 | int obj = QUtil::string_to_int(t1.getValue().c_str()); |
| @@ -595,17 +590,19 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -595,17 +590,19 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 595 | "", 0, "ignoring object with impossibly large id " + std::to_string(obj))); | 590 | "", 0, "ignoring object with impossibly large id " + std::to_string(obj))); |
| 596 | } | 591 | } |
| 597 | } | 592 | } |
| 593 | + m->file->seek(pos, SEEK_SET); | ||
| 598 | } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) { | 594 | } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) { |
| 595 | + auto pos = m->file->tell(); | ||
| 599 | QPDFObjectHandle t = readTrailer(); | 596 | QPDFObjectHandle t = readTrailer(); |
| 600 | if (!t.isDictionary()) { | 597 | if (!t.isDictionary()) { |
| 601 | // Oh well. It was worth a try. | 598 | // Oh well. It was worth a try. |
| 602 | } else { | 599 | } else { |
| 603 | setTrailer(t); | 600 | setTrailer(t); |
| 604 | } | 601 | } |
| 602 | + m->file->seek(pos, SEEK_SET); | ||
| 605 | } | 603 | } |
| 606 | check_warnings(); | 604 | check_warnings(); |
| 607 | - m->file->seek(next_line_start, SEEK_SET); | ||
| 608 | - line_start = next_line_start; | 605 | + m->file->findAndSkipNextEOL(); |
| 609 | } | 606 | } |
| 610 | m->deleted_objects.clear(); | 607 | m->deleted_objects.clear(); |
| 611 | 608 |