Commit aa4f288291ffca73c40ced0050cf4dcd2a25f3fa

Authored by m-holger
1 parent bbe732c0

Refactor xref reconstruction

Avoid unnecessary rescanning of lines and repositioning of the input file.
Limit the maximum token length accepted during recovery (MAX_LEN reduced from 100 to 10).
Showing 1 changed file with 8 additions and 11 deletions
libqpdf/QPDF.cc
... ... @@ -572,18 +572,13 @@ QPDF::reconstruct_xref(QPDFExc& e)
572 572 m->file->seek(0, SEEK_END);
573 573 qpdf_offset_t eof = m->file->tell();
574 574 m->file->seek(0, SEEK_SET);
575   - qpdf_offset_t line_start = 0;
576   - // Don't allow very long tokens here during recovery.
577   - static size_t const MAX_LEN = 100;
  575 + // Don't allow very long tokens here during recovery. All the interesting tokens are covered.
  576 + static size_t const MAX_LEN = 10;
578 577 while (m->file->tell() < eof) {
579   - m->file->findAndSkipNextEOL();
580   - qpdf_offset_t next_line_start = m->file->tell();
581   - m->file->seek(line_start, SEEK_SET);
582 578 QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN);
583 579 qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length());
584   - if (token_start >= next_line_start) {
585   - // don't process yet -- wait until we get to the line containing this token
586   - } else if (t1.isInteger()) {
  580 + if (t1.isInteger()) {
  581 + auto pos = m->file->tell();
587 582 QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN);
588 583 if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) {
589 584 int obj = QUtil::string_to_int(t1.getValue().c_str());
... ... @@ -595,17 +590,19 @@ QPDF::reconstruct_xref(QPDFExc& e)
595 590 "", 0, "ignoring object with impossibly large id " + std::to_string(obj)));
596 591 }
597 592 }
  593 + m->file->seek(pos, SEEK_SET);
598 594 } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) {
  595 + auto pos = m->file->tell();
599 596 QPDFObjectHandle t = readTrailer();
600 597 if (!t.isDictionary()) {
601 598 // Oh well. It was worth a try.
602 599 } else {
603 600 setTrailer(t);
604 601 }
  602 + m->file->seek(pos, SEEK_SET);
605 603 }
606 604 check_warnings();
607   - m->file->seek(next_line_start, SEEK_SET);
608   - line_start = next_line_start;
  605 + m->file->findAndSkipNextEOL();
609 606 }
610 607 m->deleted_objects.clear();
611 608  
... ...