Commit aa4f288291ffca73c40ced0050cf4dcd2a25f3fa

Authored by m-holger
1 parent bbe732c0

Refactor xref reconstruction

Avoid unnecessary rescanning of lines and repositioning of input file.
Limit max size of tokens.
Showing 1 changed file with 8 additions and 11 deletions
libqpdf/QPDF.cc
@@ -572,18 +572,13 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -572,18 +572,13 @@ QPDF::reconstruct_xref(QPDFExc& e)
572 m->file->seek(0, SEEK_END); 572 m->file->seek(0, SEEK_END);
573 qpdf_offset_t eof = m->file->tell(); 573 qpdf_offset_t eof = m->file->tell();
574 m->file->seek(0, SEEK_SET); 574 m->file->seek(0, SEEK_SET);
575 - qpdf_offset_t line_start = 0;  
576 - // Don't allow very long tokens here during recovery.  
577 - static size_t const MAX_LEN = 100; 575 + // Don't allow very long tokens here during recovery. All the interesting tokens are covered.
  576 + static size_t const MAX_LEN = 10;
578 while (m->file->tell() < eof) { 577 while (m->file->tell() < eof) {
579 - m->file->findAndSkipNextEOL();  
580 - qpdf_offset_t next_line_start = m->file->tell();  
581 - m->file->seek(line_start, SEEK_SET);  
582 QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN); 578 QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN);
583 qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length()); 579 qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length());
584 - if (token_start >= next_line_start) {  
585 - // don't process yet -- wait until we get to the line containing this token  
586 - } else if (t1.isInteger()) { 580 + if (t1.isInteger()) {
  581 + auto pos = m->file->tell();
587 QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN); 582 QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN);
588 if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { 583 if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) {
589 int obj = QUtil::string_to_int(t1.getValue().c_str()); 584 int obj = QUtil::string_to_int(t1.getValue().c_str());
@@ -595,17 +590,19 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -595,17 +590,19 @@ QPDF::reconstruct_xref(QPDFExc& e)
595 "", 0, "ignoring object with impossibly large id " + std::to_string(obj))); 590 "", 0, "ignoring object with impossibly large id " + std::to_string(obj)));
596 } 591 }
597 } 592 }
  593 + m->file->seek(pos, SEEK_SET);
598 } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) { 594 } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) {
  595 + auto pos = m->file->tell();
599 QPDFObjectHandle t = readTrailer(); 596 QPDFObjectHandle t = readTrailer();
600 if (!t.isDictionary()) { 597 if (!t.isDictionary()) {
601 // Oh well. It was worth a try. 598 // Oh well. It was worth a try.
602 } else { 599 } else {
603 setTrailer(t); 600 setTrailer(t);
604 } 601 }
  602 + m->file->seek(pos, SEEK_SET);
605 } 603 }
606 check_warnings(); 604 check_warnings();
607 - m->file->seek(next_line_start, SEEK_SET);  
608 - line_start = next_line_start; 605 + m->file->findAndSkipNextEOL();
609 } 606 }
610 m->deleted_objects.clear(); 607 m->deleted_objects.clear();
611 608