Commit e914bbbbbcf1fa9bbd3b67d6b5417a79f71db4d7

Authored by m-holger
1 parent c2c1618e

Add further sanity check to QPDF::reconstruct_xref

If reconstruct_xref generates more than 1000 warnings, give up because the
file is so severely damaged that there is very little point in continuing.
libqpdf/QPDF.cc
... ... @@ -543,6 +543,10 @@ QPDF::reconstruct_xref(QPDFExc& e)
543 543 throw e;
544 544 }
545 545  
  546 + // If recovery generates more than 1000 warnings, the file is so severely damaged that there
  547 + // probably is no point trying to continue.
  548 + const auto max_warnings = m->warnings.size() + 1000U;
  549 +
546 550 m->reconstructed_xref = true;
547 551 // We may find more objects, which may contain dangling references.
548 552 m->fixed_dangling_refs = false;
... ... @@ -596,6 +600,9 @@ QPDF::reconstruct_xref(QPDFExc& e)
596 600 setTrailer(t);
597 601 }
598 602 }
  603 + if (m->warnings.size() > max_warnings) {
  604 + throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table");
  605 + }
599 606 m->file->seek(next_line_start, SEEK_SET);
600 607 line_start = next_line_start;
601 608 }
... ... @@ -622,6 +629,10 @@ QPDF::reconstruct_xref(QPDFExc& e)
622 629 max_offset = offset;
623 630 setTrailer(oh.getDict());
624 631 }
  632 + if (m->warnings.size() > max_warnings) {
  633 + throw damagedPDF(
  634 + "", 0, "too many errors while reconstructing cross-reference table");
  635 + }
625 636 }
626 637 if (max_offset > 0) {
627 638 try {
... ... @@ -646,7 +657,9 @@ QPDF::reconstruct_xref(QPDFExc& e)
646 657 // creating QPDF objects from JSON.
647 658 throw damagedPDF("", 0, "unable to find objects while recovering damaged file");
648 659 }
649   -
  660 + if (m->warnings.size() > max_warnings) {
  661 + throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table");
  662 + }
650 663 // We could iterate through the objects looking for streams and try to find objects inside of
651 664 // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors
652 665 // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything
... ...
qpdf/qtest/qpdf/issue-335a.out
... ... @@ -1003,21 +1003,4 @@ WARNING: issue-335a.pdf (trailer, offset 20601): unexpected )
1003 1003 WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string
1004 1004 WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring
1005 1005 WARNING: issue-335a.pdf (trailer, offset 20604): too many errors; giving up on reading object
1006   -WARNING: issue-335a.pdf (trailer, offset 20446): unknown token while reading object; treating as string
1007   -WARNING: issue-335a.pdf (trailer, offset 20601): unexpected )
1008   -WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string
1009   -WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring
1010   -WARNING: issue-335a.pdf (trailer, offset 20606): treating unexpected brace token as null
1011   -WARNING: issue-335a.pdf (trailer, offset 20607): treating unexpected brace token as null
1012   -WARNING: issue-335a.pdf (trailer, offset 20607): too many errors; giving up on reading object
1013   -WARNING: issue-335a.pdf (trailer, offset 20598): unknown token while reading object; treating as string
1014   -WARNING: issue-335a.pdf (trailer, offset 20600): unexpected )
1015   -WARNING: issue-335a.pdf (trailer, offset 20601): unexpected )
1016   -WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string
1017   -WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring
1018   -WARNING: issue-335a.pdf (trailer, offset 20606): treating unexpected brace token as null
1019   -WARNING: issue-335a.pdf (trailer, offset 20606): too many errors; giving up on reading object
1020   -WARNING: issue-335a.pdf (trailer, offset 20684): unknown token while reading object; treating as string
1021   -WARNING: issue-335a.pdf (trailer, offset 20683): expected dictionary key but found non-name object; inserting key /QPDFFake1
1022   -WARNING: issue-335a.pdf (trailer, offset 20747): stream keyword found in trailer
1023   -qpdf: issue-335a.pdf: unable to find /Root dictionary
  1006 +qpdf: issue-335a.pdf: too many errors while reconstructing cross-reference table
... ...