Commit 186fca6d8d21dbc4f7ca70c6abfee7dde0e59853

Authored by m-holger
1 parent f4e468b1

Add further sanity checks to QPDF::reconstruct_xref

Run getAllPages as a sanity check and throw an exception if too many
warnings are generated or no pages are found.
Showing 1 changed file with 14 additions and 9 deletions
libqpdf/QPDF.cc
... ... @@ -546,6 +546,11 @@ QPDF::reconstruct_xref(QPDFExc& e)
546 546 // If recovery generates more than 1000 warnings, the file is so severely damaged that there
547 547 // probably is no point trying to continue.
548 548 const auto max_warnings = m->warnings.size() + 1000U;
  549 + auto check_warnings = [this, max_warnings]() {
  550 + if (m->warnings.size() > max_warnings) {
  551 + throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table");
  552 + }
  553 + };
549 554  
550 555 m->reconstructed_xref = true;
551 556 // We may find more objects, which may contain dangling references.
... ... @@ -600,9 +605,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
600 605 setTrailer(t);
601 606 }
602 607 }
603   - if (m->warnings.size() > max_warnings) {
604   - throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table");
605   - }
  608 + check_warnings();
606 609 m->file->seek(next_line_start, SEEK_SET);
607 610 line_start = next_line_start;
608 611 }
... ... @@ -629,10 +632,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
629 632 max_offset = offset;
630 633 setTrailer(oh.getDict());
631 634 }
632   - if (m->warnings.size() > max_warnings) {
633   - throw damagedPDF(
634   - "", 0, "too many errors while reconstructing cross-reference table");
635   - }
  635 + check_warnings();
636 636 }
637 637 if (max_offset > 0) {
638 638 try {
... ... @@ -657,8 +657,13 @@ QPDF::reconstruct_xref(QPDFExc& e)
657 657 // creating QPDF objects from JSON.
658 658 throw damagedPDF("", 0, "unable to find objects while recovering damaged file");
659 659 }
660   - if (m->warnings.size() > max_warnings) {
661   - throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table");
  660 + check_warnings();
  661 + if (!m->parsed) {
  662 + getAllPages();
  663 + check_warnings();
  664 + if (m->all_pages.empty()) {
  665 + throw damagedPDF("", 0, "unable to find any pages while recovering damaged file");
  666 + }
662 667 }
663 668 // We could iterate through the objects looking for streams and try to find objects inside of
664 669 // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors
... ...