Commit 186fca6d8d21dbc4f7ca70c6abfee7dde0e59853 (1 parent: f4e468b1)
Add further sanity checks to QPDF::reconstruct_xref
Run getAllPages as a sanity check and throw an exception if too many warnings are generated or no pages are found.
Showing 1 changed file with 14 additions and 9 deletions
libqpdf/QPDF.cc
| ... | ... | @@ -546,6 +546,11 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 546 | 546 | // If recovery generates more than 1000 warnings, the file is so severely damaged that there |
| 547 | 547 | // probably is no point trying to continue. |
| 548 | 548 | const auto max_warnings = m->warnings.size() + 1000U; |
| 549 | + auto check_warnings = [this, max_warnings]() { | |
| 550 | + if (m->warnings.size() > max_warnings) { | |
| 551 | + throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table"); | |
| 552 | + } | |
| 553 | + }; | |
| 549 | 554 | |
| 550 | 555 | m->reconstructed_xref = true; |
| 551 | 556 | // We may find more objects, which may contain dangling references. |
| ... | ... | @@ -600,9 +605,7 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 600 | 605 | setTrailer(t); |
| 601 | 606 | } |
| 602 | 607 | } |
| 603 | - if (m->warnings.size() > max_warnings) { | |
| 604 | - throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table"); | |
| 605 | - } | |
| 608 | + check_warnings(); | |
| 606 | 609 | m->file->seek(next_line_start, SEEK_SET); |
| 607 | 610 | line_start = next_line_start; |
| 608 | 611 | } |
| ... | ... | @@ -629,10 +632,7 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 629 | 632 | max_offset = offset; |
| 630 | 633 | setTrailer(oh.getDict()); |
| 631 | 634 | } |
| 632 | - if (m->warnings.size() > max_warnings) { | |
| 633 | - throw damagedPDF( | |
| 634 | - "", 0, "too many errors while reconstructing cross-reference table"); | |
| 635 | - } | |
| 635 | + check_warnings(); | |
| 636 | 636 | } |
| 637 | 637 | if (max_offset > 0) { |
| 638 | 638 | try { |
| ... | ... | @@ -657,8 +657,13 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 657 | 657 | // creating QPDF objects from JSON. |
| 658 | 658 | throw damagedPDF("", 0, "unable to find objects while recovering damaged file"); |
| 659 | 659 | } |
| 660 | - if (m->warnings.size() > max_warnings) { | |
| 661 | - throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table"); | |
| 660 | + check_warnings(); | |
| 661 | + if (!m->parsed) { | |
| 662 | + getAllPages(); | |
| 663 | + check_warnings(); | |
| 664 | + if (m->all_pages.empty()) { | |
| 665 | + throw damagedPDF("", 0, "unable to find any pages while recovering damaged file"); | |
| 666 | + } | |
| 662 | 667 | } |
| 663 | 668 | // We could iterate through the objects looking for streams and try to find objects inside of |
| 664 | 669 | // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors | ... | ... |