Commit 9740930b2de30249cb94dd1cc1044ecc2e88095c
Committed by GitHub
Merge pull request #1392 from m-holger/i1335
Refine recovery from missing startxref (fixes #1335)
Showing 4 changed files with 29 additions and 4 deletions
include/qpdf/QPDF.hh
| ... | ... | @@ -762,7 +762,7 @@ class QPDF |
| 762 | 762 | void setTrailer(QPDFObjectHandle obj); |
| 763 | 763 | void read_xref(qpdf_offset_t offset); |
| 764 | 764 | bool resolveXRefTable(); |
| 765 | - void reconstruct_xref(QPDFExc& e); | |
| 765 | + void reconstruct_xref(QPDFExc& e, bool found_startxref = true); | |
| 766 | 766 | bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); |
| 767 | 767 | bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
| 768 | 768 | bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -471,7 +471,7 @@ QPDF::parse(char const* password) |
| 471 | 471 | } |
| 472 | 472 | } catch (QPDFExc& e) { |
| 473 | 473 | if (m->attempt_recovery) { |
| 474 | - reconstruct_xref(e); | |
| 474 | + reconstruct_xref(e, xref_offset > 0); | |
| 475 | 475 | QTC::TC("qpdf", "QPDF reconstructed xref table"); |
| 476 | 476 | } else { |
| 477 | 477 | throw; |
| ... | ... | @@ -531,7 +531,7 @@ QPDF::setTrailer(QPDFObjectHandle obj) |
| 531 | 531 | } |
| 532 | 532 | |
| 533 | 533 | void |
| 534 | -QPDF::reconstruct_xref(QPDFExc& e) | |
| 534 | +QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) | |
| 535 | 535 | { |
| 536 | 536 | if (m->reconstructed_xref) { |
| 537 | 537 | // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because |
| ... | ... | @@ -569,6 +569,7 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 569 | 569 | |
| 570 | 570 | std::vector<std::tuple<int, int, qpdf_offset_t>> found_objects; |
| 571 | 571 | std::vector<qpdf_offset_t> trailers; |
| 572 | + std::vector<qpdf_offset_t> startxrefs; | |
| 572 | 573 | |
| 573 | 574 | m->file->seek(0, SEEK_END); |
| 574 | 575 | qpdf_offset_t eof = m->file->tell(); |
| ... | ... | @@ -594,11 +595,34 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 594 | 595 | m->file->seek(pos, SEEK_SET); |
| 595 | 596 | } else if (!m->trailer && t1.isWord("trailer")) { |
| 596 | 597 | trailers.emplace_back(m->file->tell()); |
| 598 | + } else if (!found_startxref && t1.isWord("startxref")) { | |
| 599 | + startxrefs.emplace_back(m->file->tell()); | |
| 597 | 600 | } |
| 598 | 601 | check_warnings(); |
| 599 | 602 | m->file->findAndSkipNextEOL(); |
| 600 | 603 | } |
| 601 | 604 | |
| 605 | + if (!found_startxref && !startxrefs.empty() && !found_objects.empty() && | |
| 606 | + startxrefs.back() > std::get<2>(found_objects.back())) { | |
| 607 | + try { | |
| 608 | + m->file->seek(startxrefs.back(), SEEK_SET); | |
| 609 | + if (auto offset = QUtil::string_to_ll(readToken(*m->file).getValue().data())) { | |
| 610 | + read_xref(offset); | |
| 611 | + if (getRoot().getKey("/Pages").isDictionary()) { | |
| 612 | + QTC::TC("qpdf", "QPDF startxref more than 1024 before end"); | |
| 613 | + warn( | |
| 614 | + damagedPDF("", 0, "startxref was more than 1024 bytes before end of file")); | |
| 615 | + initializeEncryption(); | |
| 616 | + m->parsed = true; | |
| 617 | + m->reconstructed_xref = false; | |
| 618 | + return; | |
| 619 | + } | |
| 620 | + } | |
| 621 | + } catch (...) { | |
| 622 | + // ok, bad luck. Do recovery. | |
| 623 | + } | |
| 624 | + } | |
| 625 | + | |
| 602 | 626 | auto rend = found_objects.rend(); |
| 603 | 627 | for (auto it = found_objects.rbegin(); it != rend; it++) { |
| 604 | 628 | auto [obj, gen, token_start] = *it; | ... | ... |
qpdf/qpdf.testcov
qpdf/qtest/qpdf/recover-xref-stream.out
| 1 | 1 | WARNING: recover-xref-stream.pdf: file is damaged |
| 2 | 2 | WARNING: recover-xref-stream.pdf: can't find startxref |
| 3 | 3 | WARNING: recover-xref-stream.pdf: Attempting to reconstruct cross-reference table |
| 4 | -WARNING: recover-xref-stream.pdf: reported number of objects (14) is not one plus the highest object number (15) | |
| 4 | +WARNING: recover-xref-stream.pdf: startxref was more than 1024 bytes before end of file | |
| 5 | 5 | qpdf: operation succeeded with warnings; resulting file may have some problems | ... | ... |