Commit 7927241dcd0e7b5350b9b3e56eabbf5ebf34c378
1 parent
09d970e2
Refine recovery from missing startxref (fixes #1335)
If startxref cannot be found in the last 1024 bytes of the file, try finding it anywhere in the file and check whether it is valid.
Showing
4 changed files
with
29 additions
and
4 deletions
include/qpdf/QPDF.hh
| @@ -762,7 +762,7 @@ class QPDF | @@ -762,7 +762,7 @@ class QPDF | ||
| 762 | void setTrailer(QPDFObjectHandle obj); | 762 | void setTrailer(QPDFObjectHandle obj); |
| 763 | void read_xref(qpdf_offset_t offset); | 763 | void read_xref(qpdf_offset_t offset); |
| 764 | bool resolveXRefTable(); | 764 | bool resolveXRefTable(); |
| 765 | - void reconstruct_xref(QPDFExc& e); | 765 | + void reconstruct_xref(QPDFExc& e, bool found_startxref = true); |
| 766 | bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); | 766 | bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); |
| 767 | bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); | 767 | bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
| 768 | bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); | 768 | bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
libqpdf/QPDF.cc
| @@ -470,7 +470,7 @@ QPDF::parse(char const* password) | @@ -470,7 +470,7 @@ QPDF::parse(char const* password) | ||
| 470 | } | 470 | } |
| 471 | } catch (QPDFExc& e) { | 471 | } catch (QPDFExc& e) { |
| 472 | if (m->attempt_recovery) { | 472 | if (m->attempt_recovery) { |
| 473 | - reconstruct_xref(e); | 473 | + reconstruct_xref(e, xref_offset > 0); |
| 474 | QTC::TC("qpdf", "QPDF reconstructed xref table"); | 474 | QTC::TC("qpdf", "QPDF reconstructed xref table"); |
| 475 | } else { | 475 | } else { |
| 476 | throw; | 476 | throw; |
| @@ -530,7 +530,7 @@ QPDF::setTrailer(QPDFObjectHandle obj) | @@ -530,7 +530,7 @@ QPDF::setTrailer(QPDFObjectHandle obj) | ||
| 530 | } | 530 | } |
| 531 | 531 | ||
| 532 | void | 532 | void |
| 533 | -QPDF::reconstruct_xref(QPDFExc& e) | 533 | +QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) |
| 534 | { | 534 | { |
| 535 | if (m->reconstructed_xref) { | 535 | if (m->reconstructed_xref) { |
| 536 | // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because | 536 | // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because |
| @@ -568,6 +568,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -568,6 +568,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 568 | 568 | ||
| 569 | std::vector<std::tuple<int, int, qpdf_offset_t>> found_objects; | 569 | std::vector<std::tuple<int, int, qpdf_offset_t>> found_objects; |
| 570 | std::vector<qpdf_offset_t> trailers; | 570 | std::vector<qpdf_offset_t> trailers; |
| 571 | + std::vector<qpdf_offset_t> startxrefs; | ||
| 571 | 572 | ||
| 572 | m->file->seek(0, SEEK_END); | 573 | m->file->seek(0, SEEK_END); |
| 573 | qpdf_offset_t eof = m->file->tell(); | 574 | qpdf_offset_t eof = m->file->tell(); |
| @@ -593,11 +594,34 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -593,11 +594,34 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 593 | m->file->seek(pos, SEEK_SET); | 594 | m->file->seek(pos, SEEK_SET); |
| 594 | } else if (!m->trailer && t1.isWord("trailer")) { | 595 | } else if (!m->trailer && t1.isWord("trailer")) { |
| 595 | trailers.emplace_back(m->file->tell()); | 596 | trailers.emplace_back(m->file->tell()); |
| 597 | + } else if (!found_startxref && t1.isWord("startxref")) { | ||
| 598 | + startxrefs.emplace_back(m->file->tell()); | ||
| 596 | } | 599 | } |
| 597 | check_warnings(); | 600 | check_warnings(); |
| 598 | m->file->findAndSkipNextEOL(); | 601 | m->file->findAndSkipNextEOL(); |
| 599 | } | 602 | } |
| 600 | 603 | ||
| 604 | + if (!found_startxref && !startxrefs.empty() && !found_objects.empty() && | ||
| 605 | + startxrefs.back() > std::get<2>(found_objects.back())) { | ||
| 606 | + try { | ||
| 607 | + m->file->seek(startxrefs.back(), SEEK_SET); | ||
| 608 | + if (auto offset = QUtil::string_to_ll(readToken(*m->file).getValue().data())) { | ||
| 609 | + read_xref(offset); | ||
| 610 | + if (getRoot().getKey("/Pages").isDictionary()) { | ||
| 611 | + QTC::TC("qpdf", "QPDF startxref more than 1024 before end"); | ||
| 612 | + warn( | ||
| 613 | + damagedPDF("", 0, "startxref was more than 1024 bytes before end of file")); | ||
| 614 | + initializeEncryption(); | ||
| 615 | + m->parsed = true; | ||
| 616 | + m->reconstructed_xref = false; | ||
| 617 | + return; | ||
| 618 | + } | ||
| 619 | + } | ||
| 620 | + } catch (...) { | ||
| 621 | + // ok, bad luck. Do recovery. | ||
| 622 | + } | ||
| 623 | + } | ||
| 624 | + | ||
| 601 | auto rend = found_objects.rend(); | 625 | auto rend = found_objects.rend(); |
| 602 | for (auto it = found_objects.rbegin(); it != rend; it++) { | 626 | for (auto it = found_objects.rbegin(); it != rend; it++) { |
| 603 | auto [obj, gen, token_start] = *it; | 627 | auto [obj, gen, token_start] = *it; |
qpdf/qpdf.testcov
| @@ -53,6 +53,7 @@ QPDF xref gen > 0 1 | @@ -53,6 +53,7 @@ QPDF xref gen > 0 1 | ||
| 53 | QPDF xref size mismatch 0 | 53 | QPDF xref size mismatch 0 |
| 54 | QPDF not a pdf file 0 | 54 | QPDF not a pdf file 0 |
| 55 | QPDF can't find startxref 0 | 55 | QPDF can't find startxref 0 |
| 56 | +QPDF startxref more than 1024 before end 0 | ||
| 56 | QPDF invalid xref 0 | 57 | QPDF invalid xref 0 |
| 57 | QPDF invalid xref entry 0 | 58 | QPDF invalid xref entry 0 |
| 58 | QPDF missing trailer 0 | 59 | QPDF missing trailer 0 |
qpdf/qtest/qpdf/recover-xref-stream.out
| 1 | WARNING: recover-xref-stream.pdf: file is damaged | 1 | WARNING: recover-xref-stream.pdf: file is damaged |
| 2 | WARNING: recover-xref-stream.pdf: can't find startxref | 2 | WARNING: recover-xref-stream.pdf: can't find startxref |
| 3 | WARNING: recover-xref-stream.pdf: Attempting to reconstruct cross-reference table | 3 | WARNING: recover-xref-stream.pdf: Attempting to reconstruct cross-reference table |
| 4 | -WARNING: recover-xref-stream.pdf: reported number of objects (14) is not one plus the highest object number (15) | 4 | +WARNING: recover-xref-stream.pdf: startxref was more than 1024 bytes before end of file |
| 5 | qpdf: operation succeeded with warnings; resulting file may have some problems | 5 | qpdf: operation succeeded with warnings; resulting file may have some problems |