Commit b3c4b4dbab135643c06a074b6b13993ae1b9a741
1 parent
ad3bac2c
Refine xref table reconstruction
During xref table reconstruction, ignore uncompressed object entries found in xref streams. The xref table is populated with entries for the objects actually found in the file, so the entries for uncompressed objects in xref streams are redundant and potentially incorrect.
Showing
2 changed files
with
17 additions
and
10 deletions
include/qpdf/QPDF.hh
| @@ -765,15 +765,15 @@ class QPDF | @@ -765,15 +765,15 @@ class QPDF | ||
| 765 | void parse(char const* password); | 765 | void parse(char const* password); |
| 766 | void inParse(bool); | 766 | void inParse(bool); |
| 767 | void setTrailer(QPDFObjectHandle obj); | 767 | void setTrailer(QPDFObjectHandle obj); |
| 768 | - void read_xref(qpdf_offset_t offset); | 768 | + void read_xref(qpdf_offset_t offset, bool in_stream_recovery = false); |
| 769 | bool resolveXRefTable(); | 769 | bool resolveXRefTable(); |
| 770 | void reconstruct_xref(QPDFExc& e, bool found_startxref = true); | 770 | void reconstruct_xref(QPDFExc& e, bool found_startxref = true); |
| 771 | bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); | 771 | bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); |
| 772 | bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); | 772 | bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
| 773 | bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); | 773 | bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
| 774 | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); | 774 | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); |
| 775 | - qpdf_offset_t read_xrefStream(qpdf_offset_t offset); | ||
| 776 | - qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); | 775 | + qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery=false); |
| 776 | + qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery=false); | ||
| 777 | std::pair<int, std::array<int, 3>> | 777 | std::pair<int, std::array<int, 3>> |
| 778 | processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged); | 778 | processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged); |
| 779 | int processXRefSize( | 779 | int processXRefSize( |
libqpdf/QPDF_objects.cc
| @@ -325,7 +325,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) | @@ -325,7 +325,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) | ||
| 325 | } | 325 | } |
| 326 | if (max_offset > 0) { | 326 | if (max_offset > 0) { |
| 327 | try { | 327 | try { |
| 328 | - read_xref(max_offset); | 328 | + read_xref(max_offset, true); |
| 329 | } catch (std::exception&) { | 329 | } catch (std::exception&) { |
| 330 | warn(damagedPDF( | 330 | warn(damagedPDF( |
| 331 | "", -1, "error decoding candidate xref stream while recovering damaged file")); | 331 | "", -1, "error decoding candidate xref stream while recovering damaged file")); |
| @@ -388,7 +388,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) | @@ -388,7 +388,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) | ||
| 388 | } | 388 | } |
| 389 | 389 | ||
| 390 | void | 390 | void |
| 391 | -QPDF::read_xref(qpdf_offset_t xref_offset) | 391 | +QPDF::read_xref(qpdf_offset_t xref_offset, bool in_stream_recovery) |
| 392 | { | 392 | { |
| 393 | std::map<int, int> free_table; | 393 | std::map<int, int> free_table; |
| 394 | std::set<qpdf_offset_t> visited; | 394 | std::set<qpdf_offset_t> visited; |
| @@ -440,7 +440,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | @@ -440,7 +440,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | ||
| 440 | } | 440 | } |
| 441 | xref_offset = read_xrefTable(xref_offset + skip); | 441 | xref_offset = read_xrefTable(xref_offset + skip); |
| 442 | } else { | 442 | } else { |
| 443 | - xref_offset = read_xrefStream(xref_offset); | 443 | + xref_offset = read_xrefStream(xref_offset, in_stream_recovery); |
| 444 | } | 444 | } |
| 445 | if (visited.count(xref_offset) != 0) { | 445 | if (visited.count(xref_offset) != 0) { |
| 446 | QTC::TC("qpdf", "QPDF xref loop"); | 446 | QTC::TC("qpdf", "QPDF xref loop"); |
| @@ -759,7 +759,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -759,7 +759,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 759 | 759 | ||
| 760 | // Read a single cross-reference stream. | 760 | // Read a single cross-reference stream. |
| 761 | qpdf_offset_t | 761 | qpdf_offset_t |
| 762 | -QPDF::read_xrefStream(qpdf_offset_t xref_offset) | 762 | +QPDF::read_xrefStream(qpdf_offset_t xref_offset, bool in_stream_recovery) |
| 763 | { | 763 | { |
| 764 | if (!m->ignore_xref_streams) { | 764 | if (!m->ignore_xref_streams) { |
| 765 | QPDFObjGen x_og; | 765 | QPDFObjGen x_og; |
| @@ -772,7 +772,7 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) | @@ -772,7 +772,7 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) | ||
| 772 | } | 772 | } |
| 773 | if (xref_obj.isStreamOfType("/XRef")) { | 773 | if (xref_obj.isStreamOfType("/XRef")) { |
| 774 | QTC::TC("qpdf", "QPDF found xref stream"); | 774 | QTC::TC("qpdf", "QPDF found xref stream"); |
| 775 | - return processXRefStream(xref_offset, xref_obj); | 775 | + return processXRefStream(xref_offset, xref_obj, in_stream_recovery); |
| 776 | } | 776 | } |
| 777 | } | 777 | } |
| 778 | 778 | ||
| @@ -905,7 +905,8 @@ QPDF::processXRefIndex( | @@ -905,7 +905,8 @@ QPDF::processXRefIndex( | ||
| 905 | } | 905 | } |
| 906 | 906 | ||
| 907 | qpdf_offset_t | 907 | qpdf_offset_t |
| 908 | -QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | 908 | +QPDF::processXRefStream( |
| 909 | + qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj, bool in_stream_recovery) | ||
| 909 | { | 910 | { |
| 910 | auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc { | 911 | auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc { |
| 911 | return damagedPDF("xref stream", xref_offset, msg.data()); | 912 | return damagedPDF("xref stream", xref_offset, msg.data()); |
| @@ -971,7 +972,13 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | @@ -971,7 +972,13 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | ||
| 971 | // objects. | 972 | // objects. |
| 972 | insertFreeXrefEntry(QPDFObjGen(obj, 0)); | 973 | insertFreeXrefEntry(QPDFObjGen(obj, 0)); |
| 973 | } else { | 974 | } else { |
| 974 | - insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); | 975 | + auto typ = toI(fields[0]); |
| 976 | + if (!in_stream_recovery || typ == 2) { | ||
| 977 | + // If we are in xref stream recovery all actual uncompressed objects have | ||
| 978 | + // already been inserted into the xref table. Avoid adding junk data into the | ||
| 979 | + // xref table. | ||
| 980 | + insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); | ||
| 981 | + } | ||
| 975 | } | 982 | } |
| 976 | ++obj; | 983 | ++obj; |
| 977 | } | 984 | } |