Commit b3c4b4dbab135643c06a074b6b13993ae1b9a741
1 parent
ad3bac2c
Refine xref table reconstruction
During xref table reconstruction, ignore uncompressed object entries found in xref streams. By that point the xref table has already been populated with entries for the objects actually found in the file, so the entries for uncompressed objects in xref streams are redundant and potentially incorrect.
Showing
2 changed files
with
17 additions
and
10 deletions
include/qpdf/QPDF.hh
| ... | ... | @@ -765,15 +765,15 @@ class QPDF |
| 765 | 765 | void parse(char const* password); |
| 766 | 766 | void inParse(bool); |
| 767 | 767 | void setTrailer(QPDFObjectHandle obj); |
| 768 | - void read_xref(qpdf_offset_t offset); | |
| 768 | + void read_xref(qpdf_offset_t offset, bool in_stream_recovery = false); | |
| 769 | 769 | bool resolveXRefTable(); |
| 770 | 770 | void reconstruct_xref(QPDFExc& e, bool found_startxref = true); |
| 771 | 771 | bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); |
| 772 | 772 | bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
| 773 | 773 | bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
| 774 | 774 | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); |
| 775 | - qpdf_offset_t read_xrefStream(qpdf_offset_t offset); | |
| 776 | - qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); | |
| 775 | + qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery=false); | |
| 776 | + qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery=false); | |
| 777 | 777 | std::pair<int, std::array<int, 3>> |
| 778 | 778 | processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged); |
| 779 | 779 | int processXRefSize( | ... | ... |
libqpdf/QPDF_objects.cc
| ... | ... | @@ -325,7 +325,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) |
| 325 | 325 | } |
| 326 | 326 | if (max_offset > 0) { |
| 327 | 327 | try { |
| 328 | - read_xref(max_offset); | |
| 328 | + read_xref(max_offset, true); | |
| 329 | 329 | } catch (std::exception&) { |
| 330 | 330 | warn(damagedPDF( |
| 331 | 331 | "", -1, "error decoding candidate xref stream while recovering damaged file")); |
| ... | ... | @@ -388,7 +388,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) |
| 388 | 388 | } |
| 389 | 389 | |
| 390 | 390 | void |
| 391 | -QPDF::read_xref(qpdf_offset_t xref_offset) | |
| 391 | +QPDF::read_xref(qpdf_offset_t xref_offset, bool in_stream_recovery) | |
| 392 | 392 | { |
| 393 | 393 | std::map<int, int> free_table; |
| 394 | 394 | std::set<qpdf_offset_t> visited; |
| ... | ... | @@ -440,7 +440,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) |
| 440 | 440 | } |
| 441 | 441 | xref_offset = read_xrefTable(xref_offset + skip); |
| 442 | 442 | } else { |
| 443 | - xref_offset = read_xrefStream(xref_offset); | |
| 443 | + xref_offset = read_xrefStream(xref_offset, in_stream_recovery); | |
| 444 | 444 | } |
| 445 | 445 | if (visited.count(xref_offset) != 0) { |
| 446 | 446 | QTC::TC("qpdf", "QPDF xref loop"); |
| ... | ... | @@ -759,7 +759,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 759 | 759 | |
| 760 | 760 | // Read a single cross-reference stream. |
| 761 | 761 | qpdf_offset_t |
| 762 | -QPDF::read_xrefStream(qpdf_offset_t xref_offset) | |
| 762 | +QPDF::read_xrefStream(qpdf_offset_t xref_offset, bool in_stream_recovery) | |
| 763 | 763 | { |
| 764 | 764 | if (!m->ignore_xref_streams) { |
| 765 | 765 | QPDFObjGen x_og; |
| ... | ... | @@ -772,7 +772,7 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) |
| 772 | 772 | } |
| 773 | 773 | if (xref_obj.isStreamOfType("/XRef")) { |
| 774 | 774 | QTC::TC("qpdf", "QPDF found xref stream"); |
| 775 | - return processXRefStream(xref_offset, xref_obj); | |
| 775 | + return processXRefStream(xref_offset, xref_obj, in_stream_recovery); | |
| 776 | 776 | } |
| 777 | 777 | } |
| 778 | 778 | |
| ... | ... | @@ -905,7 +905,8 @@ QPDF::processXRefIndex( |
| 905 | 905 | } |
| 906 | 906 | |
| 907 | 907 | qpdf_offset_t |
| 908 | -QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | |
| 908 | +QPDF::processXRefStream( | |
| 909 | + qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj, bool in_stream_recovery) | |
| 909 | 910 | { |
| 910 | 911 | auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc { |
| 911 | 912 | return damagedPDF("xref stream", xref_offset, msg.data()); |
| ... | ... | @@ -971,7 +972,13 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) |
| 971 | 972 | // objects. |
| 972 | 973 | insertFreeXrefEntry(QPDFObjGen(obj, 0)); |
| 973 | 974 | } else { |
| 974 | - insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); | |
| 975 | + auto typ = toI(fields[0]); | |
| 976 | + if (!in_stream_recovery || typ == 2) { | |
| 977 | + // If we are in xref stream recovery all actual uncompressed objects have | |
| 978 | + // already been inserted into the xref table. Avoid adding junk data into the | |
| 979 | + // xref table. | |
| 980 | + insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); | |
| 981 | + } | |
| 975 | 982 | } |
| 976 | 983 | ++obj; |
| 977 | 984 | } | ... | ... |