Commit 3427df9bb46f462b5ef895bee94cbbb775ec2708
Committed by GitHub
Merge pull request #1451 from m-holger/fuzz
Refine xref table reconstruction
Showing 2 changed files with 17 additions and 10 deletions
include/qpdf/QPDF.hh
| ... | ... | @@ -765,15 +765,15 @@ class QPDF |
| 765 | 765 | void parse(char const* password); |
| 766 | 766 | void inParse(bool); |
| 767 | 767 | void setTrailer(QPDFObjectHandle obj); |
| 768 | - void read_xref(qpdf_offset_t offset); | |
| 768 | + void read_xref(qpdf_offset_t offset, bool in_stream_recovery = false); | |
| 769 | 769 | bool resolveXRefTable(); |
| 770 | 770 | void reconstruct_xref(QPDFExc& e, bool found_startxref = true); |
| 771 | 771 | bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); |
| 772 | 772 | bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
| 773 | 773 | bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); |
| 774 | 774 | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); |
| 775 | - qpdf_offset_t read_xrefStream(qpdf_offset_t offset); | |
| 776 | - qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); | |
| 775 | + qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery=false); | |
| 776 | + qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery=false); | |
| 777 | 777 | std::pair<int, std::array<int, 3>> |
| 778 | 778 | processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged); |
| 779 | 779 | int processXRefSize( | ... | ... |
libqpdf/QPDF_objects.cc
| ... | ... | @@ -325,7 +325,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) |
| 325 | 325 | } |
| 326 | 326 | if (max_offset > 0) { |
| 327 | 327 | try { |
| 328 | - read_xref(max_offset); | |
| 328 | + read_xref(max_offset, true); | |
| 329 | 329 | } catch (std::exception&) { |
| 330 | 330 | warn(damagedPDF( |
| 331 | 331 | "", -1, "error decoding candidate xref stream while recovering damaged file")); |
| ... | ... | @@ -388,7 +388,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) |
| 388 | 388 | } |
| 389 | 389 | |
| 390 | 390 | void |
| 391 | -QPDF::read_xref(qpdf_offset_t xref_offset) | |
| 391 | +QPDF::read_xref(qpdf_offset_t xref_offset, bool in_stream_recovery) | |
| 392 | 392 | { |
| 393 | 393 | std::map<int, int> free_table; |
| 394 | 394 | std::set<qpdf_offset_t> visited; |
| ... | ... | @@ -440,7 +440,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) |
| 440 | 440 | } |
| 441 | 441 | xref_offset = read_xrefTable(xref_offset + skip); |
| 442 | 442 | } else { |
| 443 | - xref_offset = read_xrefStream(xref_offset); | |
| 443 | + xref_offset = read_xrefStream(xref_offset, in_stream_recovery); | |
| 444 | 444 | } |
| 445 | 445 | if (visited.count(xref_offset) != 0) { |
| 446 | 446 | QTC::TC("qpdf", "QPDF xref loop"); |
| ... | ... | @@ -759,7 +759,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 759 | 759 | |
| 760 | 760 | // Read a single cross-reference stream. |
| 761 | 761 | qpdf_offset_t |
| 762 | -QPDF::read_xrefStream(qpdf_offset_t xref_offset) | |
| 762 | +QPDF::read_xrefStream(qpdf_offset_t xref_offset, bool in_stream_recovery) | |
| 763 | 763 | { |
| 764 | 764 | if (!m->ignore_xref_streams) { |
| 765 | 765 | QPDFObjGen x_og; |
| ... | ... | @@ -772,7 +772,7 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) |
| 772 | 772 | } |
| 773 | 773 | if (xref_obj.isStreamOfType("/XRef")) { |
| 774 | 774 | QTC::TC("qpdf", "QPDF found xref stream"); |
| 775 | - return processXRefStream(xref_offset, xref_obj); | |
| 775 | + return processXRefStream(xref_offset, xref_obj, in_stream_recovery); | |
| 776 | 776 | } |
| 777 | 777 | } |
| 778 | 778 | |
| ... | ... | @@ -905,7 +905,8 @@ QPDF::processXRefIndex( |
| 905 | 905 | } |
| 906 | 906 | |
| 907 | 907 | qpdf_offset_t |
| 908 | -QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | |
| 908 | +QPDF::processXRefStream( | |
| 909 | + qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj, bool in_stream_recovery) | |
| 909 | 910 | { |
| 910 | 911 | auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc { |
| 911 | 912 | return damagedPDF("xref stream", xref_offset, msg.data()); |
| ... | ... | @@ -971,7 +972,13 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) |
| 971 | 972 | // objects. |
| 972 | 973 | insertFreeXrefEntry(QPDFObjGen(obj, 0)); |
| 973 | 974 | } else { |
| 974 | - insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); | |
| 975 | + auto typ = toI(fields[0]); | |
| 976 | + if (!in_stream_recovery || typ == 2) { | |
| 977 | + // During xref stream recovery, all actual uncompressed objects have already | |
| 978 | + // been inserted into the xref table, so only type 2 entries are added | |
| 979 | + // here. This avoids adding junk data into the xref table. | |
| 980 | + insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); | |
| 981 | + } | |
| 975 | 982 | } |
| 976 | 983 | ++obj; |
| 977 | 984 | } | ... | ... |