Commit 3427df9bb46f462b5ef895bee94cbbb775ec2708

Authored by m-holger
Committed by GitHub
2 parents e637d843 b3c4b4db

Merge pull request #1451 from m-holger/fuzz

Refine xref table reconstruction
include/qpdf/QPDF.hh
@@ -765,15 +765,15 @@ class QPDF @@ -765,15 +765,15 @@ class QPDF
765 void parse(char const* password); 765 void parse(char const* password);
766 void inParse(bool); 766 void inParse(bool);
767 void setTrailer(QPDFObjectHandle obj); 767 void setTrailer(QPDFObjectHandle obj);
768 - void read_xref(qpdf_offset_t offset); 768 + void read_xref(qpdf_offset_t offset, bool in_stream_recovery = false);
769 bool resolveXRefTable(); 769 bool resolveXRefTable();
770 void reconstruct_xref(QPDFExc& e, bool found_startxref = true); 770 void reconstruct_xref(QPDFExc& e, bool found_startxref = true);
771 bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); 771 bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes);
772 bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); 772 bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
773 bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type); 773 bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
774 qpdf_offset_t read_xrefTable(qpdf_offset_t offset); 774 qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
775 - qpdf_offset_t read_xrefStream(qpdf_offset_t offset);  
776 - qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); 775 + qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery=false);
  776 + qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery=false);
777 std::pair<int, std::array<int, 3>> 777 std::pair<int, std::array<int, 3>>
778 processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged); 778 processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
779 int processXRefSize( 779 int processXRefSize(
libqpdf/QPDF_objects.cc
@@ -325,7 +325,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) @@ -325,7 +325,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
325 } 325 }
326 if (max_offset > 0) { 326 if (max_offset > 0) {
327 try { 327 try {
328 - read_xref(max_offset); 328 + read_xref(max_offset, true);
329 } catch (std::exception&) { 329 } catch (std::exception&) {
330 warn(damagedPDF( 330 warn(damagedPDF(
331 "", -1, "error decoding candidate xref stream while recovering damaged file")); 331 "", -1, "error decoding candidate xref stream while recovering damaged file"));
@@ -388,7 +388,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref) @@ -388,7 +388,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
388 } 388 }
389 389
390 void 390 void
391 -QPDF::read_xref(qpdf_offset_t xref_offset) 391 +QPDF::read_xref(qpdf_offset_t xref_offset, bool in_stream_recovery)
392 { 392 {
393 std::map<int, int> free_table; 393 std::map<int, int> free_table;
394 std::set<qpdf_offset_t> visited; 394 std::set<qpdf_offset_t> visited;
@@ -440,7 +440,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) @@ -440,7 +440,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
440 } 440 }
441 xref_offset = read_xrefTable(xref_offset + skip); 441 xref_offset = read_xrefTable(xref_offset + skip);
442 } else { 442 } else {
443 - xref_offset = read_xrefStream(xref_offset); 443 + xref_offset = read_xrefStream(xref_offset, in_stream_recovery);
444 } 444 }
445 if (visited.count(xref_offset) != 0) { 445 if (visited.count(xref_offset) != 0) {
446 QTC::TC("qpdf", "QPDF xref loop"); 446 QTC::TC("qpdf", "QPDF xref loop");
@@ -759,7 +759,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -759,7 +759,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
759 759
760 // Read a single cross-reference stream. 760 // Read a single cross-reference stream.
761 qpdf_offset_t 761 qpdf_offset_t
762 -QPDF::read_xrefStream(qpdf_offset_t xref_offset) 762 +QPDF::read_xrefStream(qpdf_offset_t xref_offset, bool in_stream_recovery)
763 { 763 {
764 if (!m->ignore_xref_streams) { 764 if (!m->ignore_xref_streams) {
765 QPDFObjGen x_og; 765 QPDFObjGen x_og;
@@ -772,7 +772,7 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) @@ -772,7 +772,7 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset)
772 } 772 }
773 if (xref_obj.isStreamOfType("/XRef")) { 773 if (xref_obj.isStreamOfType("/XRef")) {
774 QTC::TC("qpdf", "QPDF found xref stream"); 774 QTC::TC("qpdf", "QPDF found xref stream");
775 - return processXRefStream(xref_offset, xref_obj); 775 + return processXRefStream(xref_offset, xref_obj, in_stream_recovery);
776 } 776 }
777 } 777 }
778 778
@@ -905,7 +905,8 @@ QPDF::processXRefIndex( @@ -905,7 +905,8 @@ QPDF::processXRefIndex(
905 } 905 }
906 906
907 qpdf_offset_t 907 qpdf_offset_t
908 -QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) 908 +QPDF::processXRefStream(
  909 + qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj, bool in_stream_recovery)
909 { 910 {
910 auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc { 911 auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc {
911 return damagedPDF("xref stream", xref_offset, msg.data()); 912 return damagedPDF("xref stream", xref_offset, msg.data());
@@ -971,7 +972,13 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) @@ -971,7 +972,13 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
971 // objects. 972 // objects.
972 insertFreeXrefEntry(QPDFObjGen(obj, 0)); 973 insertFreeXrefEntry(QPDFObjGen(obj, 0));
973 } else { 974 } else {
974 - insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); 975 + auto typ = toI(fields[0]);
  976 + if (!in_stream_recovery || typ == 2) {
  977 + // If we are in xref stream recovery all actual uncompressed objects have
  978 + // already been inserted into the xref table. Avoid adding junk data into the
  979 + // xref table.
  980 + insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
  981 + }
975 } 982 }
976 ++obj; 983 ++obj;
977 } 984 }