Commit 3427df9bb46f462b5ef895bee94cbbb775ec2708

Authored by m-holger
Committed by GitHub
2 parents e637d843 b3c4b4db

Merge pull request #1451 from m-holger/fuzz

Refine xref table reconstruction
include/qpdf/QPDF.hh
... ... @@ -765,15 +765,15 @@ class QPDF
765 765 void parse(char const* password);
766 766 void inParse(bool);
767 767 void setTrailer(QPDFObjectHandle obj);
768   - void read_xref(qpdf_offset_t offset);
  768 + void read_xref(qpdf_offset_t offset, bool in_stream_recovery = false);
769 769 bool resolveXRefTable();
770 770 void reconstruct_xref(QPDFExc& e, bool found_startxref = true);
771 771 bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes);
772 772 bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
773 773 bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
774 774 qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
775   - qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
776   - qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
  775 + qpdf_offset_t read_xrefStream(qpdf_offset_t offset, bool in_stream_recovery=false);
  776 + qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream, bool in_stream_recovery=false);
777 777 std::pair<int, std::array<int, 3>>
778 778 processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
779 779 int processXRefSize(
... ...
libqpdf/QPDF_objects.cc
... ... @@ -325,7 +325,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
325 325 }
326 326 if (max_offset > 0) {
327 327 try {
328   - read_xref(max_offset);
  328 + read_xref(max_offset, true);
329 329 } catch (std::exception&) {
330 330 warn(damagedPDF(
331 331 "", -1, "error decoding candidate xref stream while recovering damaged file"));
... ... @@ -388,7 +388,7 @@ QPDF::reconstruct_xref(QPDFExc& e, bool found_startxref)
388 388 }
389 389  
390 390 void
391   -QPDF::read_xref(qpdf_offset_t xref_offset)
  391 +QPDF::read_xref(qpdf_offset_t xref_offset, bool in_stream_recovery)
392 392 {
393 393 std::map<int, int> free_table;
394 394 std::set<qpdf_offset_t> visited;
... ... @@ -440,7 +440,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
440 440 }
441 441 xref_offset = read_xrefTable(xref_offset + skip);
442 442 } else {
443   - xref_offset = read_xrefStream(xref_offset);
  443 + xref_offset = read_xrefStream(xref_offset, in_stream_recovery);
444 444 }
445 445 if (visited.count(xref_offset) != 0) {
446 446 QTC::TC("qpdf", "QPDF xref loop");
... ... @@ -759,7 +759,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
759 759  
760 760 // Read a single cross-reference stream.
761 761 qpdf_offset_t
762   -QPDF::read_xrefStream(qpdf_offset_t xref_offset)
  762 +QPDF::read_xrefStream(qpdf_offset_t xref_offset, bool in_stream_recovery)
763 763 {
764 764 if (!m->ignore_xref_streams) {
765 765 QPDFObjGen x_og;
... ... @@ -772,7 +772,7 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset)
772 772 }
773 773 if (xref_obj.isStreamOfType("/XRef")) {
774 774 QTC::TC("qpdf", "QPDF found xref stream");
775   - return processXRefStream(xref_offset, xref_obj);
  775 + return processXRefStream(xref_offset, xref_obj, in_stream_recovery);
776 776 }
777 777 }
778 778  
... ... @@ -905,7 +905,8 @@ QPDF::processXRefIndex(
905 905 }
906 906  
907 907 qpdf_offset_t
908   -QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
  908 +QPDF::processXRefStream(
  909 + qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj, bool in_stream_recovery)
909 910 {
910 911 auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc {
911 912 return damagedPDF("xref stream", xref_offset, msg.data());
... ... @@ -971,7 +972,13 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
971 972 // objects.
972 973 insertFreeXrefEntry(QPDFObjGen(obj, 0));
973 974 } else {
974   - insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
  975 + auto typ = toI(fields[0]);
  976 + if (!in_stream_recovery || typ == 2) {
  977 + // If we are in xref stream recovery all actual uncompressed objects have
  978 + // already been inserted into the xref table. Avoid adding junk data into the
  979 + // xref table.
  980 + insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
  981 + }
975 982 }
976 983 ++obj;
977 984 }
... ...