Commit ddd78ac7c62d4494e7d6098f395f64bdf5950b00

Authored by Jay Berkenbilt
Committed by GitHub
2 parents e51a1948 098b98c8

Merge pull request #1002 from m-holger/ixe

 Add new private method QPDF::insertReconstructedXrefEntry
include/qpdf/QPDF.hh
... ... @@ -1002,7 +1002,9 @@ class QPDF
1002 1002 qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
1003 1003 qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
1004 1004 qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
1005   - void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite = false);
  1005 + void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2);
  1006 + void insertFreeXrefEntry(QPDFObjGen);
  1007 + void insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2);
1006 1008 void setLastObjectDescription(std::string const& description, QPDFObjGen const& og);
1007 1009 QPDFObjectHandle readObject(
1008 1010 std::shared_ptr<InputSource>,
... ...
libqpdf/QPDF.cc
... ... @@ -564,7 +564,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
564 564 if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) {
565 565 int obj = QUtil::string_to_int(t1.getValue().c_str());
566 566 int gen = QUtil::string_to_int(t2.getValue().c_str());
567   - insertXrefEntry(obj, 1, token_start, gen, true);
  567 + insertReconstructedXrefEntry(obj, token_start, gen);
568 568 }
569 569 } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) {
570 570 QPDFObjectHandle t = readObject(m->file, "trailer", QPDFObjGen(), false);
... ... @@ -577,6 +577,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
577 577 m->file->seek(next_line_start, SEEK_SET);
578 578 line_start = next_line_start;
579 579 }
  580 + m->deleted_objects.clear();
580 581  
581 582 if (!m->trailer.isInitialized()) {
582 583 // We could check the last encountered object to see if it was an xref stream. If so, we
... ... @@ -889,7 +890,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
889 890  
890 891 // Handle any deleted items now that we've read the /XRefStm.
891 892 for (auto const& og: deleted_items) {
892   - insertXrefEntry(og.getObj(), 0, 0, og.getGen());
  893 + insertFreeXrefEntry(og);
893 894 }
894 895  
895 896 if (cur_trailer.hasKey("/Prev")) {
... ... @@ -909,7 +910,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
909 910 qpdf_offset_t
910 911 QPDF::read_xrefStream(qpdf_offset_t xref_offset)
911 912 {
912   - bool found = false;
913 913 if (!m->ignore_xref_streams) {
914 914 QPDFObjGen x_og;
915 915 QPDFObjectHandle xref_obj;
... ... @@ -921,17 +921,13 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset)
921 921 }
922 922 if (xref_obj.isStreamOfType("/XRef")) {
923 923 QTC::TC("qpdf", "QPDF found xref stream");
924   - found = true;
925   - xref_offset = processXRefStream(xref_offset, xref_obj);
  924 + return processXRefStream(xref_offset, xref_obj);
926 925 }
927 926 }
928 927  
929   - if (!found) {
930   - QTC::TC("qpdf", "QPDF can't find xref");
931   - throw damagedPDF("", xref_offset, "xref not found");
932   - }
933   -
934   - return xref_offset;
  928 + QTC::TC("qpdf", "QPDF can't find xref");
  929 + throw damagedPDF("", xref_offset, "xref not found");
  930 + return 0; // unreachable
935 931 }
936 932  
937 933 qpdf_offset_t
... ... @@ -1087,9 +1083,10 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
1087 1083 if (fields[0] == 0) {
1088 1084 // Ignore fields[2], which we don't care about in this case. This works around the issue
1089 1085 // of some PDF files that put invalid values, like -1, here for deleted objects.
1090   - fields[2] = 0;
  1086 + insertFreeXrefEntry(QPDFObjGen(obj, 0));
  1087 + } else {
  1088 + insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
1091 1089 }
1092   - insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
1093 1090 }
1094 1091  
1095 1092 if (!m->trailer.isInitialized()) {
... ... @@ -1111,47 +1108,35 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
1111 1108 }
1112 1109  
1113 1110 void
1114   -QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite)
  1111 +QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2)
1115 1112 {
1116 1113 // Populate the xref table in such a way that the first reference to an object that we see,
1117 1114 // which is the one in the latest xref table in which it appears, is the one that gets stored.
1118   - // This works because we are reading more recent appends before older ones. Exception: if
1119   - // overwrite is true, then replace any existing object. This is used in xref recovery mode,
1120   - // which reads the file from beginning to end.
  1115 + // This works because we are reading more recent appends before older ones.
1121 1116  
1122 1117 // If there is already an entry for this object and generation in the table, it means that a
1123 1118 // later xref table has registered this object. Disregard this one.
1124   - { // private scope
1125   - int gen = (f0 == 2 ? 0 : f2);
1126   - QPDFObjGen og(obj, gen);
1127   - if (m->xref_table.count(og)) {
1128   - if (overwrite) {
1129   - QTC::TC("qpdf", "QPDF xref overwrite object");
1130   - m->xref_table.erase(og);
1131   - } else {
1132   - QTC::TC("qpdf", "QPDF xref reused object");
1133   - return;
1134   - }
1135   - }
1136   - if (m->deleted_objects.count(obj)) {
1137   - QTC::TC("qpdf", "QPDF xref deleted object");
1138   - return;
1139   - }
  1119 +
  1120 + if (m->deleted_objects.count(obj)) {
  1121 + QTC::TC("qpdf", "QPDF xref deleted object");
  1122 + return;
1140 1123 }
1141 1124  
1142   - switch (f0) {
1143   - case 0:
1144   - m->deleted_objects.insert(obj);
1145   - break;
  1125 + auto [iter, created] = m->xref_table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2)));
  1126 + if (!created) {
  1127 + QTC::TC("qpdf", "QPDF xref reused object");
  1128 + return;
  1129 + }
1146 1130  
  1131 + switch (f0) {
1147 1132 case 1:
1148 1133 // f2 is generation
1149 1134 QTC::TC("qpdf", "QPDF xref gen > 0", ((f2 > 0) ? 1 : 0));
1150   - m->xref_table[QPDFObjGen(obj, f2)] = QPDFXRefEntry(f1);
  1135 + iter->second = QPDFXRefEntry(f1);
1151 1136 break;
1152 1137  
1153 1138 case 2:
1154   - m->xref_table[QPDFObjGen(obj, 0)] = QPDFXRefEntry(toI(f1), f2);
  1139 + iter->second = QPDFXRefEntry(toI(f1), f2);
1155 1140 break;
1156 1141  
1157 1142 default:
... ... @@ -1161,6 +1146,28 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite)
1161 1146 }
1162 1147  
1163 1148 void
  1149 +QPDF::insertFreeXrefEntry(QPDFObjGen og)
  1150 +{
  1151 + if (!m->xref_table.count(og)) {
  1152 + m->deleted_objects.insert(og.getObj());
  1153 + }
  1154 +}
  1155 +
  1156 +// Replace uncompressed object. This is used in xref recovery mode, which reads the file from
  1157 +// beginning to end.
  1158 +void
  1159 +QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2)
  1160 +{
  1161 + QPDFObjGen og(obj, f2);
  1162 + if (!m->deleted_objects.count(obj)) {
  1163 + // deleted_objects stores the uncompressed objects removed from the xref table at the start
  1164 + // of recovery.
  1165 + QTC::TC("qpdf", "QPDF xref overwrite object");
  1166 + m->xref_table[QPDFObjGen(obj, f2)] = QPDFXRefEntry(f1);
  1167 + }
  1168 +}
  1169 +
  1170 +void
1164 1171 QPDF::showXRefTable()
1165 1172 {
1166 1173 auto& cout = *m->log->getInfo();
... ...