Commit ddd78ac7c62d4494e7d6098f395f64bdf5950b00
Committed by GitHub
Merge pull request #1002 from m-holger/ixe
Add new private method QPDF::insertReconstructedXrefEntry
Showing 2 changed files with 49 additions and 40 deletions
include/qpdf/QPDF.hh
| ... | ... | @@ -1002,7 +1002,9 @@ class QPDF |
| 1002 | 1002 | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); |
| 1003 | 1003 | qpdf_offset_t read_xrefStream(qpdf_offset_t offset); |
| 1004 | 1004 | qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); |
| 1005 | - void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite = false); | |
| 1005 | + void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2); | |
| 1006 | + void insertFreeXrefEntry(QPDFObjGen); | |
| 1007 | + void insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2); | |
| 1006 | 1008 | void setLastObjectDescription(std::string const& description, QPDFObjGen const& og); |
| 1007 | 1009 | QPDFObjectHandle readObject( |
| 1008 | 1010 | std::shared_ptr<InputSource>, | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -564,7 +564,7 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 564 | 564 | if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { |
| 565 | 565 | int obj = QUtil::string_to_int(t1.getValue().c_str()); |
| 566 | 566 | int gen = QUtil::string_to_int(t2.getValue().c_str()); |
| 567 | - insertXrefEntry(obj, 1, token_start, gen, true); | |
| 567 | + insertReconstructedXrefEntry(obj, token_start, gen); | |
| 568 | 568 | } |
| 569 | 569 | } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) { |
| 570 | 570 | QPDFObjectHandle t = readObject(m->file, "trailer", QPDFObjGen(), false); |
| ... | ... | @@ -577,6 +577,7 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 577 | 577 | m->file->seek(next_line_start, SEEK_SET); |
| 578 | 578 | line_start = next_line_start; |
| 579 | 579 | } |
| 580 | + m->deleted_objects.clear(); | |
| 580 | 581 | |
| 581 | 582 | if (!m->trailer.isInitialized()) { |
| 582 | 583 | // We could check the last encountered object to see if it was an xref stream. If so, we |
| ... | ... | @@ -889,7 +890,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 889 | 890 | |
| 890 | 891 | // Handle any deleted items now that we've read the /XRefStm. |
| 891 | 892 | for (auto const& og: deleted_items) { |
| 892 | - insertXrefEntry(og.getObj(), 0, 0, og.getGen()); | |
| 893 | + insertFreeXrefEntry(og); | |
| 893 | 894 | } |
| 894 | 895 | |
| 895 | 896 | if (cur_trailer.hasKey("/Prev")) { |
| ... | ... | @@ -909,7 +910,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 909 | 910 | qpdf_offset_t |
| 910 | 911 | QPDF::read_xrefStream(qpdf_offset_t xref_offset) |
| 911 | 912 | { |
| 912 | - bool found = false; | |
| 913 | 913 | if (!m->ignore_xref_streams) { |
| 914 | 914 | QPDFObjGen x_og; |
| 915 | 915 | QPDFObjectHandle xref_obj; |
| ... | ... | @@ -921,17 +921,13 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) |
| 921 | 921 | } |
| 922 | 922 | if (xref_obj.isStreamOfType("/XRef")) { |
| 923 | 923 | QTC::TC("qpdf", "QPDF found xref stream"); |
| 924 | - found = true; | |
| 925 | - xref_offset = processXRefStream(xref_offset, xref_obj); | |
| 924 | + return processXRefStream(xref_offset, xref_obj); | |
| 926 | 925 | } |
| 927 | 926 | } |
| 928 | 927 | |
| 929 | - if (!found) { | |
| 930 | - QTC::TC("qpdf", "QPDF can't find xref"); | |
| 931 | - throw damagedPDF("", xref_offset, "xref not found"); | |
| 932 | - } | |
| 933 | - | |
| 934 | - return xref_offset; | |
| 928 | + QTC::TC("qpdf", "QPDF can't find xref"); | |
| 929 | + throw damagedPDF("", xref_offset, "xref not found"); | |
| 930 | + return 0; // unreachable | |
| 935 | 931 | } |
| 936 | 932 | |
| 937 | 933 | qpdf_offset_t |
| ... | ... | @@ -1087,9 +1083,10 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) |
| 1087 | 1083 | if (fields[0] == 0) { |
| 1088 | 1084 | // Ignore fields[2], which we don't care about in this case. This works around the issue |
| 1089 | 1085 | // of some PDF files that put invalid values, like -1, here for deleted objects. |
| 1090 | - fields[2] = 0; | |
| 1086 | + insertFreeXrefEntry(QPDFObjGen(obj, 0)); | |
| 1087 | + } else { | |
| 1088 | + insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); | |
| 1091 | 1089 | } |
| 1092 | - insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); | |
| 1093 | 1090 | } |
| 1094 | 1091 | |
| 1095 | 1092 | if (!m->trailer.isInitialized()) { |
| ... | ... | @@ -1111,47 +1108,35 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) |
| 1111 | 1108 | } |
| 1112 | 1109 | |
| 1113 | 1110 | void |
| 1114 | -QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) | |
| 1111 | +QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2) | |
| 1115 | 1112 | { |
| 1116 | 1113 | // Populate the xref table in such a way that the first reference to an object that we see, |
| 1117 | 1114 | // which is the one in the latest xref table in which it appears, is the one that gets stored. |
| 1118 | - // This works because we are reading more recent appends before older ones. Exception: if | |
| 1119 | - // overwrite is true, then replace any existing object. This is used in xref recovery mode, | |
| 1120 | - // which reads the file from beginning to end. | |
| 1115 | + // This works because we are reading more recent appends before older ones. | |
| 1121 | 1116 | |
| 1122 | 1117 | // If there is already an entry for this object and generation in the table, it means that a |
| 1123 | 1118 | // later xref table has registered this object. Disregard this one. |
| 1124 | - { // private scope | |
| 1125 | - int gen = (f0 == 2 ? 0 : f2); | |
| 1126 | - QPDFObjGen og(obj, gen); | |
| 1127 | - if (m->xref_table.count(og)) { | |
| 1128 | - if (overwrite) { | |
| 1129 | - QTC::TC("qpdf", "QPDF xref overwrite object"); | |
| 1130 | - m->xref_table.erase(og); | |
| 1131 | - } else { | |
| 1132 | - QTC::TC("qpdf", "QPDF xref reused object"); | |
| 1133 | - return; | |
| 1134 | - } | |
| 1135 | - } | |
| 1136 | - if (m->deleted_objects.count(obj)) { | |
| 1137 | - QTC::TC("qpdf", "QPDF xref deleted object"); | |
| 1138 | - return; | |
| 1139 | - } | |
| 1119 | + | |
| 1120 | + if (m->deleted_objects.count(obj)) { | |
| 1121 | + QTC::TC("qpdf", "QPDF xref deleted object"); | |
| 1122 | + return; | |
| 1140 | 1123 | } |
| 1141 | 1124 | |
| 1142 | - switch (f0) { | |
| 1143 | - case 0: | |
| 1144 | - m->deleted_objects.insert(obj); | |
| 1145 | - break; | |
| 1125 | + auto [iter, created] = m->xref_table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2))); | |
| 1126 | + if (!created) { | |
| 1127 | + QTC::TC("qpdf", "QPDF xref reused object"); | |
| 1128 | + return; | |
| 1129 | + } | |
| 1146 | 1130 | |
| 1131 | + switch (f0) { | |
| 1147 | 1132 | case 1: |
| 1148 | 1133 | // f2 is generation |
| 1149 | 1134 | QTC::TC("qpdf", "QPDF xref gen > 0", ((f2 > 0) ? 1 : 0)); |
| 1150 | - m->xref_table[QPDFObjGen(obj, f2)] = QPDFXRefEntry(f1); | |
| 1135 | + iter->second = QPDFXRefEntry(f1); | |
| 1151 | 1136 | break; |
| 1152 | 1137 | |
| 1153 | 1138 | case 2: |
| 1154 | - m->xref_table[QPDFObjGen(obj, 0)] = QPDFXRefEntry(toI(f1), f2); | |
| 1139 | + iter->second = QPDFXRefEntry(toI(f1), f2); | |
| 1155 | 1140 | break; |
| 1156 | 1141 | |
| 1157 | 1142 | default: |
| ... | ... | @@ -1161,6 +1146,28 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) |
| 1161 | 1146 | } |
| 1162 | 1147 | |
| 1163 | 1148 | void |
| 1149 | +QPDF::insertFreeXrefEntry(QPDFObjGen og) | |
| 1150 | +{ | |
| 1151 | + if (!m->xref_table.count(og)) { | |
| 1152 | + m->deleted_objects.insert(og.getObj()); | |
| 1153 | + } | |
| 1154 | +} | |
| 1155 | + | |
| 1156 | +// Replace uncompressed object. This is used in xref recovery mode, which reads the file from | |
| 1157 | +// beginning to end. | |
| 1158 | +void | |
| 1159 | +QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2) | |
| 1160 | +{ | |
| 1161 | + QPDFObjGen og(obj, f2); | |
| 1162 | + if (!m->deleted_objects.count(obj)) { | |
| 1163 | + // deleted_objects stores the uncompressed objects removed from the xref table at the start | |
| 1164 | + // of recovery. | |
| 1165 | + QTC::TC("qpdf", "QPDF xref overwrite object"); | |
| 1166 | + m->xref_table[QPDFObjGen(obj, f2)] = QPDFXRefEntry(f1); | |
| 1167 | + } | |
| 1168 | +} | |
| 1169 | + | |
| 1170 | +void | |
| 1164 | 1171 | QPDF::showXRefTable() |
| 1165 | 1172 | { |
| 1166 | 1173 | auto& cout = *m->log->getInfo(); | ... | ... |