Commit ddd78ac7c62d4494e7d6098f395f64bdf5950b00
Committed by GitHub
Merge pull request #1002 from m-holger/ixe
Add new private method QPDF::insertReconstructedXrefEntry
Showing 2 changed files with 49 additions and 40 deletions.
include/qpdf/QPDF.hh
| @@ -1002,7 +1002,9 @@ class QPDF | @@ -1002,7 +1002,9 @@ class QPDF | ||
| 1002 | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); | 1002 | qpdf_offset_t read_xrefTable(qpdf_offset_t offset); |
| 1003 | qpdf_offset_t read_xrefStream(qpdf_offset_t offset); | 1003 | qpdf_offset_t read_xrefStream(qpdf_offset_t offset); |
| 1004 | qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); | 1004 | qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); |
| 1005 | - void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite = false); | 1005 | + void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2); |
| 1006 | + void insertFreeXrefEntry(QPDFObjGen); | ||
| 1007 | + void insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2); | ||
| 1006 | void setLastObjectDescription(std::string const& description, QPDFObjGen const& og); | 1008 | void setLastObjectDescription(std::string const& description, QPDFObjGen const& og); |
| 1007 | QPDFObjectHandle readObject( | 1009 | QPDFObjectHandle readObject( |
| 1008 | std::shared_ptr<InputSource>, | 1010 | std::shared_ptr<InputSource>, |
libqpdf/QPDF.cc
| @@ -564,7 +564,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -564,7 +564,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 564 | if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { | 564 | if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { |
| 565 | int obj = QUtil::string_to_int(t1.getValue().c_str()); | 565 | int obj = QUtil::string_to_int(t1.getValue().c_str()); |
| 566 | int gen = QUtil::string_to_int(t2.getValue().c_str()); | 566 | int gen = QUtil::string_to_int(t2.getValue().c_str()); |
| 567 | - insertXrefEntry(obj, 1, token_start, gen, true); | 567 | + insertReconstructedXrefEntry(obj, token_start, gen); |
| 568 | } | 568 | } |
| 569 | } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) { | 569 | } else if (!m->trailer.isInitialized() && t1.isWord("trailer")) { |
| 570 | QPDFObjectHandle t = readObject(m->file, "trailer", QPDFObjGen(), false); | 570 | QPDFObjectHandle t = readObject(m->file, "trailer", QPDFObjGen(), false); |
| @@ -577,6 +577,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -577,6 +577,7 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 577 | m->file->seek(next_line_start, SEEK_SET); | 577 | m->file->seek(next_line_start, SEEK_SET); |
| 578 | line_start = next_line_start; | 578 | line_start = next_line_start; |
| 579 | } | 579 | } |
| 580 | + m->deleted_objects.clear(); | ||
| 580 | 581 | ||
| 581 | if (!m->trailer.isInitialized()) { | 582 | if (!m->trailer.isInitialized()) { |
| 582 | // We could check the last encountered object to see if it was an xref stream. If so, we | 583 | // We could check the last encountered object to see if it was an xref stream. If so, we |
| @@ -889,7 +890,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -889,7 +890,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 889 | 890 | ||
| 890 | // Handle any deleted items now that we've read the /XRefStm. | 891 | // Handle any deleted items now that we've read the /XRefStm. |
| 891 | for (auto const& og: deleted_items) { | 892 | for (auto const& og: deleted_items) { |
| 892 | - insertXrefEntry(og.getObj(), 0, 0, og.getGen()); | 893 | + insertFreeXrefEntry(og); |
| 893 | } | 894 | } |
| 894 | 895 | ||
| 895 | if (cur_trailer.hasKey("/Prev")) { | 896 | if (cur_trailer.hasKey("/Prev")) { |
| @@ -909,7 +910,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | @@ -909,7 +910,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) | ||
| 909 | qpdf_offset_t | 910 | qpdf_offset_t |
| 910 | QPDF::read_xrefStream(qpdf_offset_t xref_offset) | 911 | QPDF::read_xrefStream(qpdf_offset_t xref_offset) |
| 911 | { | 912 | { |
| 912 | - bool found = false; | ||
| 913 | if (!m->ignore_xref_streams) { | 913 | if (!m->ignore_xref_streams) { |
| 914 | QPDFObjGen x_og; | 914 | QPDFObjGen x_og; |
| 915 | QPDFObjectHandle xref_obj; | 915 | QPDFObjectHandle xref_obj; |
| @@ -921,17 +921,13 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) | @@ -921,17 +921,13 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) | ||
| 921 | } | 921 | } |
| 922 | if (xref_obj.isStreamOfType("/XRef")) { | 922 | if (xref_obj.isStreamOfType("/XRef")) { |
| 923 | QTC::TC("qpdf", "QPDF found xref stream"); | 923 | QTC::TC("qpdf", "QPDF found xref stream"); |
| 924 | - found = true; | ||
| 925 | - xref_offset = processXRefStream(xref_offset, xref_obj); | 924 | + return processXRefStream(xref_offset, xref_obj); |
| 926 | } | 925 | } |
| 927 | } | 926 | } |
| 928 | 927 | ||
| 929 | - if (!found) { | ||
| 930 | - QTC::TC("qpdf", "QPDF can't find xref"); | ||
| 931 | - throw damagedPDF("", xref_offset, "xref not found"); | ||
| 932 | - } | ||
| 933 | - | ||
| 934 | - return xref_offset; | 928 | + QTC::TC("qpdf", "QPDF can't find xref"); |
| 929 | + throw damagedPDF("", xref_offset, "xref not found"); | ||
| 930 | + return 0; // unreachable | ||
| 935 | } | 931 | } |
| 936 | 932 | ||
| 937 | qpdf_offset_t | 933 | qpdf_offset_t |
| @@ -1087,9 +1083,10 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | @@ -1087,9 +1083,10 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | ||
| 1087 | if (fields[0] == 0) { | 1083 | if (fields[0] == 0) { |
| 1088 | // Ignore fields[2], which we don't care about in this case. This works around the issue | 1084 | // Ignore fields[2], which we don't care about in this case. This works around the issue |
| 1089 | // of some PDF files that put invalid values, like -1, here for deleted objects. | 1085 | // of some PDF files that put invalid values, like -1, here for deleted objects. |
| 1090 | - fields[2] = 0; | 1086 | + insertFreeXrefEntry(QPDFObjGen(obj, 0)); |
| 1087 | + } else { | ||
| 1088 | + insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); | ||
| 1091 | } | 1089 | } |
| 1092 | - insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); | ||
| 1093 | } | 1090 | } |
| 1094 | 1091 | ||
| 1095 | if (!m->trailer.isInitialized()) { | 1092 | if (!m->trailer.isInitialized()) { |
| @@ -1111,47 +1108,35 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | @@ -1111,47 +1108,35 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) | ||
| 1111 | } | 1108 | } |
| 1112 | 1109 | ||
| 1113 | void | 1110 | void |
| 1114 | -QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) | 1111 | +QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2) |
| 1115 | { | 1112 | { |
| 1116 | // Populate the xref table in such a way that the first reference to an object that we see, | 1113 | // Populate the xref table in such a way that the first reference to an object that we see, |
| 1117 | // which is the one in the latest xref table in which it appears, is the one that gets stored. | 1114 | // which is the one in the latest xref table in which it appears, is the one that gets stored. |
| 1118 | - // This works because we are reading more recent appends before older ones. Exception: if | ||
| 1119 | - // overwrite is true, then replace any existing object. This is used in xref recovery mode, | ||
| 1120 | - // which reads the file from beginning to end. | 1115 | + // This works because we are reading more recent appends before older ones. |
| 1121 | 1116 | ||
| 1122 | // If there is already an entry for this object and generation in the table, it means that a | 1117 | // If there is already an entry for this object and generation in the table, it means that a |
| 1123 | // later xref table has registered this object. Disregard this one. | 1118 | // later xref table has registered this object. Disregard this one. |
| 1124 | - { // private scope | ||
| 1125 | - int gen = (f0 == 2 ? 0 : f2); | ||
| 1126 | - QPDFObjGen og(obj, gen); | ||
| 1127 | - if (m->xref_table.count(og)) { | ||
| 1128 | - if (overwrite) { | ||
| 1129 | - QTC::TC("qpdf", "QPDF xref overwrite object"); | ||
| 1130 | - m->xref_table.erase(og); | ||
| 1131 | - } else { | ||
| 1132 | - QTC::TC("qpdf", "QPDF xref reused object"); | ||
| 1133 | - return; | ||
| 1134 | - } | ||
| 1135 | - } | ||
| 1136 | - if (m->deleted_objects.count(obj)) { | ||
| 1137 | - QTC::TC("qpdf", "QPDF xref deleted object"); | ||
| 1138 | - return; | ||
| 1139 | - } | 1119 | + |
| 1120 | + if (m->deleted_objects.count(obj)) { | ||
| 1121 | + QTC::TC("qpdf", "QPDF xref deleted object"); | ||
| 1122 | + return; | ||
| 1140 | } | 1123 | } |
| 1141 | 1124 | ||
| 1142 | - switch (f0) { | ||
| 1143 | - case 0: | ||
| 1144 | - m->deleted_objects.insert(obj); | ||
| 1145 | - break; | 1125 | + auto [iter, created] = m->xref_table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2))); |
| 1126 | + if (!created) { | ||
| 1127 | + QTC::TC("qpdf", "QPDF xref reused object"); | ||
| 1128 | + return; | ||
| 1129 | + } | ||
| 1146 | 1130 | ||
| 1131 | + switch (f0) { | ||
| 1147 | case 1: | 1132 | case 1: |
| 1148 | // f2 is generation | 1133 | // f2 is generation |
| 1149 | QTC::TC("qpdf", "QPDF xref gen > 0", ((f2 > 0) ? 1 : 0)); | 1134 | QTC::TC("qpdf", "QPDF xref gen > 0", ((f2 > 0) ? 1 : 0)); |
| 1150 | - m->xref_table[QPDFObjGen(obj, f2)] = QPDFXRefEntry(f1); | 1135 | + iter->second = QPDFXRefEntry(f1); |
| 1151 | break; | 1136 | break; |
| 1152 | 1137 | ||
| 1153 | case 2: | 1138 | case 2: |
| 1154 | - m->xref_table[QPDFObjGen(obj, 0)] = QPDFXRefEntry(toI(f1), f2); | 1139 | + iter->second = QPDFXRefEntry(toI(f1), f2); |
| 1155 | break; | 1140 | break; |
| 1156 | 1141 | ||
| 1157 | default: | 1142 | default: |
| @@ -1161,6 +1146,28 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) | @@ -1161,6 +1146,28 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) | ||
| 1161 | } | 1146 | } |
| 1162 | 1147 | ||
| 1163 | void | 1148 | void |
| 1149 | +QPDF::insertFreeXrefEntry(QPDFObjGen og) | ||
| 1150 | +{ | ||
| 1151 | + if (!m->xref_table.count(og)) { | ||
| 1152 | + m->deleted_objects.insert(og.getObj()); | ||
| 1153 | + } | ||
| 1154 | +} | ||
| 1155 | + | ||
| 1156 | +// Replace uncompressed object. This is used in xref recovery mode, which reads the file from | ||
| 1157 | +// beginning to end. | ||
| 1158 | +void | ||
| 1159 | +QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2) | ||
| 1160 | +{ | ||
| 1161 | + QPDFObjGen og(obj, f2); | ||
| 1162 | + if (!m->deleted_objects.count(obj)) { | ||
| 1163 | + // deleted_objects stores the uncompressed objects removed from the xref table at the start | ||
| 1164 | + // of recovery. | ||
| 1165 | + QTC::TC("qpdf", "QPDF xref overwrite object"); | ||
| 1166 | + m->xref_table[QPDFObjGen(obj, f2)] = QPDFXRefEntry(f1); | ||
| 1167 | + } | ||
| 1168 | +} | ||
| 1169 | + | ||
| 1170 | +void | ||
| 1164 | QPDF::showXRefTable() | 1171 | QPDF::showXRefTable() |
| 1165 | { | 1172 | { |
| 1166 | auto& cout = *m->log->getInfo(); | 1173 | auto& cout = *m->log->getInfo(); |