Commit 3f632458ae15b5c63b639e46bf2d89653401d8aa
Committed by
Jay Berkenbilt
1 parent
19a8d3fe
Refactor QPDF::fixDanglingReferences
Showing
3 changed files
with
40 additions
and
49 deletions
libqpdf/QPDF.cc
| @@ -577,6 +577,8 @@ QPDF::reconstruct_xref(QPDFExc& e) | @@ -577,6 +577,8 @@ QPDF::reconstruct_xref(QPDFExc& e) | ||
| 577 | } | 577 | } |
| 578 | 578 | ||
| 579 | this->m->reconstructed_xref = true; | 579 | this->m->reconstructed_xref = true; |
| 580 | + // We may find more objects, which may contain dangling references. | ||
| 581 | + this->m->fixed_dangling_refs = false; | ||
| 580 | 582 | ||
| 581 | warn(damagedPDF("", 0, "file is damaged")); | 583 | warn(damagedPDF("", 0, "file is damaged")); |
| 582 | warn(e); | 584 | warn(e); |
| @@ -1290,65 +1292,48 @@ QPDF::showXRefTable() | @@ -1290,65 +1292,48 @@ QPDF::showXRefTable() | ||
| 1290 | } | 1292 | } |
| 1291 | } | 1293 | } |
| 1292 | 1294 | ||
| 1295 | +// Ensure all objects in the pdf file, including those in indirect references, | ||
| 1296 | +// appear in the object cache. | ||
| 1293 | void | 1297 | void |
| 1294 | QPDF::fixDanglingReferences(bool force) | 1298 | QPDF::fixDanglingReferences(bool force) |
| 1295 | { | 1299 | { |
| 1296 | - if (this->m->fixed_dangling_refs && (!force)) { | 1300 | + if (this->m->fixed_dangling_refs && !force) { |
| 1297 | return; | 1301 | return; |
| 1298 | } | 1302 | } |
| 1299 | - this->m->fixed_dangling_refs = true; | ||
| 1300 | - | ||
| 1301 | - // Create a set of all known indirect objects including those | ||
| 1302 | - // we've previously resolved and those that we have created. | ||
| 1303 | - std::set<QPDFObjGen> to_process; | ||
| 1304 | - for (auto const& iter: this->m->obj_cache) { | ||
| 1305 | - to_process.insert(iter.first); | ||
| 1306 | - } | ||
| 1307 | - for (auto const& iter: this->m->xref_table) { | ||
| 1308 | - to_process.insert(iter.first); | ||
| 1309 | - } | ||
| 1310 | 1303 | ||
| 1311 | - // For each non-scalar item to process, put it in the queue. | ||
| 1312 | - std::list<QPDFObjectHandle> queue; | ||
| 1313 | - queue.push_back(this->m->trailer); | ||
| 1314 | - for (auto const& og: to_process) { | ||
| 1315 | - auto obj = getObject(og); | ||
| 1316 | - if (obj.isDictionary() || obj.isArray()) { | ||
| 1317 | - queue.push_back(obj); | ||
| 1318 | - } else if (obj.isStream()) { | ||
| 1319 | - queue.push_back(obj.getDict()); | ||
| 1320 | - } | ||
| 1321 | - } | ||
| 1322 | - | ||
| 1323 | - // Process the queue by recursively resolving all object | ||
| 1324 | - // references. We don't need to do loop detection because we don't | ||
| 1325 | - // traverse known indirect objects when processing the queue. | ||
| 1326 | - while (!queue.empty()) { | ||
| 1327 | - QPDFObjectHandle obj = queue.front(); | ||
| 1328 | - queue.pop_front(); | ||
| 1329 | - std::list<QPDFObjectHandle> to_check; | ||
| 1330 | - if (obj.isDictionary()) { | ||
| 1331 | - std::map<std::string, QPDFObjectHandle> members = | ||
| 1332 | - obj.getDictAsMap(); | ||
| 1333 | - for (auto const& iter: members) { | ||
| 1334 | - to_check.push_back(iter.second); | 1304 | + if (!this->m->fixed_dangling_refs) { |
| 1305 | + // First pass is only run if the the xref table has not been | ||
| 1306 | + // reconstructed. It will be terminated as soon as reconstruction is | ||
| 1307 | + // triggered. | ||
| 1308 | + if (!this->m->reconstructed_xref) { | ||
| 1309 | + for (auto const& iter: this->m->xref_table) { | ||
| 1310 | + auto og = iter.first; | ||
| 1311 | + if (!isCached(og)) { | ||
| 1312 | + m->obj_cache[og] = | ||
| 1313 | + ObjCache(QPDF_Unresolved::create(this, og), -1, -1); | ||
| 1314 | + if (this->m->reconstructed_xref) { | ||
| 1315 | + break; | ||
| 1316 | + } | ||
| 1317 | + } | ||
| 1335 | } | 1318 | } |
| 1336 | - } else if (obj.isArray()) { | ||
| 1337 | - auto arr = QPDFObjectHandle::ObjAccessor::asArray(obj); | ||
| 1338 | - arr->addExplicitElementsToList(to_check); | ||
| 1339 | - } | ||
| 1340 | - for (auto sub: to_check) { | ||
| 1341 | - if (sub.isIndirect()) { | ||
| 1342 | - if ((sub.getOwningQPDF() == this) && | ||
| 1343 | - isUnresolved(sub.getObjGen())) { | ||
| 1344 | - QTC::TC("qpdf", "QPDF detected dangling ref"); | ||
| 1345 | - queue.push_back(sub); | 1319 | + } |
| 1320 | + // Second pass is skipped if the first pass did not trigger | ||
| 1321 | + // reconstruction of the xref table. | ||
| 1322 | + if (this->m->reconstructed_xref) { | ||
| 1323 | + for (auto const& iter: this->m->xref_table) { | ||
| 1324 | + auto og = iter.first; | ||
| 1325 | + if (!isCached(og)) { | ||
| 1326 | + m->obj_cache[og] = | ||
| 1327 | + ObjCache(QPDF_Unresolved::create(this, og), -1, -1); | ||
| 1346 | } | 1328 | } |
| 1347 | - } else { | ||
| 1348 | - queue.push_back(sub); | ||
| 1349 | } | 1329 | } |
| 1350 | } | 1330 | } |
| 1351 | } | 1331 | } |
| 1332 | + // Final pass adds all indirect references to the object cache. | ||
| 1333 | + for (auto const& iter: this->m->obj_cache) { | ||
| 1334 | + resolve(iter.first); | ||
| 1335 | + } | ||
| 1336 | + this->m->fixed_dangling_refs = true; | ||
| 1352 | } | 1337 | } |
| 1353 | 1338 | ||
| 1354 | size_t | 1339 | size_t |
| @@ -2082,6 +2067,8 @@ QPDF::reserveStream(QPDFObjGen const& og) | @@ -2082,6 +2067,8 @@ QPDF::reserveStream(QPDFObjGen const& og) | ||
| 2082 | QPDFObjectHandle | 2067 | QPDFObjectHandle |
| 2083 | QPDF::getObject(QPDFObjGen const& og) | 2068 | QPDF::getObject(QPDFObjGen const& og) |
| 2084 | { | 2069 | { |
| 2070 | + // This method is called by the parser and therefore must not | ||
| 2071 | + // resolve any objects. | ||
| 2085 | if (!isCached(og)) { | 2072 | if (!isCached(og)) { |
| 2086 | m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1); | 2073 | m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1); |
| 2087 | } | 2074 | } |
libqpdf/QPDFParser.cc
| @@ -190,6 +190,11 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -190,6 +190,11 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 190 | olist.at(size - 2).getIntValueAsInt(), | 190 | olist.at(size - 2).getIntValueAsInt(), |
| 191 | olist.back().getIntValueAsInt()); | 191 | olist.back().getIntValueAsInt()); |
| 192 | if (ref_og.isIndirect()) { | 192 | if (ref_og.isIndirect()) { |
| 193 | + // This action has the desirable side effect | ||
| 194 | + // of causing dangling references (references | ||
| 195 | + // to indirect objects that don't appear in | ||
| 196 | + // the PDF) in any parsed object to appear in | ||
| 197 | + // the object cache. | ||
| 193 | object = context->getObject(ref_og); | 198 | object = context->getObject(ref_og); |
| 194 | indirect_ref = true; | 199 | indirect_ref = true; |
| 195 | } else { | 200 | } else { |
qpdf/qpdf.testcov
| @@ -381,7 +381,6 @@ QPDFFormFieldObjectHelper list not found 0 | @@ -381,7 +381,6 @@ QPDFFormFieldObjectHelper list not found 0 | ||
| 381 | QPDFFormFieldObjectHelper list found 0 | 381 | QPDFFormFieldObjectHelper list found 0 |
| 382 | QPDFFormFieldObjectHelper list first too low 0 | 382 | QPDFFormFieldObjectHelper list first too low 0 |
| 383 | QPDFFormFieldObjectHelper list last too high 0 | 383 | QPDFFormFieldObjectHelper list last too high 0 |
| 384 | -QPDF detected dangling ref 0 | ||
| 385 | QPDFJob image optimize no pipeline 0 | 384 | QPDFJob image optimize no pipeline 0 |
| 386 | QPDFJob image optimize no shrink 0 | 385 | QPDFJob image optimize no shrink 0 |
| 387 | QPDFJob image optimize too small 0 | 386 | QPDFJob image optimize too small 0 |