Commit 3f632458ae15b5c63b639e46bf2d89653401d8aa

Authored by m-holger
Committed by Jay Berkenbilt
1 parent 19a8d3fe

Refactor QPDF::fixDanglingReferences

libqpdf/QPDF.cc
... ... @@ -577,6 +577,8 @@ QPDF::reconstruct_xref(QPDFExc& e)
577 577 }
578 578  
579 579 this->m->reconstructed_xref = true;
  580 + // We may find more objects, which may contain dangling references.
  581 + this->m->fixed_dangling_refs = false;
580 582  
581 583 warn(damagedPDF("", 0, "file is damaged"));
582 584 warn(e);
... ... @@ -1290,65 +1292,48 @@ QPDF::showXRefTable()
1290 1292 }
1291 1293 }
1292 1294  
  1295 +// Ensure all objects in the pdf file, including those in indirect references,
  1296 +// appear in the object cache.
1293 1297 void
1294 1298 QPDF::fixDanglingReferences(bool force)
1295 1299 {
1296   - if (this->m->fixed_dangling_refs && (!force)) {
  1300 + if (this->m->fixed_dangling_refs && !force) {
1297 1301 return;
1298 1302 }
1299   - this->m->fixed_dangling_refs = true;
1300   -
1301   - // Create a set of all known indirect objects including those
1302   - // we've previously resolved and those that we have created.
1303   - std::set<QPDFObjGen> to_process;
1304   - for (auto const& iter: this->m->obj_cache) {
1305   - to_process.insert(iter.first);
1306   - }
1307   - for (auto const& iter: this->m->xref_table) {
1308   - to_process.insert(iter.first);
1309   - }
1310 1303  
1311   - // For each non-scalar item to process, put it in the queue.
1312   - std::list<QPDFObjectHandle> queue;
1313   - queue.push_back(this->m->trailer);
1314   - for (auto const& og: to_process) {
1315   - auto obj = getObject(og);
1316   - if (obj.isDictionary() || obj.isArray()) {
1317   - queue.push_back(obj);
1318   - } else if (obj.isStream()) {
1319   - queue.push_back(obj.getDict());
1320   - }
1321   - }
1322   -
1323   - // Process the queue by recursively resolving all object
1324   - // references. We don't need to do loop detection because we don't
1325   - // traverse known indirect objects when processing the queue.
1326   - while (!queue.empty()) {
1327   - QPDFObjectHandle obj = queue.front();
1328   - queue.pop_front();
1329   - std::list<QPDFObjectHandle> to_check;
1330   - if (obj.isDictionary()) {
1331   - std::map<std::string, QPDFObjectHandle> members =
1332   - obj.getDictAsMap();
1333   - for (auto const& iter: members) {
1334   - to_check.push_back(iter.second);
  1304 + if (!this->m->fixed_dangling_refs) {
  1305 + // First pass is only run if the xref table has not been
  1306 + // reconstructed. It will be terminated as soon as reconstruction is
  1307 + // triggered.
  1308 + if (!this->m->reconstructed_xref) {
  1309 + for (auto const& iter: this->m->xref_table) {
  1310 + auto og = iter.first;
  1311 + if (!isCached(og)) {
  1312 + m->obj_cache[og] =
  1313 + ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
  1314 + if (this->m->reconstructed_xref) {
  1315 + break;
  1316 + }
  1317 + }
1335 1318 }
1336   - } else if (obj.isArray()) {
1337   - auto arr = QPDFObjectHandle::ObjAccessor::asArray(obj);
1338   - arr->addExplicitElementsToList(to_check);
1339   - }
1340   - for (auto sub: to_check) {
1341   - if (sub.isIndirect()) {
1342   - if ((sub.getOwningQPDF() == this) &&
1343   - isUnresolved(sub.getObjGen())) {
1344   - QTC::TC("qpdf", "QPDF detected dangling ref");
1345   - queue.push_back(sub);
  1319 + }
  1320 + // Second pass is skipped if the first pass did not trigger
  1321 + // reconstruction of the xref table.
  1322 + if (this->m->reconstructed_xref) {
  1323 + for (auto const& iter: this->m->xref_table) {
  1324 + auto og = iter.first;
  1325 + if (!isCached(og)) {
  1326 + m->obj_cache[og] =
  1327 + ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
1346 1328 }
1347   - } else {
1348   - queue.push_back(sub);
1349 1329 }
1350 1330 }
1351 1331 }
  1332 + // Final pass adds all indirect references to the object cache.
  1333 + for (auto const& iter: this->m->obj_cache) {
  1334 + resolve(iter.first);
  1335 + }
  1336 + this->m->fixed_dangling_refs = true;
1352 1337 }
1353 1338  
1354 1339 size_t
... ... @@ -2082,6 +2067,8 @@ QPDF::reserveStream(QPDFObjGen const& og)
2082 2067 QPDFObjectHandle
2083 2068 QPDF::getObject(QPDFObjGen const& og)
2084 2069 {
  2070 + // This method is called by the parser and therefore must not
  2071 + // resolve any objects.
2085 2072 if (!isCached(og)) {
2086 2073 m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
2087 2074 }
... ...
libqpdf/QPDFParser.cc
... ... @@ -190,6 +190,11 @@ QPDFParser::parse(bool& empty, bool content_stream)
190 190 olist.at(size - 2).getIntValueAsInt(),
191 191 olist.back().getIntValueAsInt());
192 192 if (ref_og.isIndirect()) {
  193 + // This action has the desirable side effect
  194 + // of causing dangling references (references
  195 + // to indirect objects that don't appear in
  196 + // the PDF) in any parsed object to appear in
  197 + // the object cache.
193 198 object = context->getObject(ref_og);
194 199 indirect_ref = true;
195 200 } else {
... ...
qpdf/qpdf.testcov
... ... @@ -381,7 +381,6 @@ QPDFFormFieldObjectHelper list not found 0
381 381 QPDFFormFieldObjectHelper list found 0
382 382 QPDFFormFieldObjectHelper list first too low 0
383 383 QPDFFormFieldObjectHelper list last too high 0
384   -QPDF detected dangling ref 0
385 384 QPDFJob image optimize no pipeline 0
386 385 QPDFJob image optimize no shrink 0
387 386 QPDFJob image optimize too small 0
... ...