Commit 3f632458ae15b5c63b639e46bf2d89653401d8aa

Authored by m-holger
Committed by Jay Berkenbilt
1 parent 19a8d3fe

Refactor QPDF::fixDanglingReferences

libqpdf/QPDF.cc
@@ -577,6 +577,8 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -577,6 +577,8 @@ QPDF::reconstruct_xref(QPDFExc& e)
577 } 577 }
578 578
579 this->m->reconstructed_xref = true; 579 this->m->reconstructed_xref = true;
  580 + // We may find more objects, which may contain dangling references.
  581 + this->m->fixed_dangling_refs = false;
580 582
581 warn(damagedPDF("", 0, "file is damaged")); 583 warn(damagedPDF("", 0, "file is damaged"));
582 warn(e); 584 warn(e);
@@ -1290,65 +1292,48 @@ QPDF::showXRefTable() @@ -1290,65 +1292,48 @@ QPDF::showXRefTable()
1290 } 1292 }
1291 } 1293 }
1292 1294
  1295 +// Ensure all objects in the pdf file, including those in indirect references,
  1296 +// appear in the object cache.
1293 void 1297 void
1294 QPDF::fixDanglingReferences(bool force) 1298 QPDF::fixDanglingReferences(bool force)
1295 { 1299 {
1296 - if (this->m->fixed_dangling_refs && (!force)) { 1300 + if (this->m->fixed_dangling_refs && !force) {
1297 return; 1301 return;
1298 } 1302 }
1299 - this->m->fixed_dangling_refs = true;  
1300 -  
1301 - // Create a set of all known indirect objects including those  
1302 - // we've previously resolved and those that we have created.  
1303 - std::set<QPDFObjGen> to_process;  
1304 - for (auto const& iter: this->m->obj_cache) {  
1305 - to_process.insert(iter.first);  
1306 - }  
1307 - for (auto const& iter: this->m->xref_table) {  
1308 - to_process.insert(iter.first);  
1309 - }  
1310 1303
1311 - // For each non-scalar item to process, put it in the queue.  
1312 - std::list<QPDFObjectHandle> queue;  
1313 - queue.push_back(this->m->trailer);  
1314 - for (auto const& og: to_process) {  
1315 - auto obj = getObject(og);  
1316 - if (obj.isDictionary() || obj.isArray()) {  
1317 - queue.push_back(obj);  
1318 - } else if (obj.isStream()) {  
1319 - queue.push_back(obj.getDict());  
1320 - }  
1321 - }  
1322 -  
1323 - // Process the queue by recursively resolving all object  
1324 - // references. We don't need to do loop detection because we don't  
1325 - // traverse known indirect objects when processing the queue.  
1326 - while (!queue.empty()) {  
1327 - QPDFObjectHandle obj = queue.front();  
1328 - queue.pop_front();  
1329 - std::list<QPDFObjectHandle> to_check;  
1330 - if (obj.isDictionary()) {  
1331 - std::map<std::string, QPDFObjectHandle> members =  
1332 - obj.getDictAsMap();  
1333 - for (auto const& iter: members) {  
1334 - to_check.push_back(iter.second); 1304 + if (!this->m->fixed_dangling_refs) {
  1305 + // First pass is only run if the the xref table has not been
  1306 + // reconstructed. It will be terminated as soon as reconstruction is
  1307 + // triggered.
  1308 + if (!this->m->reconstructed_xref) {
  1309 + for (auto const& iter: this->m->xref_table) {
  1310 + auto og = iter.first;
  1311 + if (!isCached(og)) {
  1312 + m->obj_cache[og] =
  1313 + ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
  1314 + if (this->m->reconstructed_xref) {
  1315 + break;
  1316 + }
  1317 + }
1335 } 1318 }
1336 - } else if (obj.isArray()) {  
1337 - auto arr = QPDFObjectHandle::ObjAccessor::asArray(obj);  
1338 - arr->addExplicitElementsToList(to_check);  
1339 - }  
1340 - for (auto sub: to_check) {  
1341 - if (sub.isIndirect()) {  
1342 - if ((sub.getOwningQPDF() == this) &&  
1343 - isUnresolved(sub.getObjGen())) {  
1344 - QTC::TC("qpdf", "QPDF detected dangling ref");  
1345 - queue.push_back(sub); 1319 + }
  1320 + // Second pass is skipped if the first pass did not trigger
  1321 + // reconstruction of the xref table.
  1322 + if (this->m->reconstructed_xref) {
  1323 + for (auto const& iter: this->m->xref_table) {
  1324 + auto og = iter.first;
  1325 + if (!isCached(og)) {
  1326 + m->obj_cache[og] =
  1327 + ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
1346 } 1328 }
1347 - } else {  
1348 - queue.push_back(sub);  
1349 } 1329 }
1350 } 1330 }
1351 } 1331 }
  1332 + // Final pass adds all indirect references to the object cache.
  1333 + for (auto const& iter: this->m->obj_cache) {
  1334 + resolve(iter.first);
  1335 + }
  1336 + this->m->fixed_dangling_refs = true;
1352 } 1337 }
1353 1338
1354 size_t 1339 size_t
@@ -2082,6 +2067,8 @@ QPDF::reserveStream(QPDFObjGen const&amp; og) @@ -2082,6 +2067,8 @@ QPDF::reserveStream(QPDFObjGen const&amp; og)
2082 QPDFObjectHandle 2067 QPDFObjectHandle
2083 QPDF::getObject(QPDFObjGen const& og) 2068 QPDF::getObject(QPDFObjGen const& og)
2084 { 2069 {
  2070 + // This method is called by the parser and therefore must not
  2071 + // resolve any objects.
2085 if (!isCached(og)) { 2072 if (!isCached(og)) {
2086 m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1); 2073 m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
2087 } 2074 }
libqpdf/QPDFParser.cc
@@ -190,6 +190,11 @@ QPDFParser::parse(bool&amp; empty, bool content_stream) @@ -190,6 +190,11 @@ QPDFParser::parse(bool&amp; empty, bool content_stream)
190 olist.at(size - 2).getIntValueAsInt(), 190 olist.at(size - 2).getIntValueAsInt(),
191 olist.back().getIntValueAsInt()); 191 olist.back().getIntValueAsInt());
192 if (ref_og.isIndirect()) { 192 if (ref_og.isIndirect()) {
  193 + // This action has the desirable side effect
  194 + // of causing dangling references (references
  195 + // to indirect objects that don't appear in
  196 + // the PDF) in any parsed object to appear in
  197 + // the object cache.
193 object = context->getObject(ref_og); 198 object = context->getObject(ref_og);
194 indirect_ref = true; 199 indirect_ref = true;
195 } else { 200 } else {
qpdf/qpdf.testcov
@@ -381,7 +381,6 @@ QPDFFormFieldObjectHelper list not found 0 @@ -381,7 +381,6 @@ QPDFFormFieldObjectHelper list not found 0
381 QPDFFormFieldObjectHelper list found 0 381 QPDFFormFieldObjectHelper list found 0
382 QPDFFormFieldObjectHelper list first too low 0 382 QPDFFormFieldObjectHelper list first too low 0
383 QPDFFormFieldObjectHelper list last too high 0 383 QPDFFormFieldObjectHelper list last too high 0
384 -QPDF detected dangling ref 0  
385 QPDFJob image optimize no pipeline 0 384 QPDFJob image optimize no pipeline 0
386 QPDFJob image optimize no shrink 0 385 QPDFJob image optimize no shrink 0
387 QPDFJob image optimize too small 0 386 QPDFJob image optimize too small 0