Commit 6b9eb14c76cdbee063051d0ba987e8a2961a4139

Authored by m-holger
1 parent c0020cb1

Remove Xref_table::deleted_objects

libqpdf/QPDF.cc
@@ -634,8 +634,6 @@ QPDF::Xref_table::reconstruct(QPDFExc& e) @@ -634,8 +634,6 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
634 check_warnings(); 634 check_warnings();
635 } 635 }
636 636
637 - deleted_objects.clear();  
638 -  
639 if (!trailer_) { 637 if (!trailer_) {
640 qpdf_offset_t max_offset{0}; 638 qpdf_offset_t max_offset{0};
641 // If there are any xref streams, take the last one to appear. 639 // If there are any xref streams, take the last one to appear.
@@ -764,32 +762,15 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset) @@ -764,32 +762,15 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
764 if (!trailer_) { 762 if (!trailer_) {
765 throw damaged_pdf("unable to find trailer while reading xref"); 763 throw damaged_pdf("unable to find trailer while reading xref");
766 } 764 }
  765 + int size = trailer_.getKey("/Size").getIntValueAsInt();
  766 +
  767 + if (size < 3) {
  768 + throw damaged_pdf("too few objects - file can't have a page tree");
  769 + }
767 770
768 // We are no longer reporting what the highest id in the xref table is. I don't think it adds 771 // We are no longer reporting what the highest id in the xref table is. I don't think it adds
769 // anything. If we want to report more detail, we should report the total number of missing 772 // anything. If we want to report more detail, we should report the total number of missing
770 // entries, including missing entries before the last actual entry. 773 // entries, including missing entries before the last actual entry.
771 - //  
772 - // int size = trailer_.getKey("/Size").getIntValueAsInt();  
773 - // int max_obj = 0;  
774 - // if (!table.empty()) {  
775 - // max_obj = table.rbegin()->first.getObj();  
776 - // }  
777 - // if (!deleted_objects.empty()) {  
778 - // max_obj = std::max(max_obj, *deleted_objects.rbegin());  
779 - // }  
780 - // if ((size < 1) || (size - 1 != max_obj)) {  
781 - // QTC::TC("qpdf", "QPDF xref size mismatch");  
782 - // warn_damaged(  
783 - // "reported number of objects (" + std::to_string(size) +  
784 - // ") is not one plus the highest object number (" + std::to_string(max_obj) + ")");  
785 - // }  
786 -  
787 - // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we  
788 - // never depend on its being set.  
789 - deleted_objects.clear();  
790 -  
791 - // Make sure we keep only the highest generation for any object.  
792 - // No longer needed as compliance is guaranteed by vector.  
793 } 774 }
794 775
795 QPDF::Xref_table::Subsection 776 QPDF::Xref_table::Subsection
@@ -1353,8 +1334,13 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) @@ -1353,8 +1334,13 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1353 } 1334 }
1354 1335
1355 auto& entry = table[static_cast<size_t>(obj)]; 1336 auto& entry = table[static_cast<size_t>(obj)];
  1337 + auto old_type = entry.entry.getType();
1356 1338
1357 - if (deleted_objects.count(obj)) { 1339 + if (!old_type && entry.gen > 0) {
  1340 + // At the moment we are processing the updates last to first and therefore the gen doesn't
  1341 + // matter as long as it is > 0 to distinguish it from an uninitialized entry. This will need
  1342 + // to be revisited when we want to support incremental updates or more comprehensive
  1343 + // checking.
1358 QTC::TC("qpdf", "QPDF xref deleted object"); 1344 QTC::TC("qpdf", "QPDF xref deleted object");
1359 return; 1345 return;
1360 } 1346 }
@@ -1365,7 +1351,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) @@ -1365,7 +1351,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1365 return; 1351 return;
1366 } 1352 }
1367 1353
1368 - if (entry.entry.getType() && entry.gen >= new_gen) { 1354 + if (old_type && entry.gen >= new_gen) {
1369 QTC::TC("qpdf", "QPDF xref reused object"); 1355 QTC::TC("qpdf", "QPDF xref reused object");
1370 return; 1356 return;
1371 } 1357 }
@@ -1391,8 +1377,15 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) @@ -1391,8 +1377,15 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1391 void 1377 void
1392 QPDF::Xref_table::insert_free(QPDFObjGen og) 1378 QPDF::Xref_table::insert_free(QPDFObjGen og)
1393 { 1379 {
1394 - if (!type(og)) {  
1395 - deleted_objects.insert(og.getObj()); 1380 + // At the moment we are processing the updates last to first and therefore the gen doesn't
  1381 + // matter as long as it is > 0 to distinguish it from an uninitialized entry. This will need to be
  1382 + // revisited when we want to support incremental updates or more comprehensive checking.
  1383 + if (og.getObj() < 1) {
  1384 + return;
  1385 + }
  1386 + size_t id = static_cast<size_t>(og.getObj());
  1387 + if (id < table.size() && !type(id)) {
  1388 + table[id] = {1, {}};
1396 } 1389 }
1397 } 1390 }
1398 1391
libqpdf/qpdf/QPDF_private.hh
@@ -45,6 +45,16 @@ class QPDF::Xref_table @@ -45,6 +45,16 @@ class QPDF::Xref_table
45 } 45 }
46 46
47 // Returns 0 if og is not in table. 47 // Returns 0 if og is not in table.
  48 + int
  49 + type(size_t id) const
  50 + {
  51 + if (id >= table.size()) {
  52 + return 0;
  53 + }
  54 + return table[id].entry.getType();
  55 + }
  56 +
  57 + // Returns 0 if og is not in table.
48 qpdf_offset_t 58 qpdf_offset_t
49 offset(QPDFObjGen og) const 59 offset(QPDFObjGen og) const
50 { 60 {
@@ -175,7 +185,6 @@ class QPDF::Xref_table @@ -175,7 +185,6 @@ class QPDF::Xref_table
175 } 185 }
176 186
177 // Methods to insert table entries 187 // Methods to insert table entries
178 - void insert_reconstructed(int obj, qpdf_offset_t f1, int f2);  
179 void insert(int obj, int f0, qpdf_offset_t f1, int f2); 188 void insert(int obj, int f0, qpdf_offset_t f1, int f2);
180 void insert_free(QPDFObjGen); 189 void insert_free(QPDFObjGen);
181 190
@@ -207,7 +216,6 @@ class QPDF::Xref_table @@ -207,7 +216,6 @@ class QPDF::Xref_table
207 bool attempt_recovery_{true}; 216 bool attempt_recovery_{true};
208 bool initialized_{false}; 217 bool initialized_{false};
209 bool ignore_streams_{false}; 218 bool ignore_streams_{false};
210 - std::set<int> deleted_objects;  
211 bool reconstructed_{false}; 219 bool reconstructed_{false};
212 // Before the xref table is initialized, max_id_ is an upper bound on the possible object ids 220 // Before the xref table is initialized, max_id_ is an upper bound on the possible object ids
213 // that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the 221 // that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the