Commit 0ac37bc9561f1cf1aca2c55fc0e4702d3febcf75

Authored by m-holger
1 parent f8e6274a

Add new class QPDF::Xref_table

include/qpdf/QPDF.hh
@@ -733,6 +733,7 @@ class QPDF @@ -733,6 +733,7 @@ class QPDF
733 class ParseGuard; 733 class ParseGuard;
734 class Pipe; 734 class Pipe;
735 class JobSetter; 735 class JobSetter;
  736 + class Xref_table;
736 737
737 // For testing only -- do not add to DLL 738 // For testing only -- do not add to DLL
738 static bool test_json_validators(); 739 static bool test_json_validators();
libqpdf/QPDF.cc
@@ -303,7 +303,7 @@ QPDF::registerStreamFilter( @@ -303,7 +303,7 @@ QPDF::registerStreamFilter(
303 void 303 void
304 QPDF::setIgnoreXRefStreams(bool val) 304 QPDF::setIgnoreXRefStreams(bool val)
305 { 305 {
306 - m->ignore_xref_streams = val; 306 + m->xref_table.ignore_streams = val;
307 } 307 }
308 308
309 std::shared_ptr<QPDFLogger> 309 std::shared_ptr<QPDFLogger>
@@ -341,6 +341,7 @@ void @@ -341,6 +341,7 @@ void
341 QPDF::setAttemptRecovery(bool val) 341 QPDF::setAttemptRecovery(bool val)
342 { 342 {
343 m->attempt_recovery = val; 343 m->attempt_recovery = val;
  344 + m->xref_table.attempt_recovery = val;
344 } 345 }
345 346
346 void 347 void
@@ -447,11 +448,11 @@ QPDF::parse(char const* password) @@ -447,11 +448,11 @@ QPDF::parse(char const* password)
447 // 30 characters to leave room for the startxref stuff. 448 // 30 characters to leave room for the startxref stuff.
448 m->file->seek(0, SEEK_END); 449 m->file->seek(0, SEEK_END);
449 qpdf_offset_t end_offset = m->file->tell(); 450 qpdf_offset_t end_offset = m->file->tell();
450 - m->xref_table_max_offset = end_offset; 451 + m->xref_table.max_offset = end_offset;
451 // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic 452 // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
452 // scenarios at least 3 bytes are required. 453 // scenarios at least 3 bytes are required.
453 - if (m->xref_table_max_id > m->xref_table_max_offset / 3) {  
454 - m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3); 454 + if (m->xref_table.max_id > m->xref_table.max_offset / 3) {
  455 + m->xref_table.max_id = static_cast<int>(m->xref_table.max_offset / 3);
455 } 456 }
456 qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); 457 qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
457 PatternFinder sf(*this, &QPDF::findStartxref); 458 PatternFinder sf(*this, &QPDF::findStartxref);
@@ -482,7 +483,7 @@ QPDF::parse(char const* password) @@ -482,7 +483,7 @@ QPDF::parse(char const* password)
482 } 483 }
483 484
484 initializeEncryption(); 485 initializeEncryption();
485 - m->parsed = true; 486 + m->xref_table.parsed = true;
486 if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { 487 if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
487 // QPDFs created from JSON have an empty xref table and no root object yet. 488 // QPDFs created from JSON have an empty xref table and no root object yet.
488 throw damagedPDF("", 0, "unable to find page tree"); 489 throw damagedPDF("", 0, "unable to find page tree");
@@ -526,16 +527,16 @@ QPDF::warn( @@ -526,16 +527,16 @@ QPDF::warn(
526 void 527 void
527 QPDF::setTrailer(QPDFObjectHandle obj) 528 QPDF::setTrailer(QPDFObjectHandle obj)
528 { 529 {
529 - if (m->trailer) { 530 + if (m->xref_table.trailer) {
530 return; 531 return;
531 } 532 }
532 - m->trailer = obj; 533 + m->xref_table.trailer = obj;
533 } 534 }
534 535
535 void 536 void
536 QPDF::reconstruct_xref(QPDFExc& e) 537 QPDF::reconstruct_xref(QPDFExc& e)
537 { 538 {
538 - if (m->reconstructed_xref) { 539 + if (m->xref_table.reconstructed) {
539 // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because 540 // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because
540 // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now. 541 // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now.
541 throw e; 542 throw e;
@@ -550,7 +551,7 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -550,7 +551,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
550 } 551 }
551 }; 552 };
552 553
553 - m->reconstructed_xref = true; 554 + m->xref_table.reconstructed = true;
554 // We may find more objects, which may contain dangling references. 555 // We may find more objects, which may contain dangling references.
555 m->fixed_dangling_refs = false; 556 m->fixed_dangling_refs = false;
556 557
@@ -583,7 +584,7 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -583,7 +584,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
583 if ((t2.isInteger()) && (readToken(*m->file, MAX_LEN).isWord("obj"))) { 584 if ((t2.isInteger()) && (readToken(*m->file, MAX_LEN).isWord("obj"))) {
584 int obj = QUtil::string_to_int(t1.getValue().c_str()); 585 int obj = QUtil::string_to_int(t1.getValue().c_str());
585 int gen = QUtil::string_to_int(t2.getValue().c_str()); 586 int gen = QUtil::string_to_int(t2.getValue().c_str());
586 - if (obj <= m->xref_table_max_id) { 587 + if (obj <= m->xref_table.max_id) {
587 insertReconstructedXrefEntry(obj, token_start, gen); 588 insertReconstructedXrefEntry(obj, token_start, gen);
588 } else { 589 } else {
589 warn(damagedPDF( 590 warn(damagedPDF(
@@ -591,7 +592,7 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -591,7 +592,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
591 } 592 }
592 } 593 }
593 m->file->seek(pos, SEEK_SET); 594 m->file->seek(pos, SEEK_SET);
594 - } else if (!m->trailer && t1.isWord("trailer")) { 595 + } else if (!m->xref_table.trailer && t1.isWord("trailer")) {
595 auto pos = m->file->tell(); 596 auto pos = m->file->tell();
596 QPDFObjectHandle t = readTrailer(); 597 QPDFObjectHandle t = readTrailer();
597 if (!t.isDictionary()) { 598 if (!t.isDictionary()) {
@@ -604,9 +605,9 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -604,9 +605,9 @@ QPDF::reconstruct_xref(QPDFExc& e)
604 check_warnings(); 605 check_warnings();
605 m->file->findAndSkipNextEOL(); 606 m->file->findAndSkipNextEOL();
606 } 607 }
607 - m->deleted_objects.clear(); 608 + m->xref_table.deleted_objects.clear();
608 609
609 - if (!m->trailer) { 610 + if (!m->xref_table.trailer) {
610 qpdf_offset_t max_offset{0}; 611 qpdf_offset_t max_offset{0};
611 // If there are any xref streams, take the last one to appear. 612 // If there are any xref streams, take the last one to appear.
612 for (auto const& iter: m->xref_table) { 613 for (auto const& iter: m->xref_table) {
@@ -640,7 +641,7 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -640,7 +641,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
640 } 641 }
641 } 642 }
642 643
643 - if (!m->trailer) { 644 + if (!m->xref_table.trailer) {
644 // We could check the last encountered object to see if it was an xref stream. If so, we 645 // We could check the last encountered object to see if it was an xref stream. If so, we
645 // could try to get the trailer from there. This may make it possible to recover files with 646 // could try to get the trailer from there. This may make it possible to recover files with
646 // bad startxref pointers even when they have object streams. 647 // bad startxref pointers even when they have object streams.
@@ -653,12 +654,12 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -653,12 +654,12 @@ QPDF::reconstruct_xref(QPDFExc& e)
653 throw damagedPDF("", 0, "unable to find objects while recovering damaged file"); 654 throw damagedPDF("", 0, "unable to find objects while recovering damaged file");
654 } 655 }
655 check_warnings(); 656 check_warnings();
656 - if (!m->parsed) {  
657 - m->parsed = true; 657 + if (!m->xref_table.parsed) {
  658 + m->xref_table.parsed = true;
658 getAllPages(); 659 getAllPages();
659 check_warnings(); 660 check_warnings();
660 if (m->all_pages.empty()) { 661 if (m->all_pages.empty()) {
661 - m->parsed = false; 662 + m->xref_table.parsed = false;
662 throw damagedPDF("", 0, "unable to find any pages while recovering damaged file"); 663 throw damagedPDF("", 0, "unable to find any pages while recovering damaged file");
663 } 664 }
664 } 665 }
@@ -730,16 +731,16 @@ QPDF::read_xref(qpdf_offset_t xref_offset) @@ -730,16 +731,16 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
730 } 731 }
731 } 732 }
732 733
733 - if (!m->trailer) { 734 + if (!m->xref_table.trailer) {
734 throw damagedPDF("", 0, "unable to find trailer while reading xref"); 735 throw damagedPDF("", 0, "unable to find trailer while reading xref");
735 } 736 }
736 - int size = m->trailer.getKey("/Size").getIntValueAsInt(); 737 + int size = m->xref_table.trailer.getKey("/Size").getIntValueAsInt();
737 int max_obj = 0; 738 int max_obj = 0;
738 if (!m->xref_table.empty()) { 739 if (!m->xref_table.empty()) {
739 max_obj = m->xref_table.rbegin()->first.getObj(); 740 max_obj = m->xref_table.rbegin()->first.getObj();
740 } 741 }
741 - if (!m->deleted_objects.empty()) {  
742 - max_obj = std::max(max_obj, *(m->deleted_objects.rbegin())); 742 + if (!m->xref_table.deleted_objects.empty()) {
  743 + max_obj = std::max(max_obj, *(m->xref_table.deleted_objects.rbegin()));
743 } 744 }
744 if ((size < 1) || (size - 1 != max_obj)) { 745 if ((size < 1) || (size - 1 != max_obj)) {
745 QTC::TC("qpdf", "QPDF xref size mismatch"); 746 QTC::TC("qpdf", "QPDF xref size mismatch");
@@ -752,7 +753,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) @@ -752,7 +753,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
752 753
753 // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we 754 // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we
754 // never depend on its being set. 755 // never depend on its being set.
755 - m->deleted_objects.clear(); 756 + m->xref_table.deleted_objects.clear();
756 757
757 // Make sure we keep only the highest generation for any object. 758 // Make sure we keep only the highest generation for any object.
758 QPDFObjGen last_og{-1, 0}; 759 QPDFObjGen last_og{-1, 0};
@@ -968,7 +969,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -968,7 +969,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
968 for (qpdf_offset_t i = obj; i - num < obj; ++i) { 969 for (qpdf_offset_t i = obj; i - num < obj; ++i) {
969 if (i == 0) { 970 if (i == 0) {
970 // This is needed by checkLinearization() 971 // This is needed by checkLinearization()
971 - m->first_xref_item_offset = m->file->tell(); 972 + m->xref_table.first_item_offset = m->file->tell();
972 } 973 }
973 // For xref_table, these will always be small enough to be ints 974 // For xref_table, these will always be small enough to be ints
974 qpdf_offset_t f1 = 0; 975 qpdf_offset_t f1 = 0;
@@ -1000,21 +1001,21 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -1000,21 +1001,21 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
1000 throw damagedPDF("", "expected trailer dictionary"); 1001 throw damagedPDF("", "expected trailer dictionary");
1001 } 1002 }
1002 1003
1003 - if (!m->trailer) { 1004 + if (!m->xref_table.trailer) {
1004 setTrailer(cur_trailer); 1005 setTrailer(cur_trailer);
1005 1006
1006 - if (!m->trailer.hasKey("/Size")) { 1007 + if (!m->xref_table.trailer.hasKey("/Size")) {
1007 QTC::TC("qpdf", "QPDF trailer lacks size"); 1008 QTC::TC("qpdf", "QPDF trailer lacks size");
1008 throw damagedPDF("trailer", "trailer dictionary lacks /Size key"); 1009 throw damagedPDF("trailer", "trailer dictionary lacks /Size key");
1009 } 1010 }
1010 - if (!m->trailer.getKey("/Size").isInteger()) { 1011 + if (!m->xref_table.trailer.getKey("/Size").isInteger()) {
1011 QTC::TC("qpdf", "QPDF trailer size not integer"); 1012 QTC::TC("qpdf", "QPDF trailer size not integer");
1012 throw damagedPDF("trailer", "/Size key in trailer dictionary is not an integer"); 1013 throw damagedPDF("trailer", "/Size key in trailer dictionary is not an integer");
1013 } 1014 }
1014 } 1015 }
1015 1016
1016 if (cur_trailer.hasKey("/XRefStm")) { 1017 if (cur_trailer.hasKey("/XRefStm")) {
1017 - if (m->ignore_xref_streams) { 1018 + if (m->xref_table.ignore_streams) {
1018 QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer"); 1019 QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer");
1019 } else { 1020 } else {
1020 if (cur_trailer.getKey("/XRefStm").isInteger()) { 1021 if (cur_trailer.getKey("/XRefStm").isInteger()) {
@@ -1043,7 +1044,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -1043,7 +1044,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
1043 qpdf_offset_t 1044 qpdf_offset_t
1044 QPDF::read_xrefStream(qpdf_offset_t xref_offset) 1045 QPDF::read_xrefStream(qpdf_offset_t xref_offset)
1045 { 1046 {
1046 - if (!m->ignore_xref_streams) { 1047 + if (!m->xref_table.ignore_streams) {
1047 QPDFObjGen x_og; 1048 QPDFObjGen x_og;
1048 QPDFObjectHandle xref_obj; 1049 QPDFObjectHandle xref_obj;
1049 try { 1050 try {
@@ -1238,14 +1239,14 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) @@ -1238,14 +1239,14 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
1238 // object record, in which case the generation number appears as the third field. 1239 // object record, in which case the generation number appears as the third field.
1239 if (saw_first_compressed_object) { 1240 if (saw_first_compressed_object) {
1240 if (fields[0] != 2) { 1241 if (fields[0] != 2) {
1241 - m->uncompressed_after_compressed = true; 1242 + m->xref_table.uncompressed_after_compressed = true;
1242 } 1243 }
1243 } else if (fields[0] == 2) { 1244 } else if (fields[0] == 2) {
1244 saw_first_compressed_object = true; 1245 saw_first_compressed_object = true;
1245 } 1246 }
1246 if (obj == 0) { 1247 if (obj == 0) {
1247 // This is needed by checkLinearization() 1248 // This is needed by checkLinearization()
1248 - m->first_xref_item_offset = xref_offset; 1249 + m->xref_table.first_item_offset = xref_offset;
1249 } else if (fields[0] == 0) { 1250 } else if (fields[0] == 0) {
1250 // Ignore fields[2], which we don't care about in this case. This works around the 1251 // Ignore fields[2], which we don't care about in this case. This works around the
1251 // issue of some PDF files that put invalid values, like -1, here for deleted 1252 // issue of some PDF files that put invalid values, like -1, here for deleted
@@ -1258,7 +1259,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) @@ -1258,7 +1259,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
1258 } 1259 }
1259 } 1260 }
1260 1261
1261 - if (!m->trailer) { 1262 + if (!m->xref_table.trailer) {
1262 setTrailer(dict); 1263 setTrailer(dict);
1263 } 1264 }
1264 1265
@@ -1284,12 +1285,12 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2) @@ -1284,12 +1285,12 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2)
1284 // If there is already an entry for this object and generation in the table, it means that a 1285 // If there is already an entry for this object and generation in the table, it means that a
1285 // later xref table has registered this object. Disregard this one. 1286 // later xref table has registered this object. Disregard this one.
1286 1287
1287 - if (obj > m->xref_table_max_id) { 1288 + if (obj > m->xref_table.max_id) {
1288 // ignore impossibly large object ids or object ids > Size. 1289 // ignore impossibly large object ids or object ids > Size.
1289 return; 1290 return;
1290 } 1291 }
1291 1292
1292 - if (m->deleted_objects.count(obj)) { 1293 + if (m->xref_table.deleted_objects.count(obj)) {
1293 QTC::TC("qpdf", "QPDF xref deleted object"); 1294 QTC::TC("qpdf", "QPDF xref deleted object");
1294 return; 1295 return;
1295 } 1296 }
@@ -1326,7 +1327,7 @@ void @@ -1326,7 +1327,7 @@ void
1326 QPDF::insertFreeXrefEntry(QPDFObjGen og) 1327 QPDF::insertFreeXrefEntry(QPDFObjGen og)
1327 { 1328 {
1328 if (!m->xref_table.count(og)) { 1329 if (!m->xref_table.count(og)) {
1329 - m->deleted_objects.insert(og.getObj()); 1330 + m->xref_table.deleted_objects.insert(og.getObj());
1330 } 1331 }
1331 } 1332 }
1332 1333
@@ -1335,13 +1336,13 @@ QPDF::insertFreeXrefEntry(QPDFObjGen og) @@ -1335,13 +1336,13 @@ QPDF::insertFreeXrefEntry(QPDFObjGen og)
1335 void 1336 void
1336 QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2) 1337 QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2)
1337 { 1338 {
1338 - if (!(obj > 0 && obj <= m->xref_table_max_id && 0 <= f2 && f2 < 65535)) { 1339 + if (!(obj > 0 && obj <= m->xref_table.max_id && 0 <= f2 && f2 < 65535)) {
1339 QTC::TC("qpdf", "QPDF xref overwrite invalid objgen"); 1340 QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
1340 return; 1341 return;
1341 } 1342 }
1342 1343
1343 QPDFObjGen og(obj, f2); 1344 QPDFObjGen og(obj, f2);
1344 - if (!m->deleted_objects.count(obj)) { 1345 + if (!m->xref_table.deleted_objects.count(obj)) {
1345 // deleted_objects stores the uncompressed objects removed from the xref table at the start 1346 // deleted_objects stores the uncompressed objects removed from the xref table at the start
1346 // of recovery. 1347 // of recovery.
1347 QTC::TC("qpdf", "QPDF xref overwrite object"); 1348 QTC::TC("qpdf", "QPDF xref overwrite object");
@@ -1381,11 +1382,11 @@ QPDF::showXRefTable() @@ -1381,11 +1382,11 @@ QPDF::showXRefTable()
1381 bool 1382 bool
1382 QPDF::resolveXRefTable() 1383 QPDF::resolveXRefTable()
1383 { 1384 {
1384 - bool may_change = !m->reconstructed_xref; 1385 + bool may_change = !m->xref_table.reconstructed;
1385 for (auto& iter: m->xref_table) { 1386 for (auto& iter: m->xref_table) {
1386 if (isUnresolved(iter.first)) { 1387 if (isUnresolved(iter.first)) {
1387 resolve(iter.first); 1388 resolve(iter.first);
1388 - if (may_change && m->reconstructed_xref) { 1389 + if (may_change && m->xref_table.reconstructed) {
1389 return false; 1390 return false;
1390 } 1391 }
1391 } 1392 }
@@ -1958,7 +1959,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number) @@ -1958,7 +1959,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1958 1959
1959 int num = QUtil::string_to_int(tnum.getValue().c_str()); 1960 int num = QUtil::string_to_int(tnum.getValue().c_str());
1960 long long offset = QUtil::string_to_int(toffset.getValue().c_str()); 1961 long long offset = QUtil::string_to_int(toffset.getValue().c_str());
1961 - if (num > m->xref_table_max_id) { 1962 + if (num > m->xref_table.max_id) {
1962 continue; 1963 continue;
1963 } 1964 }
1964 if (num == obj_stream_number) { 1965 if (num == obj_stream_number) {
@@ -2101,7 +2102,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf) @@ -2101,7 +2102,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf)
2101 if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) { 2102 if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) {
2102 return iter->second.object; 2103 return iter->second.object;
2103 } 2104 }
2104 - if (m->xref_table.count(og) || !m->parsed) { 2105 + if (m->xref_table.count(og) || !m->xref_table.parsed) {
2105 return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object; 2106 return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object;
2106 } 2107 }
2107 if (parse_pdf) { 2108 if (parse_pdf) {
@@ -2117,8 +2118,9 @@ QPDF::getObjectForJSON(int id, int gen) @@ -2117,8 +2118,9 @@ QPDF::getObjectForJSON(int id, int gen)
2117 auto [it, inserted] = m->obj_cache.try_emplace(og); 2118 auto [it, inserted] = m->obj_cache.try_emplace(og);
2118 auto& obj = it->second.object; 2119 auto& obj = it->second.object;
2119 if (inserted) { 2120 if (inserted) {
2120 - obj = (m->parsed && !m->xref_table.count(og)) ? QPDF_Null::create(this, og)  
2121 - : QPDF_Unresolved::create(this, og); 2121 + obj = (m->xref_table.parsed && !m->xref_table.count(og))
  2122 + ? QPDF_Null::create(this, og)
  2123 + : QPDF_Unresolved::create(this, og);
2122 } 2124 }
2123 return obj; 2125 return obj;
2124 } 2126 }
@@ -2128,7 +2130,7 @@ QPDF::getObject(QPDFObjGen const& og) @@ -2128,7 +2130,7 @@ QPDF::getObject(QPDFObjGen const& og)
2128 { 2130 {
2129 if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) { 2131 if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) {
2130 return {it->second.object}; 2132 return {it->second.object};
2131 - } else if (m->parsed && !m->xref_table.count(og)) { 2133 + } else if (m->xref_table.parsed && !m->xref_table.count(og)) {
2132 return QPDF_Null::create(); 2134 return QPDF_Null::create();
2133 } else { 2135 } else {
2134 auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1); 2136 auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1);
@@ -2526,13 +2528,13 @@ QPDF::getExtensionLevel() @@ -2526,13 +2528,13 @@ QPDF::getExtensionLevel()
2526 QPDFObjectHandle 2528 QPDFObjectHandle
2527 QPDF::getTrailer() 2529 QPDF::getTrailer()
2528 { 2530 {
2529 - return m->trailer; 2531 + return m->xref_table.trailer;
2530 } 2532 }
2531 2533
2532 QPDFObjectHandle 2534 QPDFObjectHandle
2533 QPDF::getRoot() 2535 QPDF::getRoot()
2534 { 2536 {
2535 - QPDFObjectHandle root = m->trailer.getKey("/Root"); 2537 + QPDFObjectHandle root = m->xref_table.trailer.getKey("/Root");
2536 if (!root.isDictionary()) { 2538 if (!root.isDictionary()) {
2537 throw damagedPDF("", 0, "unable to find /Root dictionary"); 2539 throw damagedPDF("", 0, "unable to find /Root dictionary");
2538 } else if ( 2540 } else if (
@@ -2554,7 +2556,7 @@ QPDF::getXRefTable() @@ -2554,7 +2556,7 @@ QPDF::getXRefTable()
2554 std::map<QPDFObjGen, QPDFXRefEntry> const& 2556 std::map<QPDFObjGen, QPDFXRefEntry> const&
2555 QPDF::getXRefTableInternal() 2557 QPDF::getXRefTableInternal()
2556 { 2558 {
2557 - if (!m->parsed) { 2559 + if (!m->xref_table.parsed) {
2558 throw std::logic_error("QPDF::getXRefTable called before parsing."); 2560 throw std::logic_error("QPDF::getXRefTable called before parsing.");
2559 } 2561 }
2560 2562
@@ -2604,14 +2606,14 @@ QPDF::getCompressibleObjGens() @@ -2604,14 +2606,14 @@ QPDF::getCompressibleObjGens()
2604 // iterating through the xref table since it avoids preserving orphaned items. 2606 // iterating through the xref table since it avoids preserving orphaned items.
2605 2607
2606 // Exclude encryption dictionary, if any 2608 // Exclude encryption dictionary, if any
2607 - QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt"); 2609 + QPDFObjectHandle encryption_dict = m->xref_table.trailer.getKey("/Encrypt");
2608 QPDFObjGen encryption_dict_og = encryption_dict.getObjGen(); 2610 QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
2609 2611
2610 const size_t max_obj = getObjectCount(); 2612 const size_t max_obj = getObjectCount();
2611 std::vector<bool> visited(max_obj, false); 2613 std::vector<bool> visited(max_obj, false);
2612 std::vector<QPDFObjectHandle> queue; 2614 std::vector<QPDFObjectHandle> queue;
2613 queue.reserve(512); 2615 queue.reserve(512);
2614 - queue.push_back(m->trailer); 2616 + queue.push_back(m->xref_table.trailer);
2615 std::vector<T> result; 2617 std::vector<T> result;
2616 if constexpr (std::is_same_v<T, QPDFObjGen>) { 2618 if constexpr (std::is_same_v<T, QPDFObjGen>) {
2617 result.reserve(m->obj_cache.size()); 2619 result.reserve(m->obj_cache.size());
libqpdf/QPDF_encryption.cc
@@ -727,7 +727,7 @@ QPDF::initializeEncryption() @@ -727,7 +727,7 @@ QPDF::initializeEncryption()
727 // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption 727 // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption
728 // dictionary. 728 // dictionary.
729 729
730 - if (!m->trailer.hasKey("/Encrypt")) { 730 + if (!m->xref_table.trailer.hasKey("/Encrypt")) {
731 return; 731 return;
732 } 732 }
733 733
@@ -736,7 +736,7 @@ QPDF::initializeEncryption() @@ -736,7 +736,7 @@ QPDF::initializeEncryption()
736 m->encp->encrypted = true; 736 m->encp->encrypted = true;
737 737
738 std::string id1; 738 std::string id1;
739 - QPDFObjectHandle id_obj = m->trailer.getKey("/ID"); 739 + QPDFObjectHandle id_obj = m->xref_table.trailer.getKey("/ID");
740 if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) { 740 if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) {
741 id1 = id_obj.getArrayItem(0).getStringValue(); 741 id1 = id_obj.getArrayItem(0).getStringValue();
742 } else { 742 } else {
@@ -745,7 +745,7 @@ QPDF::initializeEncryption() @@ -745,7 +745,7 @@ QPDF::initializeEncryption()
745 warn(damagedPDF("trailer", "invalid /ID in trailer dictionary")); 745 warn(damagedPDF("trailer", "invalid /ID in trailer dictionary"));
746 } 746 }
747 747
748 - QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt"); 748 + QPDFObjectHandle encryption_dict = m->xref_table.trailer.getKey("/Encrypt");
749 if (!encryption_dict.isDictionary()) { 749 if (!encryption_dict.isDictionary()) {
750 throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary"); 750 throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary");
751 } 751 }
libqpdf/QPDF_json.cc
@@ -593,8 +593,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -593,8 +593,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
593 this->saw_value = true; 593 this->saw_value = true;
594 // The trailer must be a dictionary, so we can use setNextStateIfDictionary. 594 // The trailer must be a dictionary, so we can use setNextStateIfDictionary.
595 if (setNextStateIfDictionary("trailer.value", value, st_object)) { 595 if (setNextStateIfDictionary("trailer.value", value, st_object)) {
596 - this->pdf.m->trailer = makeObject(value);  
597 - setObjectDescription(this->pdf.m->trailer, value); 596 + pdf.m->xref_table.trailer = makeObject(value);
  597 + setObjectDescription(this->pdf.m->xref_table.trailer, value);
598 } 598 }
599 } else if (key == "stream") { 599 } else if (key == "stream") {
600 // Don't need to set saw_stream here since there's already an error. 600 // Don't need to set saw_stream here since there's already an error.
libqpdf/QPDF_linearization.cc
@@ -461,12 +461,11 @@ QPDF::checkLinearizationInternal() @@ -461,12 +461,11 @@ QPDF::checkLinearizationInternal()
461 break; 461 break;
462 } 462 }
463 } 463 }
464 - if (m->file->tell() != m->first_xref_item_offset) { 464 + if (m->file->tell() != m->xref_table.first_item_offset) {
465 QTC::TC("qpdf", "QPDF err /T mismatch"); 465 QTC::TC("qpdf", "QPDF err /T mismatch");
466 linearizationWarning( 466 linearizationWarning(
467 - "space before first xref item (/T) mismatch "  
468 - "(computed = " +  
469 - std::to_string(m->first_xref_item_offset) + 467 + "space before first xref item (/T) mismatch (computed = " +
  468 + std::to_string(m->xref_table.first_item_offset) +
470 "; file = " + std::to_string(m->file->tell())); 469 "; file = " + std::to_string(m->file->tell()));
471 } 470 }
472 471
@@ -477,7 +476,7 @@ QPDF::checkLinearizationInternal() @@ -477,7 +476,7 @@ QPDF::checkLinearizationInternal()
477 // compressed objects are supposed to be at the end of the containing xref section if any object 476 // compressed objects are supposed to be at the end of the containing xref section if any object
478 // streams are in use. 477 // streams are in use.
479 478
480 - if (m->uncompressed_after_compressed) { 479 + if (m->xref_table.uncompressed_after_compressed) {
481 linearizationWarning("linearized file contains an uncompressed object after a compressed " 480 linearizationWarning("linearized file contains an uncompressed object after a compressed "
482 "one in a cross-reference stream"); 481 "one in a cross-reference stream");
483 } 482 }
libqpdf/QPDF_optimization.cc
@@ -115,13 +115,13 @@ QPDF::optimize_internal( @@ -115,13 +115,13 @@ QPDF::optimize_internal(
115 } 115 }
116 116
117 // Traverse document-level items 117 // Traverse document-level items
118 - for (auto const& key: m->trailer.getKeys()) { 118 + for (auto const& key: m->xref_table.trailer.getKeys()) {
119 if (key == "/Root") { 119 if (key == "/Root") {
120 // handled separately 120 // handled separately
121 } else { 121 } else {
122 updateObjectMaps( 122 updateObjectMaps(
123 ObjUser(ObjUser::ou_trailer_key, key), 123 ObjUser(ObjUser::ou_trailer_key, key),
124 - m->trailer.getKey(key), 124 + m->xref_table.trailer.getKey(key),
125 skip_stream_parameters); 125 skip_stream_parameters);
126 } 126 }
127 } 127 }
@@ -169,13 +169,13 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) @@ -169,13 +169,13 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
169 // values for them. 169 // values for them.
170 std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; 170 std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
171 pushInheritedAttributesToPageInternal( 171 pushInheritedAttributesToPageInternal(
172 - m->trailer.getKey("/Root").getKey("/Pages"), 172 + m->xref_table.trailer.getKey("/Root").getKey("/Pages"),
173 key_ancestors, 173 key_ancestors,
174 allow_changes, 174 allow_changes,
175 warn_skipped_keys); 175 warn_skipped_keys);
176 if (!key_ancestors.empty()) { 176 if (!key_ancestors.empty()) {
177 - throw std::logic_error("key_ancestors not empty after"  
178 - " pushing inherited attributes to pages"); 177 + throw std::logic_error(
  178 + "key_ancestors not empty after pushing inherited attributes to pages");
179 } 179 }
180 m->pushed_inherited_attributes_to_pages = true; 180 m->pushed_inherited_attributes_to_pages = true;
181 m->ever_pushed_inherited_attributes_to_pages = true; 181 m->ever_pushed_inherited_attributes_to_pages = true;
libqpdf/qpdf/QPDF_private.hh
@@ -3,6 +3,25 @@ @@ -3,6 +3,25 @@
3 3
4 #include <qpdf/QPDF.hh> 4 #include <qpdf/QPDF.hh>
5 5
  6 +// Xref_table encapsulates the pdf's xref table and trailer.
  7 +class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry>
  8 +{
  9 + public:
  10 + QPDFObjectHandle trailer;
  11 + bool reconstructed{false};
  12 + // Various tables are indexed by object id, with potential size id + 1
  13 + int max_id{std::numeric_limits<int>::max() - 1};
  14 + qpdf_offset_t max_offset{0};
  15 + std::set<int> deleted_objects;
  16 + bool ignore_streams{false};
  17 + bool parsed{false};
  18 + bool attempt_recovery{true};
  19 +
  20 + // Linearization data
  21 + bool uncompressed_after_compressed{false};
  22 + qpdf_offset_t first_item_offset{0}; // actual value from file
  23 +};
  24 +
6 // Writer class is restricted to QPDFWriter so that only it can call certain methods. 25 // Writer class is restricted to QPDFWriter so that only it can call certain methods.
7 class QPDF::Writer 26 class QPDF::Writer
8 { 27 {
@@ -459,21 +478,15 @@ class QPDF::Members @@ -459,21 +478,15 @@ class QPDF::Members
459 std::shared_ptr<InputSource> file; 478 std::shared_ptr<InputSource> file;
460 std::string last_object_description; 479 std::string last_object_description;
461 bool provided_password_is_hex_key{false}; 480 bool provided_password_is_hex_key{false};
462 - bool ignore_xref_streams{false};  
463 bool suppress_warnings{false}; 481 bool suppress_warnings{false};
464 size_t max_warnings{0}; 482 size_t max_warnings{0};
465 bool attempt_recovery{true}; 483 bool attempt_recovery{true};
466 bool check_mode{false}; 484 bool check_mode{false};
467 std::shared_ptr<EncryptionParameters> encp; 485 std::shared_ptr<EncryptionParameters> encp;
468 std::string pdf_version; 486 std::string pdf_version;
469 - std::map<QPDFObjGen, QPDFXRefEntry> xref_table;  
470 - // Various tables are indexed by object id, with potential size id + 1  
471 - int xref_table_max_id{std::numeric_limits<int>::max() - 1};  
472 - qpdf_offset_t xref_table_max_offset{0};  
473 - std::set<int> deleted_objects; 487 + Xref_table xref_table;
474 std::map<QPDFObjGen, ObjCache> obj_cache; 488 std::map<QPDFObjGen, ObjCache> obj_cache;
475 std::set<QPDFObjGen> resolving; 489 std::set<QPDFObjGen> resolving;
476 - QPDFObjectHandle trailer;  
477 std::vector<QPDFObjectHandle> all_pages; 490 std::vector<QPDFObjectHandle> all_pages;
478 bool invalid_page_found{false}; 491 bool invalid_page_found{false};
479 std::map<QPDFObjGen, int> pageobj_to_pages_pos; 492 std::map<QPDFObjGen, int> pageobj_to_pages_pos;
@@ -485,16 +498,12 @@ class QPDF::Members @@ -485,16 +498,12 @@ class QPDF::Members
485 std::shared_ptr<QPDFObjectHandle::StreamDataProvider> copied_streams; 498 std::shared_ptr<QPDFObjectHandle::StreamDataProvider> copied_streams;
486 // copied_stream_data_provider is owned by copied_streams 499 // copied_stream_data_provider is owned by copied_streams
487 CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; 500 CopiedStreamDataProvider* copied_stream_data_provider{nullptr};
488 - bool reconstructed_xref{false};  
489 bool fixed_dangling_refs{false}; 501 bool fixed_dangling_refs{false};
490 bool immediate_copy_from{false}; 502 bool immediate_copy_from{false};
491 bool in_parse{false}; 503 bool in_parse{false};
492 - bool parsed{false};  
493 std::set<int> resolved_object_streams; 504 std::set<int> resolved_object_streams;
494 505
495 // Linearization data 506 // Linearization data
496 - qpdf_offset_t first_xref_item_offset{0}; // actual value from file  
497 - bool uncompressed_after_compressed{false};  
498 bool linearization_warnings{false}; 507 bool linearization_warnings{false};
499 508
500 // Linearization parameter dictionary and hint table data: may be read from file or computed 509 // Linearization parameter dictionary and hint table data: may be read from file or computed