Commit 0ac37bc9561f1cf1aca2c55fc0e4702d3febcf75
1 parent
f8e6274a
Add new class QPDF::Xref_table
Showing
7 changed files
with
85 additions
and
74 deletions
include/qpdf/QPDF.hh
libqpdf/QPDF.cc
| ... | ... | @@ -303,7 +303,7 @@ QPDF::registerStreamFilter( |
| 303 | 303 | void |
| 304 | 304 | QPDF::setIgnoreXRefStreams(bool val) |
| 305 | 305 | { |
| 306 | - m->ignore_xref_streams = val; | |
| 306 | + m->xref_table.ignore_streams = val; | |
| 307 | 307 | } |
| 308 | 308 | |
| 309 | 309 | std::shared_ptr<QPDFLogger> |
| ... | ... | @@ -341,6 +341,7 @@ void |
| 341 | 341 | QPDF::setAttemptRecovery(bool val) |
| 342 | 342 | { |
| 343 | 343 | m->attempt_recovery = val; |
| 344 | + m->xref_table.attempt_recovery = val; | |
| 344 | 345 | } |
| 345 | 346 | |
| 346 | 347 | void |
| ... | ... | @@ -447,11 +448,11 @@ QPDF::parse(char const* password) |
| 447 | 448 | // 30 characters to leave room for the startxref stuff. |
| 448 | 449 | m->file->seek(0, SEEK_END); |
| 449 | 450 | qpdf_offset_t end_offset = m->file->tell(); |
| 450 | - m->xref_table_max_offset = end_offset; | |
| 451 | + m->xref_table.max_offset = end_offset; | |
| 451 | 452 | // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic |
| 452 | 453 | // scenarios at least 3 bytes are required. |
| 453 | - if (m->xref_table_max_id > m->xref_table_max_offset / 3) { | |
| 454 | - m->xref_table_max_id = static_cast<int>(m->xref_table_max_offset / 3); | |
| 454 | + if (m->xref_table.max_id > m->xref_table.max_offset / 3) { | |
| 455 | + m->xref_table.max_id = static_cast<int>(m->xref_table.max_offset / 3); | |
| 455 | 456 | } |
| 456 | 457 | qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); |
| 457 | 458 | PatternFinder sf(*this, &QPDF::findStartxref); |
| ... | ... | @@ -482,7 +483,7 @@ QPDF::parse(char const* password) |
| 482 | 483 | } |
| 483 | 484 | |
| 484 | 485 | initializeEncryption(); |
| 485 | - m->parsed = true; | |
| 486 | + m->xref_table.parsed = true; | |
| 486 | 487 | if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { |
| 487 | 488 | // QPDFs created from JSON have an empty xref table and no root object yet. |
| 488 | 489 | throw damagedPDF("", 0, "unable to find page tree"); |
| ... | ... | @@ -526,16 +527,16 @@ QPDF::warn( |
| 526 | 527 | void |
| 527 | 528 | QPDF::setTrailer(QPDFObjectHandle obj) |
| 528 | 529 | { |
| 529 | - if (m->trailer) { | |
| 530 | + if (m->xref_table.trailer) { | |
| 530 | 531 | return; |
| 531 | 532 | } |
| 532 | - m->trailer = obj; | |
| 533 | + m->xref_table.trailer = obj; | |
| 533 | 534 | } |
| 534 | 535 | |
| 535 | 536 | void |
| 536 | 537 | QPDF::reconstruct_xref(QPDFExc& e) |
| 537 | 538 | { |
| 538 | - if (m->reconstructed_xref) { | |
| 539 | + if (m->xref_table.reconstructed) { | |
| 539 | 540 | // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because |
| 540 | 541 | // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now. |
| 541 | 542 | throw e; |
| ... | ... | @@ -550,7 +551,7 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 550 | 551 | } |
| 551 | 552 | }; |
| 552 | 553 | |
| 553 | - m->reconstructed_xref = true; | |
| 554 | + m->xref_table.reconstructed = true; | |
| 554 | 555 | // We may find more objects, which may contain dangling references. |
| 555 | 556 | m->fixed_dangling_refs = false; |
| 556 | 557 | |
| ... | ... | @@ -583,7 +584,7 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 583 | 584 | if ((t2.isInteger()) && (readToken(*m->file, MAX_LEN).isWord("obj"))) { |
| 584 | 585 | int obj = QUtil::string_to_int(t1.getValue().c_str()); |
| 585 | 586 | int gen = QUtil::string_to_int(t2.getValue().c_str()); |
| 586 | - if (obj <= m->xref_table_max_id) { | |
| 587 | + if (obj <= m->xref_table.max_id) { | |
| 587 | 588 | insertReconstructedXrefEntry(obj, token_start, gen); |
| 588 | 589 | } else { |
| 589 | 590 | warn(damagedPDF( |
| ... | ... | @@ -591,7 +592,7 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 591 | 592 | } |
| 592 | 593 | } |
| 593 | 594 | m->file->seek(pos, SEEK_SET); |
| 594 | - } else if (!m->trailer && t1.isWord("trailer")) { | |
| 595 | + } else if (!m->xref_table.trailer && t1.isWord("trailer")) { | |
| 595 | 596 | auto pos = m->file->tell(); |
| 596 | 597 | QPDFObjectHandle t = readTrailer(); |
| 597 | 598 | if (!t.isDictionary()) { |
| ... | ... | @@ -604,9 +605,9 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 604 | 605 | check_warnings(); |
| 605 | 606 | m->file->findAndSkipNextEOL(); |
| 606 | 607 | } |
| 607 | - m->deleted_objects.clear(); | |
| 608 | + m->xref_table.deleted_objects.clear(); | |
| 608 | 609 | |
| 609 | - if (!m->trailer) { | |
| 610 | + if (!m->xref_table.trailer) { | |
| 610 | 611 | qpdf_offset_t max_offset{0}; |
| 611 | 612 | // If there are any xref streams, take the last one to appear. |
| 612 | 613 | for (auto const& iter: m->xref_table) { |
| ... | ... | @@ -640,7 +641,7 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 640 | 641 | } |
| 641 | 642 | } |
| 642 | 643 | |
| 643 | - if (!m->trailer) { | |
| 644 | + if (!m->xref_table.trailer) { | |
| 644 | 645 | // We could check the last encountered object to see if it was an xref stream. If so, we |
| 645 | 646 | // could try to get the trailer from there. This may make it possible to recover files with |
| 646 | 647 | // bad startxref pointers even when they have object streams. |
| ... | ... | @@ -653,12 +654,12 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 653 | 654 | throw damagedPDF("", 0, "unable to find objects while recovering damaged file"); |
| 654 | 655 | } |
| 655 | 656 | check_warnings(); |
| 656 | - if (!m->parsed) { | |
| 657 | - m->parsed = true; | |
| 657 | + if (!m->xref_table.parsed) { | |
| 658 | + m->xref_table.parsed = true; | |
| 658 | 659 | getAllPages(); |
| 659 | 660 | check_warnings(); |
| 660 | 661 | if (m->all_pages.empty()) { |
| 661 | - m->parsed = false; | |
| 662 | + m->xref_table.parsed = false; | |
| 662 | 663 | throw damagedPDF("", 0, "unable to find any pages while recovering damaged file"); |
| 663 | 664 | } |
| 664 | 665 | } |
| ... | ... | @@ -730,16 +731,16 @@ QPDF::read_xref(qpdf_offset_t xref_offset) |
| 730 | 731 | } |
| 731 | 732 | } |
| 732 | 733 | |
| 733 | - if (!m->trailer) { | |
| 734 | + if (!m->xref_table.trailer) { | |
| 734 | 735 | throw damagedPDF("", 0, "unable to find trailer while reading xref"); |
| 735 | 736 | } |
| 736 | - int size = m->trailer.getKey("/Size").getIntValueAsInt(); | |
| 737 | + int size = m->xref_table.trailer.getKey("/Size").getIntValueAsInt(); | |
| 737 | 738 | int max_obj = 0; |
| 738 | 739 | if (!m->xref_table.empty()) { |
| 739 | 740 | max_obj = m->xref_table.rbegin()->first.getObj(); |
| 740 | 741 | } |
| 741 | - if (!m->deleted_objects.empty()) { | |
| 742 | - max_obj = std::max(max_obj, *(m->deleted_objects.rbegin())); | |
| 742 | + if (!m->xref_table.deleted_objects.empty()) { | |
| 743 | + max_obj = std::max(max_obj, *(m->xref_table.deleted_objects.rbegin())); | |
| 743 | 744 | } |
| 744 | 745 | if ((size < 1) || (size - 1 != max_obj)) { |
| 745 | 746 | QTC::TC("qpdf", "QPDF xref size mismatch"); |
| ... | ... | @@ -752,7 +753,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) |
| 752 | 753 | |
| 753 | 754 | // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we |
| 754 | 755 | // never depend on its being set. |
| 755 | - m->deleted_objects.clear(); | |
| 756 | + m->xref_table.deleted_objects.clear(); | |
| 756 | 757 | |
| 757 | 758 | // Make sure we keep only the highest generation for any object. |
| 758 | 759 | QPDFObjGen last_og{-1, 0}; |
| ... | ... | @@ -968,7 +969,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 968 | 969 | for (qpdf_offset_t i = obj; i - num < obj; ++i) { |
| 969 | 970 | if (i == 0) { |
| 970 | 971 | // This is needed by checkLinearization() |
| 971 | - m->first_xref_item_offset = m->file->tell(); | |
| 972 | + m->xref_table.first_item_offset = m->file->tell(); | |
| 972 | 973 | } |
| 973 | 974 | // For xref_table, these will always be small enough to be ints |
| 974 | 975 | qpdf_offset_t f1 = 0; |
| ... | ... | @@ -1000,21 +1001,21 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 1000 | 1001 | throw damagedPDF("", "expected trailer dictionary"); |
| 1001 | 1002 | } |
| 1002 | 1003 | |
| 1003 | - if (!m->trailer) { | |
| 1004 | + if (!m->xref_table.trailer) { | |
| 1004 | 1005 | setTrailer(cur_trailer); |
| 1005 | 1006 | |
| 1006 | - if (!m->trailer.hasKey("/Size")) { | |
| 1007 | + if (!m->xref_table.trailer.hasKey("/Size")) { | |
| 1007 | 1008 | QTC::TC("qpdf", "QPDF trailer lacks size"); |
| 1008 | 1009 | throw damagedPDF("trailer", "trailer dictionary lacks /Size key"); |
| 1009 | 1010 | } |
| 1010 | - if (!m->trailer.getKey("/Size").isInteger()) { | |
| 1011 | + if (!m->xref_table.trailer.getKey("/Size").isInteger()) { | |
| 1011 | 1012 | QTC::TC("qpdf", "QPDF trailer size not integer"); |
| 1012 | 1013 | throw damagedPDF("trailer", "/Size key in trailer dictionary is not an integer"); |
| 1013 | 1014 | } |
| 1014 | 1015 | } |
| 1015 | 1016 | |
| 1016 | 1017 | if (cur_trailer.hasKey("/XRefStm")) { |
| 1017 | - if (m->ignore_xref_streams) { | |
| 1018 | + if (m->xref_table.ignore_streams) { | |
| 1018 | 1019 | QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer"); |
| 1019 | 1020 | } else { |
| 1020 | 1021 | if (cur_trailer.getKey("/XRefStm").isInteger()) { |
| ... | ... | @@ -1043,7 +1044,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 1043 | 1044 | qpdf_offset_t |
| 1044 | 1045 | QPDF::read_xrefStream(qpdf_offset_t xref_offset) |
| 1045 | 1046 | { |
| 1046 | - if (!m->ignore_xref_streams) { | |
| 1047 | + if (!m->xref_table.ignore_streams) { | |
| 1047 | 1048 | QPDFObjGen x_og; |
| 1048 | 1049 | QPDFObjectHandle xref_obj; |
| 1049 | 1050 | try { |
| ... | ... | @@ -1238,14 +1239,14 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) |
| 1238 | 1239 | // object record, in which case the generation number appears as the third field. |
| 1239 | 1240 | if (saw_first_compressed_object) { |
| 1240 | 1241 | if (fields[0] != 2) { |
| 1241 | - m->uncompressed_after_compressed = true; | |
| 1242 | + m->xref_table.uncompressed_after_compressed = true; | |
| 1242 | 1243 | } |
| 1243 | 1244 | } else if (fields[0] == 2) { |
| 1244 | 1245 | saw_first_compressed_object = true; |
| 1245 | 1246 | } |
| 1246 | 1247 | if (obj == 0) { |
| 1247 | 1248 | // This is needed by checkLinearization() |
| 1248 | - m->first_xref_item_offset = xref_offset; | |
| 1249 | + m->xref_table.first_item_offset = xref_offset; | |
| 1249 | 1250 | } else if (fields[0] == 0) { |
| 1250 | 1251 | // Ignore fields[2], which we don't care about in this case. This works around the |
| 1251 | 1252 | // issue of some PDF files that put invalid values, like -1, here for deleted |
| ... | ... | @@ -1258,7 +1259,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) |
| 1258 | 1259 | } |
| 1259 | 1260 | } |
| 1260 | 1261 | |
| 1261 | - if (!m->trailer) { | |
| 1262 | + if (!m->xref_table.trailer) { | |
| 1262 | 1263 | setTrailer(dict); |
| 1263 | 1264 | } |
| 1264 | 1265 | |
| ... | ... | @@ -1284,12 +1285,12 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2) |
| 1284 | 1285 | // If there is already an entry for this object and generation in the table, it means that a |
| 1285 | 1286 | // later xref table has registered this object. Disregard this one. |
| 1286 | 1287 | |
| 1287 | - if (obj > m->xref_table_max_id) { | |
| 1288 | + if (obj > m->xref_table.max_id) { | |
| 1288 | 1289 | // ignore impossibly large object ids or object ids > Size. |
| 1289 | 1290 | return; |
| 1290 | 1291 | } |
| 1291 | 1292 | |
| 1292 | - if (m->deleted_objects.count(obj)) { | |
| 1293 | + if (m->xref_table.deleted_objects.count(obj)) { | |
| 1293 | 1294 | QTC::TC("qpdf", "QPDF xref deleted object"); |
| 1294 | 1295 | return; |
| 1295 | 1296 | } |
| ... | ... | @@ -1326,7 +1327,7 @@ void |
| 1326 | 1327 | QPDF::insertFreeXrefEntry(QPDFObjGen og) |
| 1327 | 1328 | { |
| 1328 | 1329 | if (!m->xref_table.count(og)) { |
| 1329 | - m->deleted_objects.insert(og.getObj()); | |
| 1330 | + m->xref_table.deleted_objects.insert(og.getObj()); | |
| 1330 | 1331 | } |
| 1331 | 1332 | } |
| 1332 | 1333 | |
| ... | ... | @@ -1335,13 +1336,13 @@ QPDF::insertFreeXrefEntry(QPDFObjGen og) |
| 1335 | 1336 | void |
| 1336 | 1337 | QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2) |
| 1337 | 1338 | { |
| 1338 | - if (!(obj > 0 && obj <= m->xref_table_max_id && 0 <= f2 && f2 < 65535)) { | |
| 1339 | + if (!(obj > 0 && obj <= m->xref_table.max_id && 0 <= f2 && f2 < 65535)) { | |
| 1339 | 1340 | QTC::TC("qpdf", "QPDF xref overwrite invalid objgen"); |
| 1340 | 1341 | return; |
| 1341 | 1342 | } |
| 1342 | 1343 | |
| 1343 | 1344 | QPDFObjGen og(obj, f2); |
| 1344 | - if (!m->deleted_objects.count(obj)) { | |
| 1345 | + if (!m->xref_table.deleted_objects.count(obj)) { | |
| 1345 | 1346 | // deleted_objects stores the uncompressed objects removed from the xref table at the start |
| 1346 | 1347 | // of recovery. |
| 1347 | 1348 | QTC::TC("qpdf", "QPDF xref overwrite object"); |
| ... | ... | @@ -1381,11 +1382,11 @@ QPDF::showXRefTable() |
| 1381 | 1382 | bool |
| 1382 | 1383 | QPDF::resolveXRefTable() |
| 1383 | 1384 | { |
| 1384 | - bool may_change = !m->reconstructed_xref; | |
| 1385 | + bool may_change = !m->xref_table.reconstructed; | |
| 1385 | 1386 | for (auto& iter: m->xref_table) { |
| 1386 | 1387 | if (isUnresolved(iter.first)) { |
| 1387 | 1388 | resolve(iter.first); |
| 1388 | - if (may_change && m->reconstructed_xref) { | |
| 1389 | + if (may_change && m->xref_table.reconstructed) { | |
| 1389 | 1390 | return false; |
| 1390 | 1391 | } |
| 1391 | 1392 | } |
| ... | ... | @@ -1958,7 +1959,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number) |
| 1958 | 1959 | |
| 1959 | 1960 | int num = QUtil::string_to_int(tnum.getValue().c_str()); |
| 1960 | 1961 | long long offset = QUtil::string_to_int(toffset.getValue().c_str()); |
| 1961 | - if (num > m->xref_table_max_id) { | |
| 1962 | + if (num > m->xref_table.max_id) { | |
| 1962 | 1963 | continue; |
| 1963 | 1964 | } |
| 1964 | 1965 | if (num == obj_stream_number) { |
| ... | ... | @@ -2101,7 +2102,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf) |
| 2101 | 2102 | if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) { |
| 2102 | 2103 | return iter->second.object; |
| 2103 | 2104 | } |
| 2104 | - if (m->xref_table.count(og) || !m->parsed) { | |
| 2105 | + if (m->xref_table.count(og) || !m->xref_table.parsed) { | |
| 2105 | 2106 | return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object; |
| 2106 | 2107 | } |
| 2107 | 2108 | if (parse_pdf) { |
| ... | ... | @@ -2117,8 +2118,9 @@ QPDF::getObjectForJSON(int id, int gen) |
| 2117 | 2118 | auto [it, inserted] = m->obj_cache.try_emplace(og); |
| 2118 | 2119 | auto& obj = it->second.object; |
| 2119 | 2120 | if (inserted) { |
| 2120 | - obj = (m->parsed && !m->xref_table.count(og)) ? QPDF_Null::create(this, og) | |
| 2121 | - : QPDF_Unresolved::create(this, og); | |
| 2121 | + obj = (m->xref_table.parsed && !m->xref_table.count(og)) | |
| 2122 | + ? QPDF_Null::create(this, og) | |
| 2123 | + : QPDF_Unresolved::create(this, og); | |
| 2122 | 2124 | } |
| 2123 | 2125 | return obj; |
| 2124 | 2126 | } |
| ... | ... | @@ -2128,7 +2130,7 @@ QPDF::getObject(QPDFObjGen const& og) |
| 2128 | 2130 | { |
| 2129 | 2131 | if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) { |
| 2130 | 2132 | return {it->second.object}; |
| 2131 | - } else if (m->parsed && !m->xref_table.count(og)) { | |
| 2133 | + } else if (m->xref_table.parsed && !m->xref_table.count(og)) { | |
| 2132 | 2134 | return QPDF_Null::create(); |
| 2133 | 2135 | } else { |
| 2134 | 2136 | auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1); |
| ... | ... | @@ -2526,13 +2528,13 @@ QPDF::getExtensionLevel() |
| 2526 | 2528 | QPDFObjectHandle |
| 2527 | 2529 | QPDF::getTrailer() |
| 2528 | 2530 | { |
| 2529 | - return m->trailer; | |
| 2531 | + return m->xref_table.trailer; | |
| 2530 | 2532 | } |
| 2531 | 2533 | |
| 2532 | 2534 | QPDFObjectHandle |
| 2533 | 2535 | QPDF::getRoot() |
| 2534 | 2536 | { |
| 2535 | - QPDFObjectHandle root = m->trailer.getKey("/Root"); | |
| 2537 | + QPDFObjectHandle root = m->xref_table.trailer.getKey("/Root"); | |
| 2536 | 2538 | if (!root.isDictionary()) { |
| 2537 | 2539 | throw damagedPDF("", 0, "unable to find /Root dictionary"); |
| 2538 | 2540 | } else if ( |
| ... | ... | @@ -2554,7 +2556,7 @@ QPDF::getXRefTable() |
| 2554 | 2556 | std::map<QPDFObjGen, QPDFXRefEntry> const& |
| 2555 | 2557 | QPDF::getXRefTableInternal() |
| 2556 | 2558 | { |
| 2557 | - if (!m->parsed) { | |
| 2559 | + if (!m->xref_table.parsed) { | |
| 2558 | 2560 | throw std::logic_error("QPDF::getXRefTable called before parsing."); |
| 2559 | 2561 | } |
| 2560 | 2562 | |
| ... | ... | @@ -2604,14 +2606,14 @@ QPDF::getCompressibleObjGens() |
| 2604 | 2606 | // iterating through the xref table since it avoids preserving orphaned items. |
| 2605 | 2607 | |
| 2606 | 2608 | // Exclude encryption dictionary, if any |
| 2607 | - QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt"); | |
| 2609 | + QPDFObjectHandle encryption_dict = m->xref_table.trailer.getKey("/Encrypt"); | |
| 2608 | 2610 | QPDFObjGen encryption_dict_og = encryption_dict.getObjGen(); |
| 2609 | 2611 | |
| 2610 | 2612 | const size_t max_obj = getObjectCount(); |
| 2611 | 2613 | std::vector<bool> visited(max_obj, false); |
| 2612 | 2614 | std::vector<QPDFObjectHandle> queue; |
| 2613 | 2615 | queue.reserve(512); |
| 2614 | - queue.push_back(m->trailer); | |
| 2616 | + queue.push_back(m->xref_table.trailer); | |
| 2615 | 2617 | std::vector<T> result; |
| 2616 | 2618 | if constexpr (std::is_same_v<T, QPDFObjGen>) { |
| 2617 | 2619 | result.reserve(m->obj_cache.size()); | ... | ... |
libqpdf/QPDF_encryption.cc
| ... | ... | @@ -727,7 +727,7 @@ QPDF::initializeEncryption() |
| 727 | 727 | // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption |
| 728 | 728 | // dictionary. |
| 729 | 729 | |
| 730 | - if (!m->trailer.hasKey("/Encrypt")) { | |
| 730 | + if (!m->xref_table.trailer.hasKey("/Encrypt")) { | |
| 731 | 731 | return; |
| 732 | 732 | } |
| 733 | 733 | |
| ... | ... | @@ -736,7 +736,7 @@ QPDF::initializeEncryption() |
| 736 | 736 | m->encp->encrypted = true; |
| 737 | 737 | |
| 738 | 738 | std::string id1; |
| 739 | - QPDFObjectHandle id_obj = m->trailer.getKey("/ID"); | |
| 739 | + QPDFObjectHandle id_obj = m->xref_table.trailer.getKey("/ID"); | |
| 740 | 740 | if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) { |
| 741 | 741 | id1 = id_obj.getArrayItem(0).getStringValue(); |
| 742 | 742 | } else { |
| ... | ... | @@ -745,7 +745,7 @@ QPDF::initializeEncryption() |
| 745 | 745 | warn(damagedPDF("trailer", "invalid /ID in trailer dictionary")); |
| 746 | 746 | } |
| 747 | 747 | |
| 748 | - QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt"); | |
| 748 | + QPDFObjectHandle encryption_dict = m->xref_table.trailer.getKey("/Encrypt"); | |
| 749 | 749 | if (!encryption_dict.isDictionary()) { |
| 750 | 750 | throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary"); |
| 751 | 751 | } | ... | ... |
libqpdf/QPDF_json.cc
| ... | ... | @@ -593,8 +593,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) |
| 593 | 593 | this->saw_value = true; |
| 594 | 594 | // The trailer must be a dictionary, so we can use setNextStateIfDictionary. |
| 595 | 595 | if (setNextStateIfDictionary("trailer.value", value, st_object)) { |
| 596 | - this->pdf.m->trailer = makeObject(value); | |
| 597 | - setObjectDescription(this->pdf.m->trailer, value); | |
| 596 | + pdf.m->xref_table.trailer = makeObject(value); | |
| 597 | + setObjectDescription(this->pdf.m->xref_table.trailer, value); | |
| 598 | 598 | } |
| 599 | 599 | } else if (key == "stream") { |
| 600 | 600 | // Don't need to set saw_stream here since there's already an error. | ... | ... |
libqpdf/QPDF_linearization.cc
| ... | ... | @@ -461,12 +461,11 @@ QPDF::checkLinearizationInternal() |
| 461 | 461 | break; |
| 462 | 462 | } |
| 463 | 463 | } |
| 464 | - if (m->file->tell() != m->first_xref_item_offset) { | |
| 464 | + if (m->file->tell() != m->xref_table.first_item_offset) { | |
| 465 | 465 | QTC::TC("qpdf", "QPDF err /T mismatch"); |
| 466 | 466 | linearizationWarning( |
| 467 | - "space before first xref item (/T) mismatch " | |
| 468 | - "(computed = " + | |
| 469 | - std::to_string(m->first_xref_item_offset) + | |
| 467 | + "space before first xref item (/T) mismatch (computed = " + | |
| 468 | + std::to_string(m->xref_table.first_item_offset) + | |
| 470 | 469 | "; file = " + std::to_string(m->file->tell())); |
| 471 | 470 | } |
| 472 | 471 | |
| ... | ... | @@ -477,7 +476,7 @@ QPDF::checkLinearizationInternal() |
| 477 | 476 | // compressed objects are supposed to be at the end of the containing xref section if any object |
| 478 | 477 | // streams are in use. |
| 479 | 478 | |
| 480 | - if (m->uncompressed_after_compressed) { | |
| 479 | + if (m->xref_table.uncompressed_after_compressed) { | |
| 481 | 480 | linearizationWarning("linearized file contains an uncompressed object after a compressed " |
| 482 | 481 | "one in a cross-reference stream"); |
| 483 | 482 | } | ... | ... |
libqpdf/QPDF_optimization.cc
| ... | ... | @@ -115,13 +115,13 @@ QPDF::optimize_internal( |
| 115 | 115 | } |
| 116 | 116 | |
| 117 | 117 | // Traverse document-level items |
| 118 | - for (auto const& key: m->trailer.getKeys()) { | |
| 118 | + for (auto const& key: m->xref_table.trailer.getKeys()) { | |
| 119 | 119 | if (key == "/Root") { |
| 120 | 120 | // handled separately |
| 121 | 121 | } else { |
| 122 | 122 | updateObjectMaps( |
| 123 | 123 | ObjUser(ObjUser::ou_trailer_key, key), |
| 124 | - m->trailer.getKey(key), | |
| 124 | + m->xref_table.trailer.getKey(key), | |
| 125 | 125 | skip_stream_parameters); |
| 126 | 126 | } |
| 127 | 127 | } |
| ... | ... | @@ -169,13 +169,13 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) |
| 169 | 169 | // values for them. |
| 170 | 170 | std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; |
| 171 | 171 | pushInheritedAttributesToPageInternal( |
| 172 | - m->trailer.getKey("/Root").getKey("/Pages"), | |
| 172 | + m->xref_table.trailer.getKey("/Root").getKey("/Pages"), | |
| 173 | 173 | key_ancestors, |
| 174 | 174 | allow_changes, |
| 175 | 175 | warn_skipped_keys); |
| 176 | 176 | if (!key_ancestors.empty()) { |
| 177 | - throw std::logic_error("key_ancestors not empty after" | |
| 178 | - " pushing inherited attributes to pages"); | |
| 177 | + throw std::logic_error( | |
| 178 | + "key_ancestors not empty after pushing inherited attributes to pages"); | |
| 179 | 179 | } |
| 180 | 180 | m->pushed_inherited_attributes_to_pages = true; |
| 181 | 181 | m->ever_pushed_inherited_attributes_to_pages = true; | ... | ... |
libqpdf/qpdf/QPDF_private.hh
| ... | ... | @@ -3,6 +3,25 @@ |
| 3 | 3 | |
| 4 | 4 | #include <qpdf/QPDF.hh> |
| 5 | 5 | |
| 6 | +// Xref_table holds the PDF's cross-reference table (the inherited map), its trailer and the related parsing state. | |
| 7 | +class QPDF::Xref_table: public std::map<QPDFObjGen, QPDFXRefEntry> | |
| 8 | +{ | |
| 9 | + public: | |
| 10 | + QPDFObjectHandle trailer; | |
| 11 | + bool reconstructed{false}; | |
| 12 | + // Largest accepted object id. Various tables are indexed by object id, with potential size id + 1 | |
| 13 | + int max_id{std::numeric_limits<int>::max() - 1}; | |
| 14 | + qpdf_offset_t max_offset{0}; | |
| 15 | + std::set<int> deleted_objects; | |
| 16 | + bool ignore_streams{false}; | |
| 17 | + bool parsed{false}; | |
| 18 | + bool attempt_recovery{true}; | |
| 19 | + | |
| 20 | + // Linearization data, read by QPDF::checkLinearizationInternal() | |
| 21 | + bool uncompressed_after_compressed{false}; | |
| 22 | + qpdf_offset_t first_item_offset{0}; // actual value from file, recorded when the entry for object 0 is read | |
| 23 | +}; | |
| 24 | + | |
| 6 | 25 | // Writer class is restricted to QPDFWriter so that only it can call certain methods. |
| 7 | 26 | class QPDF::Writer |
| 8 | 27 | { |
| ... | ... | @@ -459,21 +478,15 @@ class QPDF::Members |
| 459 | 478 | std::shared_ptr<InputSource> file; |
| 460 | 479 | std::string last_object_description; |
| 461 | 480 | bool provided_password_is_hex_key{false}; |
| 462 | - bool ignore_xref_streams{false}; | |
| 463 | 481 | bool suppress_warnings{false}; |
| 464 | 482 | size_t max_warnings{0}; |
| 465 | 483 | bool attempt_recovery{true}; |
| 466 | 484 | bool check_mode{false}; |
| 467 | 485 | std::shared_ptr<EncryptionParameters> encp; |
| 468 | 486 | std::string pdf_version; |
| 469 | - std::map<QPDFObjGen, QPDFXRefEntry> xref_table; | |
| 470 | - // Various tables are indexed by object id, with potential size id + 1 | |
| 471 | - int xref_table_max_id{std::numeric_limits<int>::max() - 1}; | |
| 472 | - qpdf_offset_t xref_table_max_offset{0}; | |
| 473 | - std::set<int> deleted_objects; | |
| 487 | + Xref_table xref_table; | |
| 474 | 488 | std::map<QPDFObjGen, ObjCache> obj_cache; |
| 475 | 489 | std::set<QPDFObjGen> resolving; |
| 476 | - QPDFObjectHandle trailer; | |
| 477 | 490 | std::vector<QPDFObjectHandle> all_pages; |
| 478 | 491 | bool invalid_page_found{false}; |
| 479 | 492 | std::map<QPDFObjGen, int> pageobj_to_pages_pos; |
| ... | ... | @@ -485,16 +498,12 @@ class QPDF::Members |
| 485 | 498 | std::shared_ptr<QPDFObjectHandle::StreamDataProvider> copied_streams; |
| 486 | 499 | // copied_stream_data_provider is owned by copied_streams |
| 487 | 500 | CopiedStreamDataProvider* copied_stream_data_provider{nullptr}; |
| 488 | - bool reconstructed_xref{false}; | |
| 489 | 501 | bool fixed_dangling_refs{false}; |
| 490 | 502 | bool immediate_copy_from{false}; |
| 491 | 503 | bool in_parse{false}; |
| 492 | - bool parsed{false}; | |
| 493 | 504 | std::set<int> resolved_object_streams; |
| 494 | 505 | |
| 495 | 506 | // Linearization data |
| 496 | - qpdf_offset_t first_xref_item_offset{0}; // actual value from file | |
| 497 | - bool uncompressed_after_compressed{false}; | |
| 498 | 507 | bool linearization_warnings{false}; |
| 499 | 508 | |
| 500 | 509 | // Linearization parameter dictionary and hint table data: may be read from file or computed | ... | ... |