Commit 39abb113763a5f3686656cfaa9086ede4495a06d

Authored by m-holger
1 parent 82419ca0

Make all QPDF::Xref_table data members private

libqpdf/QPDF.cc
... ... @@ -304,7 +304,7 @@ QPDF::registerStreamFilter(
304 304 void
305 305 QPDF::setIgnoreXRefStreams(bool val)
306 306 {
307   - m->xref_table.ignore_streams = val;
  307 + m->xref_table.ignore_streams(val);
308 308 }
309 309  
310 310 std::shared_ptr<QPDFLogger>
... ... @@ -342,7 +342,7 @@ void
342 342 QPDF::setAttemptRecovery(bool val)
343 343 {
344 344 m->attempt_recovery = val;
345   - m->xref_table.attempt_recovery = val;
  345 + m->xref_table.attempt_recovery(val);
346 346 }
347 347  
348 348 void
... ... @@ -496,11 +496,10 @@ QPDF::Xref_table::initialize()
496 496 // 30 characters to leave room for the startxref stuff.
497 497 file->seek(0, SEEK_END);
498 498 qpdf_offset_t end_offset = file->tell();
499   - max_offset = end_offset;
500 499 // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
501 500 // scenarios at least 3 bytes are required.
502   - if (max_id > max_offset / 3) {
503   - max_id = static_cast<int>(max_offset / 3);
  501 + if (max_id_ > end_offset / 3) {
  502 + max_id_ = static_cast<int>(end_offset / 3);
504 503 }
505 504 qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
506 505 PatternFinder sf(qpdf, &QPDF::findStartxref);
... ... @@ -522,7 +521,7 @@ QPDF::Xref_table::initialize()
522 521 throw damaged_pdf(std::string("error reading xref: ") + e.what());
523 522 }
524 523 } catch (QPDFExc& e) {
525   - if (attempt_recovery) {
  524 + if (attempt_recovery_) {
526 525 reconstruct(e);
527 526 QTC::TC("qpdf", "QPDF reconstructed xref table");
528 527 } else {
... ... @@ -530,13 +529,13 @@ QPDF::Xref_table::initialize()
530 529 }
531 530 }
532 531  
533   - parsed = true;
  532 + initialized_ = true;
534 533 }
535 534  
536 535 void
537 536 QPDF::Xref_table::reconstruct(QPDFExc& e)
538 537 {
539   - if (reconstructed) {
  538 + if (reconstructed_) {
540 539 // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because
541 540 // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now.
542 541 throw e;
... ... @@ -551,7 +550,7 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
551 550 }
552 551 };
553 552  
554   - reconstructed = true;
  553 + reconstructed_ = true;
555 554 // We may find more objects, which may contain dangling references.
556 555 qpdf.m->fixed_dangling_refs = false;
557 556  
... ... @@ -584,20 +583,20 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
584 583 if (t2.isInteger() && read_token(MAX_LEN).isWord("obj")) {
585 584 int obj = QUtil::string_to_int(t1.getValue().c_str());
586 585 int gen = QUtil::string_to_int(t2.getValue().c_str());
587   - if (obj <= max_id) {
  586 + if (obj <= max_id_) {
588 587 insert_reconstructed(obj, token_start, gen);
589 588 } else {
590 589 warn_damaged("ignoring object with impossibly large id " + std::to_string(obj));
591 590 }
592 591 }
593 592 file->seek(pos, SEEK_SET);
594   - } else if (!trailer && t1.isWord("trailer")) {
  593 + } else if (!trailer_ && t1.isWord("trailer")) {
595 594 auto pos = file->tell();
596 595 QPDFObjectHandle t = read_trailer();
597 596 if (!t.isDictionary()) {
598 597 // Oh well. It was worth a try.
599 598 } else {
600   - trailer = t;
  599 + trailer_ = t;
601 600 }
602 601 file->seek(pos, SEEK_SET);
603 602 }
... ... @@ -606,7 +605,7 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
606 605 }
607 606 deleted_objects.clear();
608 607  
609   - if (!trailer) {
  608 + if (!trailer_) {
610 609 qpdf_offset_t max_offset{0};
611 610 // If there are any xref streams, take the last one to appear.
612 611 for (auto const& iter: table) {
... ... @@ -625,7 +624,7 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
625 624 auto offset = entry.getOffset();
626 625 if (offset > max_offset) {
627 626 max_offset = offset;
628   - trailer = oh.getDict();
  627 + trailer_ = oh.getDict();
629 628 }
630 629 check_warnings();
631 630 }
... ... @@ -640,7 +639,7 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
640 639 }
641 640 }
642 641  
643   - if (!trailer) {
  642 + if (!trailer_) {
644 643 // We could check the last encountered object to see if it was an xref stream. If so, we
645 644 // could try to get the trailer from there. This may make it possible to recover files with
646 645 // bad startxref pointers even when they have object streams.
... ... @@ -653,12 +652,12 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
653 652 throw damaged_pdf("unable to find objects while recovering damaged file");
654 653 }
655 654 check_warnings();
656   - if (!parsed) {
657   - parsed = true;
  655 + if (!initialized_) {
  656 + initialized_ = true;
658 657 qpdf.getAllPages();
659 658 check_warnings();
660 659 if (qpdf.m->all_pages.empty()) {
661   - parsed = false;
  660 + initialized_ = false;
662 661 throw damaged_pdf("unable to find any pages while recovering damaged file");
663 662 }
664 663 }
... ... @@ -730,10 +729,10 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
730 729 }
731 730 }
732 731  
733   - if (!trailer) {
  732 + if (!trailer_) {
734 733 throw damaged_pdf("unable to find trailer while reading xref");
735 734 }
736   - int size = trailer.getKey("/Size").getIntValueAsInt();
  735 + int size = trailer_.getKey("/Size").getIntValueAsInt();
737 736 int max_obj = 0;
738 737 if (!table.empty()) {
739 738 max_obj = table.rbegin()->first.getObj();
... ... @@ -967,7 +966,7 @@ QPDF::Xref_table::read_table(qpdf_offset_t xref_offset)
967 966 for (qpdf_offset_t i = obj; i - num < obj; ++i) {
968 967 if (i == 0) {
969 968 // This is needed by checkLinearization()
970   - first_item_offset = file->tell();
  969 + first_item_offset_ = file->tell();
971 970 }
972 971 // For xref_table, these will always be small enough to be ints
973 972 qpdf_offset_t f1 = 0;
... ... @@ -998,21 +997,21 @@ QPDF::Xref_table::read_table(qpdf_offset_t xref_offset)
998 997 throw qpdf.damagedPDF("", "expected trailer dictionary");
999 998 }
1000 999  
1001   - if (!trailer) {
1002   - trailer = cur_trailer;
  1000 + if (!trailer_) {
  1001 + trailer_ = cur_trailer;
1003 1002  
1004   - if (!trailer.hasKey("/Size")) {
  1003 + if (!trailer_.hasKey("/Size")) {
1005 1004 QTC::TC("qpdf", "QPDF trailer lacks size");
1006 1005 throw qpdf.damagedPDF("trailer", "trailer dictionary lacks /Size key");
1007 1006 }
1008   - if (!trailer.getKey("/Size").isInteger()) {
  1007 + if (!trailer_.getKey("/Size").isInteger()) {
1009 1008 QTC::TC("qpdf", "QPDF trailer size not integer");
1010 1009 throw qpdf.damagedPDF("trailer", "/Size key in trailer dictionary is not an integer");
1011 1010 }
1012 1011 }
1013 1012  
1014 1013 if (cur_trailer.hasKey("/XRefStm")) {
1015   - if (ignore_streams) {
  1014 + if (ignore_streams_) {
1016 1015 QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer");
1017 1016 } else {
1018 1017 if (cur_trailer.getKey("/XRefStm").isInteger()) {
... ... @@ -1041,7 +1040,7 @@ QPDF::Xref_table::read_table(qpdf_offset_t xref_offset)
1041 1040 qpdf_offset_t
1042 1041 QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset)
1043 1042 {
1044   - if (!ignore_streams) {
  1043 + if (!ignore_streams_) {
1045 1044 QPDFObjGen x_og;
1046 1045 QPDFObjectHandle xref_obj;
1047 1046 try {
... ... @@ -1237,14 +1236,14 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xr
1237 1236 // object record, in which case the generation number appears as the third field.
1238 1237 if (saw_first_compressed_object) {
1239 1238 if (fields[0] != 2) {
1240   - uncompressed_after_compressed = true;
  1239 + uncompressed_after_compressed_ = true;
1241 1240 }
1242 1241 } else if (fields[0] == 2) {
1243 1242 saw_first_compressed_object = true;
1244 1243 }
1245 1244 if (obj == 0) {
1246 1245 // This is needed by checkLinearization()
1247   - first_item_offset = xref_offset;
  1246 + first_item_offset_ = xref_offset;
1248 1247 } else if (fields[0] == 0) {
1249 1248 // Ignore fields[2], which we don't care about in this case. This works around the
1250 1249 // issue of some PDF files that put invalid values, like -1, here for deleted
... ... @@ -1257,8 +1256,8 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xr
1257 1256 }
1258 1257 }
1259 1258  
1260   - if (!trailer) {
1261   - trailer = dict;
  1259 + if (!trailer_) {
  1260 + trailer_ = dict;
1262 1261 }
1263 1262  
1264 1263 if (dict.hasKey("/Prev")) {
... ... @@ -1283,7 +1282,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1283 1282 // If there is already an entry for this object and generation in the table, it means that a
1284 1283 // later xref table has registered this object. Disregard this one.
1285 1284  
1286   - if (obj > max_id) {
  1285 + if (obj > max_id_) {
1287 1286 // ignore impossibly large object ids or object ids > Size.
1288 1287 return;
1289 1288 }
... ... @@ -1336,7 +1335,7 @@ QPDF::Xref_table::insert_free(QPDFObjGen og)
1336 1335 void
1337 1336 QPDF::Xref_table::insert_reconstructed(int obj, qpdf_offset_t f1, int f2)
1338 1337 {
1339   - if (!(obj > 0 && obj <= max_id && 0 <= f2 && f2 < 65535)) {
  1338 + if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && f2 < 65535)) {
1340 1339 QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
1341 1340 return;
1342 1341 }
... ... @@ -1385,11 +1384,11 @@ QPDF::Xref_table::show()
1385 1384 bool
1386 1385 QPDF::Xref_table::resolve()
1387 1386 {
1388   - bool may_change = !reconstructed;
  1387 + bool may_change = !reconstructed_;
1389 1388 for (auto& iter: table) {
1390 1389 if (qpdf.isUnresolved(iter.first)) {
1391 1390 qpdf.resolve(iter.first);
1392   - if (may_change && reconstructed) {
  1391 + if (may_change && reconstructed_) {
1393 1392 return false;
1394 1393 }
1395 1394 }
... ... @@ -1958,7 +1957,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
1958 1957  
1959 1958 int num = QUtil::string_to_int(tnum.getValue().c_str());
1960 1959 long long offset = QUtil::string_to_int(toffset.getValue().c_str());
1961   - if (num > m->xref_table.max_id) {
  1960 + if (num > m->xref_table.max_id()) {
1962 1961 continue;
1963 1962 }
1964 1963 if (num == obj_stream_number) {
... ... @@ -2100,7 +2099,7 @@ QPDF::getObjectForParser(int id, int gen, bool parse_pdf)
2100 2099 if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) {
2101 2100 return iter->second.object;
2102 2101 }
2103   - if (m->xref_table.type(og) || !m->xref_table.parsed) {
  2102 + if (m->xref_table.type(og) || !m->xref_table.initialized()) {
2104 2103 return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object;
2105 2104 }
2106 2105 if (parse_pdf) {
... ... @@ -2116,8 +2115,9 @@ QPDF::getObjectForJSON(int id, int gen)
2116 2115 auto [it, inserted] = m->obj_cache.try_emplace(og);
2117 2116 auto& obj = it->second.object;
2118 2117 if (inserted) {
2119   - obj = (m->xref_table.parsed && !m->xref_table.type(og)) ? QPDF_Null::create(this, og)
2120   - : QPDF_Unresolved::create(this, og);
  2118 + obj = (m->xref_table.initialized() && !m->xref_table.type(og))
  2119 + ? QPDF_Null::create(this, og)
  2120 + : QPDF_Unresolved::create(this, og);
2121 2121 }
2122 2122 return obj;
2123 2123 }
... ... @@ -2127,7 +2127,7 @@ QPDF::getObject(QPDFObjGen const& og)
2127 2127 {
2128 2128 if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) {
2129 2129 return {it->second.object};
2130   - } else if (m->xref_table.parsed && !m->xref_table.type(og)) {
  2130 + } else if (m->xref_table.initialized() && !m->xref_table.type(og)) {
2131 2131 return QPDF_Null::create();
2132 2132 } else {
2133 2133 auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1);
... ... @@ -2524,13 +2524,13 @@ QPDF::getExtensionLevel()
2524 2524 QPDFObjectHandle
2525 2525 QPDF::getTrailer()
2526 2526 {
2527   - return m->xref_table.trailer;
  2527 + return m->xref_table.trailer();
2528 2528 }
2529 2529  
2530 2530 QPDFObjectHandle
2531 2531 QPDF::getRoot()
2532 2532 {
2533   - QPDFObjectHandle root = m->xref_table.trailer.getKey("/Root");
  2533 + QPDFObjectHandle root = m->xref_table.trailer().getKey("/Root");
2534 2534 if (!root.isDictionary()) {
2535 2535 throw damagedPDF("", 0, "unable to find /Root dictionary");
2536 2536 } else if (
... ... @@ -2552,7 +2552,7 @@ QPDF::getXRefTable()
2552 2552 std::map<QPDFObjGen, QPDFXRefEntry> const&
2553 2553 QPDF::getXRefTableInternal()
2554 2554 {
2555   - if (!m->xref_table.parsed) {
  2555 + if (!m->xref_table.initialized()) {
2556 2556 throw std::logic_error("QPDF::getXRefTable called before parsing.");
2557 2557 }
2558 2558  
... ... @@ -2602,14 +2602,14 @@ QPDF::getCompressibleObjGens()
2602 2602 // iterating through the xref table since it avoids preserving orphaned items.
2603 2603  
2604 2604 // Exclude encryption dictionary, if any
2605   - QPDFObjectHandle encryption_dict = m->xref_table.trailer.getKey("/Encrypt");
  2605 + QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt");
2606 2606 QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
2607 2607  
2608 2608 const size_t max_obj = getObjectCount();
2609 2609 std::vector<bool> visited(max_obj, false);
2610 2610 std::vector<QPDFObjectHandle> queue;
2611 2611 queue.reserve(512);
2612   - queue.push_back(m->xref_table.trailer);
  2612 + queue.push_back(m->xref_table.trailer());
2613 2613 std::vector<T> result;
2614 2614 if constexpr (std::is_same_v<T, QPDFObjGen>) {
2615 2615 result.reserve(m->obj_cache.size());
... ...
libqpdf/QPDF_encryption.cc
... ... @@ -727,7 +727,7 @@ QPDF::initializeEncryption()
727 727 // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption
728 728 // dictionary.
729 729  
730   - if (!m->xref_table.trailer.hasKey("/Encrypt")) {
  730 + if (!m->xref_table.trailer().hasKey("/Encrypt")) {
731 731 return;
732 732 }
733 733  
... ... @@ -736,7 +736,7 @@ QPDF::initializeEncryption()
736 736 m->encp->encrypted = true;
737 737  
738 738 std::string id1;
739   - QPDFObjectHandle id_obj = m->xref_table.trailer.getKey("/ID");
  739 + QPDFObjectHandle id_obj = m->xref_table.trailer().getKey("/ID");
740 740 if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) {
741 741 id1 = id_obj.getArrayItem(0).getStringValue();
742 742 } else {
... ... @@ -745,7 +745,7 @@ QPDF::initializeEncryption()
745 745 warn(damagedPDF("trailer", "invalid /ID in trailer dictionary"));
746 746 }
747 747  
748   - QPDFObjectHandle encryption_dict = m->xref_table.trailer.getKey("/Encrypt");
  748 + QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt");
749 749 if (!encryption_dict.isDictionary()) {
750 750 throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary");
751 751 }
... ...
libqpdf/QPDF_json.cc
... ... @@ -593,8 +593,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
593 593 this->saw_value = true;
594 594 // The trailer must be a dictionary, so we can use setNextStateIfDictionary.
595 595 if (setNextStateIfDictionary("trailer.value", value, st_object)) {
596   - pdf.m->xref_table.trailer = makeObject(value);
597   - setObjectDescription(this->pdf.m->xref_table.trailer, value);
  596 + pdf.m->xref_table.trailer(makeObject(value));
598 597 }
599 598 } else if (key == "stream") {
600 599 // Don't need to set saw_stream here since there's already an error.
... ...
libqpdf/QPDF_linearization.cc
... ... @@ -461,11 +461,11 @@ QPDF::checkLinearizationInternal()
461 461 break;
462 462 }
463 463 }
464   - if (m->file->tell() != m->xref_table.first_item_offset) {
  464 + if (m->file->tell() != m->xref_table.first_item_offset()) {
465 465 QTC::TC("qpdf", "QPDF err /T mismatch");
466 466 linearizationWarning(
467 467 "space before first xref item (/T) mismatch (computed = " +
468   - std::to_string(m->xref_table.first_item_offset) +
  468 + std::to_string(m->xref_table.first_item_offset()) +
469 469 "; file = " + std::to_string(m->file->tell()));
470 470 }
471 471  
... ... @@ -476,7 +476,7 @@ QPDF::checkLinearizationInternal()
476 476 // compressed objects are supposed to be at the end of the containing xref section if any object
477 477 // streams are in use.
478 478  
479   - if (m->xref_table.uncompressed_after_compressed) {
  479 + if (m->xref_table.uncompressed_after_compressed()) {
480 480 linearizationWarning("linearized file contains an uncompressed object after a compressed "
481 481 "one in a cross-reference stream");
482 482 }
... ...
libqpdf/QPDF_optimization.cc
... ... @@ -115,13 +115,13 @@ QPDF::optimize_internal(
115 115 }
116 116  
117 117 // Traverse document-level items
118   - for (auto const& key: m->xref_table.trailer.getKeys()) {
  118 + for (auto const& key: m->xref_table.trailer().getKeys()) {
119 119 if (key == "/Root") {
120 120 // handled separately
121 121 } else {
122 122 updateObjectMaps(
123 123 ObjUser(ObjUser::ou_trailer_key, key),
124   - m->xref_table.trailer.getKey(key),
  124 + m->xref_table.trailer().getKey(key),
125 125 skip_stream_parameters);
126 126 }
127 127 }
... ... @@ -169,7 +169,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
169 169 // values for them.
170 170 std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
171 171 pushInheritedAttributesToPageInternal(
172   - m->xref_table.trailer.getKey("/Root").getKey("/Pages"),
  172 + m->xref_table.trailer().getKey("/Root").getKey("/Pages"),
173 173 key_ancestors,
174 174 allow_changes,
175 175 warn_skipped_keys);
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -19,6 +19,18 @@ class QPDF::Xref_table
19 19 void show();
20 20 bool resolve();
21 21  
  22 + QPDFObjectHandle
  23 + trailer() const
  24 + {
  25 + return trailer_;
  26 + }
  27 +
  28 + void
  29 + trailer(QPDFObjectHandle&& oh)
  30 + {
  31 + trailer_ = std::move(oh);
  32 + }
  33 +
22 34 // Returns 0 if og is not in table.
23 35 int
24 36 type(QPDFObjGen og) const
... ... @@ -61,22 +73,47 @@ class QPDF::Xref_table
61 73 size_t
62 74 size() const noexcept
63 75 {
64   - return trailer ? table.size() : 0;
  76 + return trailer_ ? table.size() : 0;
65 77 }
66 78  
67   - QPDFObjectHandle trailer;
68   - bool reconstructed{false};
69   - // Various tables are indexed by object id, with potential size id + 1
70   - int max_id{std::numeric_limits<int>::max() - 1};
71   - qpdf_offset_t max_offset{0};
72   - std::set<int> deleted_objects;
73   - bool ignore_streams{false};
74   - bool parsed{false};
75   - bool attempt_recovery{true};
  79 + void
  80 + ignore_streams(bool val) noexcept
  81 + {
  82 + ignore_streams_ = val;
  83 + }
76 84  
77   - // Linearization data
78   - bool uncompressed_after_compressed{false};
79   - qpdf_offset_t first_item_offset{0}; // actual value from file
  85 + bool
  86 + initialized() const noexcept
  87 + {
  88 + return initialized_;
  89 + }
  90 +
  91 + void
  92 + attempt_recovery(bool val) noexcept
  93 + {
  94 + attempt_recovery_ = val;
  95 + }
  96 +
  97 + int
  98 + max_id() const noexcept
  99 + {
  100 + return max_id_;
  101 + }
  102 +
  103 + // For Linearization
  104 +
  105 + bool
  106 + uncompressed_after_compressed() const noexcept
  107 + {
  108 + return uncompressed_after_compressed_;
  109 + }
  110 +
  111 + // Actual value from file
  112 + qpdf_offset_t
  113 + first_item_offset() const noexcept
  114 + {
  115 + return first_item_offset_;
  116 + }
80 117  
81 118 private:
82 119 void read(qpdf_offset_t offset);
... ... @@ -135,6 +172,19 @@ class QPDF::Xref_table
135 172 QPDFTokenizer tokenizer;
136 173  
137 174 std::map<QPDFObjGen, QPDFXRefEntry> table;
  175 + QPDFObjectHandle trailer_;
  176 +
  177 + bool attempt_recovery_{true};
  178 + bool initialized_{false};
  179 + bool ignore_streams_{false};
  180 + std::set<int> deleted_objects;
  181 + bool reconstructed_{false};
  182 + // Various tables are indexed by object id, with potential size id + 1
  183 + int max_id_{std::numeric_limits<int>::max() - 1};
  184 +
  185 + // Linearization data
  186 + bool uncompressed_after_compressed_{false};
  187 + qpdf_offset_t first_item_offset_{0}; // actual value from file
138 188 };
139 189  
140 190 // Writer class is restricted to QPDFWriter so that only it can call certain methods.
... ...