Commit b5a5780019a5b75465671a23f55772a20b007115

Authored by m-holger
1 parent a3f693c8

Make Xref_table an inner class of QPDF::Objects

include/qpdf/QPDF.hh
... ... @@ -734,7 +734,6 @@ class QPDF
734 734 class ParseGuard;
735 735 class Pipe;
736 736 class JobSetter;
737   - class Xref_table;
738 737  
739 738 // For testing only -- do not add to DLL
740 739 static bool test_json_validators();
... ... @@ -811,7 +810,7 @@ class QPDF
811 810 void optimize(
812 811 QPDFWriter::ObjTable const& obj,
813 812 std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
814   - void optimize(Xref_table const& obj);
  813 + void optimize(Objects const& obj);
815 814  
816 815 // Get lists of all objects in order according to the part of a linearized file that they belong
817 816 // to.
... ... @@ -904,7 +903,7 @@ class QPDF
904 903 QPDFObjectHandle
905 904 getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
906 905 QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj);
907   - QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, Xref_table const& obj);
  906 + QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, Objects const& obj);
908 907 int lengthNextN(int first_object, int n);
909 908 void
910 909 checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
... ... @@ -950,7 +949,7 @@ class QPDF
950 949 std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
951 950 void filterCompressedObjects(std::map<int, int> const& object_stream_data);
952 951 void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data);
953   - void filterCompressedObjects(Xref_table const& object_stream_data);
  952 + void filterCompressedObjects(Objects const& object_stream_data);
954 953  
955 954 // JSON import
956 955 void importJSON(std::shared_ptr<InputSource>, bool must_be_complete);
... ...
libqpdf/QPDF.cc
... ... @@ -185,8 +185,7 @@ QPDF::Members::Members(QPDF& qpdf) :
185 185 file_sp(new InvalidInputSource(no_input_name)),
186 186 file(file_sp.get()),
187 187 encp(new EncryptionParameters),
188   - objects(qpdf, this),
189   - xref_table(qpdf, objects, file)
  188 + objects(qpdf, this, file)
190 189 {
191 190 }
192 191  
... ... @@ -279,7 +278,7 @@ QPDF::emptyPDF()
279 278 {
280 279 m->pdf_version = "1.3";
281 280 m->no_input_name = "empty PDF";
282   - m->xref_table.initialize_empty();
  281 + m->objects.xref_table().initialize_empty();
283 282 }
284 283  
285 284 void
... ... @@ -292,7 +291,7 @@ QPDF::registerStreamFilter(
292 291 void
293 292 QPDF::setIgnoreXRefStreams(bool val)
294 293 {
295   - m->xref_table.ignore_streams(val);
  294 + m->objects.xref_table().ignore_streams(val);
296 295 }
297 296  
298 297 std::shared_ptr<QPDFLogger>
... ... @@ -330,7 +329,7 @@ void
330 329 QPDF::setAttemptRecovery(bool val)
331 330 {
332 331 m->attempt_recovery = val;
333   - m->xref_table.attempt_recovery(val);
  332 + m->objects.xref_table().attempt_recovery(val);
334 333 }
335 334  
336 335 void
... ... @@ -424,9 +423,9 @@ QPDF::parse(char const* password)
424 423 m->pdf_version = "1.2";
425 424 }
426 425  
427   - m->xref_table.initialize();
  426 + m->objects.xref_table().initialize();
428 427 initializeEncryption();
429   - if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
  428 + if (m->objects.xref_table().size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
430 429 // QPDFs created from JSON have an empty xref table and no root object yet.
431 430 throw damagedPDF("", 0, "unable to find page tree");
432 431 }
... ... @@ -469,7 +468,7 @@ QPDF::warn(
469 468 void
470 469 QPDF::showXRefTable()
471 470 {
472   - m->xref_table.show();
  471 + m->objects.xref_table().show();
473 472 }
474 473  
475 474 // Ensure all objects in the pdf file, including those in indirect references, appear in the object
... ... @@ -480,9 +479,9 @@ QPDF::fixDanglingReferences(bool force)
480 479 if (m->fixed_dangling_refs) {
481 480 return;
482 481 }
483   - if (!m->xref_table.resolve()) {
  482 + if (!m->objects.xref_table().resolve()) {
484 483 QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction");
485   - m->xref_table.resolve();
  484 + m->objects.xref_table().resolve();
486 485 }
487 486 m->fixed_dangling_refs = true;
488 487 }
... ... @@ -578,7 +577,7 @@ QPDF::getObject(QPDFObjGen const& og)
578 577 {
579 578 if (auto it = m->objects.obj_cache.find(og); it != m->objects.obj_cache.end()) {
580 579 return {it->second.object};
581   - } else if (m->xref_table.initialized() && !m->xref_table.type(og)) {
  580 + } else if (m->objects.xref_table().initialized() && !m->objects.xref_table().type(og)) {
582 581 return QPDF_Null::create();
583 582 } else {
584 583 auto result = m->objects.obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og));
... ... @@ -945,13 +944,13 @@ QPDF::getExtensionLevel()
945 944 QPDFObjectHandle
946 945 QPDF::getTrailer()
947 946 {
948   - return m->xref_table.trailer();
  947 + return m->objects.trailer();
949 948 }
950 949  
951 950 QPDFObjectHandle
952 951 QPDF::getRoot()
953 952 {
954   - QPDFObjectHandle root = m->xref_table.trailer().getKey("/Root");
  953 + auto root = m->objects.trailer().getKey("/Root");
955 954 if (!root.isDictionary()) {
956 955 throw damagedPDF("", 0, "unable to find /Root dictionary");
957 956 } else if (
... ... @@ -967,10 +966,10 @@ QPDF::getRoot()
967 966 std::map<QPDFObjGen, QPDFXRefEntry>
968 967 QPDF::getXRefTable()
969 968 {
970   - if (!m->xref_table.initialized()) {
  969 + if (!m->objects.xref_table().initialized()) {
971 970 throw std::logic_error("QPDF::getXRefTable called before parsing.");
972 971 }
973   - return m->xref_table.as_map();
  972 + return m->objects.xref_table().as_map();
974 973 }
975 974  
976 975 bool
... ...
libqpdf/QPDF_encryption.cc
... ... @@ -727,7 +727,7 @@ QPDF::initializeEncryption()
727 727 // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption
728 728 // dictionary.
729 729  
730   - if (!m->xref_table.trailer().hasKey("/Encrypt")) {
  730 + if (!m->objects.trailer().hasKey("/Encrypt")) {
731 731 return;
732 732 }
733 733  
... ... @@ -736,7 +736,7 @@ QPDF::initializeEncryption()
736 736 m->encp->encrypted = true;
737 737  
738 738 std::string id1;
739   - QPDFObjectHandle id_obj = m->xref_table.trailer().getKey("/ID");
  739 + QPDFObjectHandle id_obj = m->objects.trailer().getKey("/ID");
740 740 if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) {
741 741 id1 = id_obj.getArrayItem(0).getStringValue();
742 742 } else {
... ... @@ -745,7 +745,7 @@ QPDF::initializeEncryption()
745 745 warn(damagedPDF("trailer", "invalid /ID in trailer dictionary"));
746 746 }
747 747  
748   - QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt");
  748 + QPDFObjectHandle encryption_dict = m->objects.trailer().getKey("/Encrypt");
749 749 if (!encryption_dict.isDictionary()) {
750 750 throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary");
751 751 }
... ...
libqpdf/QPDF_json.cc
... ... @@ -582,7 +582,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
582 582 this->saw_value = true;
583 583 // The trailer must be a dictionary, so we can use setNextStateIfDictionary.
584 584 if (setNextStateIfDictionary("trailer.value", value, st_object)) {
585   - pdf.m->xref_table.trailer(makeObject(value));
  585 + pdf.m->objects.xref_table().trailer(makeObject(value));
586 586 }
587 587 } else if (key == "stream") {
588 588 // Don't need to set saw_stream here since there's already an error.
... ... @@ -776,7 +776,7 @@ QPDF::createFromJSON(std::shared_ptr<InputSource> is)
776 776 {
777 777 m->pdf_version = "1.3";
778 778 m->no_input_name = is->getName();
779   - m->xref_table.initialize_json();
  779 + m->objects.xref_table().initialize_json();
780 780 importJSON(is, true);
781 781 }
782 782  
... ...
libqpdf/QPDF_linearization.cc
... ... @@ -288,8 +288,8 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
288 288 QPDFObjGen og;
289 289 QPDFObjectHandle H =
290 290 objects().read(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false);
291   - qpdf_offset_t min_end_offset = m->xref_table.end_before_space(og);
292   - qpdf_offset_t max_end_offset = m->xref_table.end_after_space(og);
  291 + qpdf_offset_t min_end_offset = m->objects.xref_table().end_before_space(og);
  292 + qpdf_offset_t max_end_offset = m->objects.xref_table().end_after_space(og);
293 293 if (!H.isStream()) {
294 294 throw damagedPDF("linearization dictionary", "hint table is not a stream");
295 295 }
... ... @@ -303,8 +303,8 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
303 303 auto length_og = Hdict.getKey("/Length").getObjGen();
304 304 if (length_og.isIndirect()) {
305 305 QTC::TC("qpdf", "QPDF hint table length indirect");
306   - min_end_offset = m->xref_table.end_before_space(length_og);
307   - max_end_offset = m->xref_table.end_after_space(length_og);
  306 + min_end_offset = m->objects.xref_table().end_before_space(length_og);
  307 + max_end_offset = m->objects.xref_table().end_after_space(length_og);
308 308 } else {
309 309 QTC::TC("qpdf", "QPDF hint table length direct");
310 310 }
... ... @@ -441,7 +441,7 @@ QPDF::checkLinearizationInternal()
441 441 for (size_t i = 0; i < toS(npages); ++i) {
442 442 QPDFObjectHandle const& page = pages.at(i);
443 443 QPDFObjGen og(page.getObjGen());
444   - if (m->xref_table.type(og) == 2) {
  444 + if (m->objects.xref_table().type(og) == 2) {
445 445 linearizationWarning(
446 446 "page dictionary for page " + std::to_string(i) + " is compressed");
447 447 }
... ... @@ -457,11 +457,11 @@ QPDF::checkLinearizationInternal()
457 457 break;
458 458 }
459 459 }
460   - if (m->file->tell() != m->xref_table.first_item_offset()) {
  460 + if (m->file->tell() != m->objects.xref_table().first_item_offset()) {
461 461 QTC::TC("qpdf", "QPDF err /T mismatch");
462 462 linearizationWarning(
463 463 "space before first xref item (/T) mismatch (computed = " +
464   - std::to_string(m->xref_table.first_item_offset()) +
  464 + std::to_string(m->objects.xref_table().first_item_offset()) +
465 465 "; file = " + std::to_string(m->file->tell()));
466 466 }
467 467  
... ... @@ -472,7 +472,7 @@ QPDF::checkLinearizationInternal()
472 472 // compressed objects are supposed to be at the end of the containing xref section if any object
473 473 // streams are in use.
474 474  
475   - if (m->xref_table.uncompressed_after_compressed()) {
  475 + if (m->objects.xref_table().uncompressed_after_compressed()) {
476 476 linearizationWarning("linearized file contains an uncompressed object after a compressed "
477 477 "one in a cross-reference stream");
478 478 }
... ... @@ -481,8 +481,8 @@ QPDF::checkLinearizationInternal()
481 481 // make changes. If it has to, then the file is not properly linearized. We use the xref table
482 482 // to figure out which objects are compressed and which are uncompressed.
483 483  
484   - optimize(m->xref_table);
485   - calculateLinearizationData(m->xref_table);
  484 + optimize(m->objects);
  485 + calculateLinearizationData(m->objects);
486 486  
487 487 // E: offset of end of first page -- Implementation note 123 says Acrobat includes on extra
488 488 // object here by mistake. pdlin fails to place thumbnail images in section 9, so when
... ... @@ -499,8 +499,8 @@ QPDF::checkLinearizationInternal()
499 499 qpdf_offset_t max_E = -1;
500 500 for (auto const& oh: m->part6) {
501 501 QPDFObjGen og(oh.getObjGen());
502   - auto before = m->xref_table.end_before_space(og);
503   - auto after = m->xref_table.end_after_space(og);
  502 + auto before = m->objects.xref_table().end_before_space(og);
  503 + auto after = m->objects.xref_table().end_after_space(og);
504 504 if (before <= 0) {
505 505 // All objects have to have been dereferenced to be classified.
506 506 throw std::logic_error("linearization part6 object not in cache");
... ... @@ -533,7 +533,7 @@ QPDF::maxEnd(ObjUser const& ou)
533 533 }
534 534 qpdf_offset_t end = 0;
535 535 for (auto const& og: m->obj_user_to_objects[ou]) {
536   - auto e = m->xref_table.end_after_space(og);
  536 + auto e = m->objects.xref_table().end_after_space(og);
537 537 if (e <= 0) {
538 538 stopOnError("unknown object referenced in object user table");
539 539 }
... ... @@ -545,13 +545,14 @@ QPDF::maxEnd(ObjUser const& ou)
545 545 qpdf_offset_t
546 546 QPDF::getLinearizationOffset(QPDFObjGen const& og)
547 547 {
548   - switch (m->xref_table.type(og)) {
  548 + switch (m->objects.xref_table().type(og)) {
549 549 case 1:
550   - return m->xref_table.offset(og);
  550 + return m->objects.xref_table().offset(og);
551 551  
552 552 case 2:
553 553 // For compressed objects, return the offset of the object stream that contains them.
554   - return getLinearizationOffset(QPDFObjGen(m->xref_table.stream_number(og.getObj()), 0));
  554 + return getLinearizationOffset(
  555 + QPDFObjGen(m->objects.xref_table().stream_number(og.getObj()), 0));
555 556  
556 557 default:
557 558 stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2");
... ... @@ -571,13 +572,13 @@ QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map<int, int> const& obj
571 572 }
572 573  
573 574 QPDFObjectHandle
574   -QPDF::getUncompressedObject(QPDFObjectHandle& obj, Xref_table const& xref)
  575 +QPDF::getUncompressedObject(QPDFObjectHandle& obj, Objects const& objects)
575 576 {
576 577 auto og = obj.getObjGen();
577   - if (obj.isNull() || xref.type(og) != 2) {
  578 + if (obj.isNull() || objects.xref_table().type(og) != 2) {
578 579 return obj;
579 580 }
580   - return getObject(xref.stream_number(og.getObj()), 0);
  581 + return getObject(objects.xref_table().stream_number(og.getObj()), 0);
581 582 }
582 583  
583 584 QPDFObjectHandle
... ... @@ -597,7 +598,7 @@ QPDF::lengthNextN(int first_object, int n)
597 598 int length = 0;
598 599 for (int i = 0; i < n; ++i) {
599 600 QPDFObjGen og(first_object + i, 0);
600   - auto end = m->xref_table.end_after_space(og);
  601 + auto end = m->objects.xref_table().end_after_space(og);
601 602 if (end <= 0) {
602 603 linearizationWarning(
603 604 "no xref table entry for " + std::to_string(first_object + i) + " 0");
... ... @@ -627,7 +628,7 @@ QPDF::checkHPageOffset(
627 628 int npages = toI(pages.size());
628 629 qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset);
629 630 QPDFObjGen first_page_og(pages.at(0).getObjGen());
630   - if (m->xref_table.type(first_page_og) == 0) {
  631 + if (m->objects.xref_table().type(first_page_og) == 0) {
631 632 stopOnError("supposed first page object is not known");
632 633 }
633 634 qpdf_offset_t offset = getLinearizationOffset(first_page_og);
... ... @@ -638,7 +639,7 @@ QPDF::checkHPageOffset(
638 639 for (int pageno = 0; pageno < npages; ++pageno) {
639 640 QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen());
640 641 int first_object = page_og.getObj();
641   - if (m->xref_table.type(page_og) == 0) {
  642 + if (m->objects.xref_table().type(page_og) == 0) {
642 643 stopOnError("unknown object in page offset hint table");
643 644 }
644 645 offset = getLinearizationOffset(page_og);
... ... @@ -760,7 +761,7 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in
760 761 cur_object = so.first_shared_obj;
761 762  
762 763 QPDFObjGen og(cur_object, 0);
763   - if (m->xref_table.type(og) == 0) {
  764 + if (m->objects.xref_table().type(og) == 0) {
764 765 stopOnError("unknown object in shared object hint table");
765 766 }
766 767 qpdf_offset_t offset = getLinearizationOffset(og);
... ... @@ -811,7 +812,7 @@ QPDF::checkHOutlines()
811 812 return;
812 813 }
813 814 QPDFObjGen og(outlines.getObjGen());
814   - if (m->xref_table.type(og) == 0) {
  815 + if (m->objects.xref_table().type(og) == 0) {
815 816 stopOnError("unknown object in outlines hint table");
816 817 }
817 818 qpdf_offset_t offset = getLinearizationOffset(og);
... ... @@ -1158,7 +1159,7 @@ QPDF::calculateLinearizationData(T const& object_stream_data)
1158 1159 // Map all page objects to the containing object stream. This should be a no-op in a
1159 1160 // properly linearized file.
1160 1161 for (auto oh: getAllPages()) {
1161   - pages.push_back(getUncompressedObject(oh, object_stream_data));
  1162 + pages.emplace_back(getUncompressedObject(oh, object_stream_data));
1162 1163 }
1163 1164 }
1164 1165 int npages = toI(pages.size());
... ...
libqpdf/QPDF_objects.cc
... ... @@ -24,6 +24,9 @@
24 24 #include <qpdf/QTC.hh>
25 25 #include <qpdf/QUtil.hh>
26 26  
  27 +using Objects = QPDF::Objects;
  28 +using Xref_table = Objects::Xref_table;
  29 +
27 30 namespace
28 31 {
29 32 class InvalidInputSource final: public InputSource
... ... @@ -98,7 +101,7 @@ QPDF::findStartxref()
98 101 }
99 102  
100 103 void
101   -QPDF::Xref_table::initialize_empty()
  104 +Xref_table::initialize_empty()
102 105 {
103 106 initialized_ = true;
104 107 trailer_ = QPDFObjectHandle::newDictionary();
... ... @@ -114,7 +117,7 @@ QPDF::Xref_table::initialize_empty()
114 117 }
115 118  
116 119 void
117   -QPDF::Xref_table::initialize_json()
  120 +Xref_table::initialize_json()
118 121 {
119 122 initialized_ = true;
120 123 table.resize(1);
... ... @@ -123,7 +126,7 @@ QPDF::Xref_table::initialize_json()
123 126 }
124 127  
125 128 void
126   -QPDF::Xref_table::initialize()
  129 +Xref_table::initialize()
127 130 {
128 131 // PDF spec says %%EOF must be found within the last 1024 bytes of/ the file. We add an extra
129 132 // 30 characters to leave room for the startxref stuff.
... ... @@ -166,7 +169,7 @@ QPDF::Xref_table::initialize()
166 169 }
167 170  
168 171 void
169   -QPDF::Xref_table::reconstruct(QPDFExc& e)
  172 +Xref_table::reconstruct(QPDFExc& e)
170 173 {
171 174 if (reconstructed_) {
172 175 // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because
... ... @@ -318,7 +321,7 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
318 321 }
319 322  
320 323 void
321   -QPDF::Xref_table::read(qpdf_offset_t xref_offset)
  324 +Xref_table::read(qpdf_offset_t xref_offset)
322 325 {
323 326 std::map<int, int> free_table;
324 327 std::set<qpdf_offset_t> visited;
... ... @@ -392,8 +395,8 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
392 395 // entries, including missing entries before the last actual entry.
393 396 }
394 397  
395   -QPDF::Xref_table::Subsection
396   -QPDF::Xref_table::subsection(std::string const& line)
  398 +Xref_table::Subsection
  399 +Xref_table::subsection(std::string const& line)
397 400 {
398 401 auto terminate = [this]() -> void {
399 402 QTC::TC("qpdf", "QPDF invalid xref");
... ... @@ -447,10 +450,10 @@ QPDF::Xref_table::subsection(std::string const& line)
447 450 return {obj, count, file->getLastOffset() + toI(p - start)};
448 451 }
449 452  
450   -std::vector<QPDF::Xref_table::Subsection>
451   -QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)
  453 +std::vector<Xref_table::Subsection>
  454 +Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)
452 455 {
453   - std::vector<QPDF::Xref_table::Subsection> result;
  456 + std::vector<Xref_table::Subsection> result;
454 457 file->seek(start, SEEK_SET);
455 458  
456 459 while (true) {
... ... @@ -475,12 +478,12 @@ QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)
475 478  
476 479 // Optimistically read and parse all subsection headers. If an error is encountered return the
477 480 // result of bad_subsections.
478   -std::vector<QPDF::Xref_table::Subsection>
479   -QPDF::Xref_table::subsections(std::string& line)
  481 +std::vector<Xref_table::Subsection>
  482 +Xref_table::subsections(std::string& line)
480 483 {
481 484 auto recovery_offset = file->tell();
482 485 try {
483   - std::vector<QPDF::Xref_table::Subsection> result;
  486 + std::vector<Xref_table::Subsection> result;
484 487  
485 488 while (true) {
486 489 line.assign(50, '\0');
... ... @@ -507,7 +510,7 @@ QPDF::Xref_table::subsections(std::string& line)
507 510  
508 511 // Returns (success, f1, f2, type).
509 512 std::tuple<bool, qpdf_offset_t, int, char>
510   -QPDF::Xref_table::read_bad_entry()
  513 +Xref_table::read_bad_entry()
511 514 {
512 515 qpdf_offset_t f1{0};
513 516 int f2{0};
... ... @@ -592,7 +595,7 @@ QPDF::Xref_table::read_bad_entry()
592 595 // Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return
593 596 // result. Returns (success, f1, f2, type).
594 597 std::tuple<bool, qpdf_offset_t, int, char>
595   -QPDF::Xref_table::read_entry()
  598 +Xref_table::read_entry()
596 599 {
597 600 qpdf_offset_t f1{0};
598 601 int f2{0};
... ... @@ -651,7 +654,7 @@ QPDF::Xref_table::read_entry()
651 654  
652 655 // Read a single cross-reference table section and associated trailer.
653 656 qpdf_offset_t
654   -QPDF::Xref_table::process_section(qpdf_offset_t xref_offset)
  657 +Xref_table::process_section(qpdf_offset_t xref_offset)
655 658 {
656 659 file->seek(xref_offset, SEEK_SET);
657 660 std::string line;
... ... @@ -738,7 +741,7 @@ QPDF::Xref_table::process_section(qpdf_offset_t xref_offset)
738 741  
739 742 // Read a single cross-reference stream.
740 743 qpdf_offset_t
741   -QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset)
  744 +Xref_table::read_stream(qpdf_offset_t xref_offset)
742 745 {
743 746 if (!ignore_streams_) {
744 747 QPDFObjGen x_og;
... ... @@ -762,8 +765,7 @@ QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset)
762 765  
763 766 // Return the entry size of the xref stream and the processed W array.
764 767 std::pair<int, std::array<int, 3>>
765   -QPDF::Xref_table::process_W(
766   - QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged)
  768 +Xref_table::process_W(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged)
767 769 {
768 770 auto W_obj = dict.getKey("/W");
769 771 if (!(W_obj.isArray() && W_obj.getArrayNItems() >= 3 && W_obj.getArrayItem(0).isInteger() &&
... ... @@ -794,7 +796,7 @@ QPDF::Xref_table::process_W(
794 796 // Validate Size entry and return the maximum number of entries that the xref stream can contain and
795 797 // the value of the Size entry.
796 798 std::pair<int, size_t>
797   -QPDF::Xref_table::process_Size(
  799 +Xref_table::process_Size(
798 800 QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged)
799 801 {
800 802 // Number of entries is limited by the highest possible object id and stream size.
... ... @@ -818,7 +820,7 @@ QPDF::Xref_table::process_Size(
818 820  
819 821 // Return the number of entries of the xref stream and the processed Index array.
820 822 std::pair<int, std::vector<std::pair<int, int>>>
821   -QPDF::Xref_table::process_Index(
  823 +Xref_table::process_Index(
822 824 QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged)
823 825 {
824 826 auto size = dict.getKey("/Size").getIntValueAsInt();
... ... @@ -885,7 +887,7 @@ QPDF::Xref_table::process_Index(
885 887 }
886 888  
887 889 qpdf_offset_t
888   -QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
  890 +Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
889 891 {
890 892 auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc {
891 893 return qpdf.damagedPDF("xref stream", xref_offset, msg.data());
... ... @@ -978,7 +980,7 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xr
978 980 }
979 981  
980 982 void
981   -QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
  983 +Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
982 984 {
983 985 // Populate the xref table in such a way that the first reference to an object that we see,
984 986 // which is the one in the latest xref table in which it appears, is the one that gets stored.
... ... @@ -1040,7 +1042,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1040 1042 }
1041 1043  
1042 1044 void
1043   -QPDF::Xref_table::insert_free(QPDFObjGen og)
  1045 +Xref_table::insert_free(QPDFObjGen og)
1044 1046 {
1045 1047 // At the moment we are processing the updates last to first and therefore the gen doesn't
1046 1048 // matter as long as it > 0 to distinguish it from an uninitialized entry. This will need to be
... ... @@ -1055,7 +1057,7 @@ QPDF::Xref_table::insert_free(QPDFObjGen og)
1055 1057 }
1056 1058  
1057 1059 QPDFObjGen
1058   -QPDF::Xref_table::at_offset(qpdf_offset_t offset) const noexcept
  1060 +Xref_table::at_offset(qpdf_offset_t offset) const noexcept
1059 1061 {
1060 1062 int id = 0;
1061 1063 int gen = 0;
... ... @@ -1075,7 +1077,7 @@ QPDF::Xref_table::at_offset(qpdf_offset_t offset) const noexcept
1075 1077 }
1076 1078  
1077 1079 std::map<QPDFObjGen, QPDFXRefEntry>
1078   -QPDF::Xref_table::as_map() const
  1080 +Xref_table::as_map() const
1079 1081 {
1080 1082 std::map<QPDFObjGen, QPDFXRefEntry> result;
1081 1083 int i{0};
... ... @@ -1099,7 +1101,7 @@ QPDF::Xref_table::as_map() const
1099 1101 }
1100 1102  
1101 1103 void
1102   -QPDF::Xref_table::show()
  1104 +Xref_table::show()
1103 1105 {
1104 1106 auto& cout = *qpdf.m->log->getInfo();
1105 1107 int i = -1;
... ... @@ -1128,7 +1130,7 @@ QPDF::Xref_table::show()
1128 1130 // Resolve all objects in the xref table. If this triggers a xref table reconstruction abort and
1129 1131 // return false. Otherwise return true.
1130 1132 bool
1131   -QPDF::Xref_table::resolve()
  1133 +Xref_table::resolve()
1132 1134 {
1133 1135 bool may_change = !reconstructed_;
1134 1136 int i = -1;
... ... @@ -1159,7 +1161,7 @@ QPDF::getAllObjects()
1159 1161 }
1160 1162  
1161 1163 QPDFObjectHandle
1162   -QPDF::Xref_table::read_trailer()
  1164 +Xref_table::read_trailer()
1163 1165 {
1164 1166 qpdf_offset_t offset = file->tell();
1165 1167 bool empty = false;
... ... @@ -1177,7 +1179,7 @@ QPDF::Xref_table::read_trailer()
1177 1179 }
1178 1180  
1179 1181 QPDFObjectHandle
1180   -QPDF::Objects::read_object(std::string const& description, QPDFObjGen og)
  1182 +Objects::read_object(std::string const& description, QPDFObjGen og)
1181 1183 {
1182 1184 qpdf.setLastObjectDescription(description, og);
1183 1185 qpdf_offset_t offset = m->file->tell();
... ... @@ -1209,7 +1211,7 @@ QPDF::Objects::read_object(std::string const& description, QPDFObjGen og)
1209 1211  
1210 1212 // After reading stream dictionary and stream keyword, read rest of stream.
1211 1213 void
1212   -QPDF::Objects::read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
  1214 +Objects::read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
1213 1215 {
1214 1216 validate_stream_line_end(object, og, offset);
1215 1217  
... ... @@ -1250,8 +1252,7 @@ QPDF::Objects::read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_
1250 1252 }
1251 1253  
1252 1254 void
1253   -QPDF::Objects::validate_stream_line_end(
1254   - QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
  1255 +Objects::validate_stream_line_end(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
1255 1256 {
1256 1257 // The PDF specification states that the word "stream" should be followed by either a carriage
1257 1258 // return and a newline or by a newline alone. It specifically disallowed following it by a
... ... @@ -1302,7 +1303,7 @@ QPDF::Objects::validate_stream_line_end(
1302 1303 }
1303 1304  
1304 1305 QPDFObjectHandle
1305   -QPDF::Objects::readObjectInStream(std::shared_ptr<InputSource>& input, int obj)
  1306 +Objects::readObjectInStream(std::shared_ptr<InputSource>& input, int obj)
1306 1307 {
1307 1308 m->last_object_description.erase(7); // last_object_description starts with "object "
1308 1309 m->last_object_description += std::to_string(obj);
... ... @@ -1332,7 +1333,7 @@ QPDF::findEndstream()
1332 1333 }
1333 1334  
1334 1335 size_t
1335   -QPDF::Objects::recover_stream_length(
  1336 +Objects::recover_stream_length(
1336 1337 std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset)
1337 1338 {
1338 1339 // Try to reconstruct stream length by looking for endstream or endobj
... ... @@ -1351,7 +1352,7 @@ QPDF::Objects::recover_stream_length(
1351 1352  
1352 1353 if (length) {
1353 1354 // Make sure this is inside this object
1354   - auto found = m->xref_table.at_offset(stream_offset + toO(length));
  1355 + auto found = xref.at_offset(stream_offset + toO(length));
1355 1356 if (found == QPDFObjGen() || found == og) {
1356 1357 // If we are trying to recover an XRef stream the xref table will not contain and
1357 1358 // won't contain any entries, therefore we cannot check the found length. Otherwise we
... ... @@ -1376,7 +1377,7 @@ QPDF::Objects::recover_stream_length(
1376 1377 }
1377 1378  
1378 1379 QPDFObjectHandle
1379   -QPDF::Objects::read(
  1380 +Objects::read(
1380 1381 bool try_recovery,
1381 1382 qpdf_offset_t offset,
1382 1383 std::string const& description,
... ... @@ -1455,10 +1456,10 @@ QPDF::Objects::read(
1455 1456 } catch (QPDFExc& e) {
1456 1457 if (try_recovery) {
1457 1458 // Try again after reconstructing xref table
1458   - m->xref_table.reconstruct(e);
1459   - if (m->xref_table.type(exp_og) == 1) {
  1459 + xref.reconstruct(e);
  1460 + if (xref.type(exp_og) == 1) {
1460 1461 QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
1461   - return read(false, m->xref_table.offset(exp_og), description, exp_og, og, false);
  1462 + return read(false, xref.offset(exp_og), description, exp_og, og, false);
1462 1463 } else {
1463 1464 QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
1464 1465 qpdf.warn(qpdf.damagedPDF(
... ... @@ -1498,7 +1499,7 @@ QPDF::Objects::read(
1498 1499 }
1499 1500 }
1500 1501 qpdf_offset_t end_after_space = m->file->tell();
1501   - if (skip_cache_if_in_xref && m->xref_table.type(og)) {
  1502 + if (skip_cache_if_in_xref && xref.type(og)) {
1502 1503 // Ordinarily, an object gets read here when resolved through xref table or stream. In
1503 1504 // the special case of the xref stream and linearization hint tables, the offset comes
1504 1505 // from another source. For the specific case of xref streams, the xref stream is read
... ... @@ -1526,8 +1527,7 @@ QPDF::Objects::read(
1526 1527 // could use !check_og in place of skip_cache_if_in_xref.
1527 1528 QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
1528 1529 } else {
1529   - m->xref_table.linearization_offsets(
1530   - toS(og.getObj()), end_before_space, end_after_space);
  1530 + xref.linearization_offsets(toS(og.getObj()), end_before_space, end_after_space);
1531 1531 update_table(og, oh.getObj());
1532 1532 }
1533 1533 }
... ... @@ -1536,7 +1536,7 @@ QPDF::Objects::read(
1536 1536 }
1537 1537  
1538 1538 QPDFObject*
1539   -QPDF::Objects::resolve(QPDFObjGen og)
  1539 +Objects::resolve(QPDFObjGen og)
1540 1540 {
1541 1541 if (!unresolved(og)) {
1542 1542 return obj_cache[og].object.get();
... ... @@ -1553,19 +1553,19 @@ QPDF::Objects::resolve(QPDFObjGen og)
1553 1553 ResolveRecorder rr(&qpdf, og);
1554 1554  
1555 1555 try {
1556   - switch (m->xref_table.type(og)) {
  1556 + switch (xref.type(og)) {
1557 1557 case 0:
1558 1558 break;
1559 1559 case 1:
1560 1560 {
1561 1561 // Object stored in cache by readObjectAtOffset
1562 1562 QPDFObjGen a_og;
1563   - QPDFObjectHandle oh = read(true, m->xref_table.offset(og), "", og, a_og, false);
  1563 + QPDFObjectHandle oh = read(true, xref.offset(og), "", og, a_og, false);
1564 1564 }
1565 1565 break;
1566 1566  
1567 1567 case 2:
1568   - resolveObjectsInStream(m->xref_table.stream_number(og.getObj()));
  1568 + resolveObjectsInStream(xref.stream_number(og.getObj()));
1569 1569 break;
1570 1570  
1571 1571 default:
... ... @@ -1591,7 +1591,7 @@ QPDF::Objects::resolve(QPDFObjGen og)
1591 1591 }
1592 1592  
1593 1593 void
1594   -QPDF::Objects::resolveObjectsInStream(int obj_stream_number)
  1594 +Objects::resolveObjectsInStream(int obj_stream_number)
1595 1595 {
1596 1596 if (m->resolved_object_streams.count(obj_stream_number)) {
1597 1597 return;
... ... @@ -1642,7 +1642,7 @@ QPDF::Objects::resolveObjectsInStream(int obj_stream_number)
1642 1642  
1643 1643 int num = QUtil::string_to_int(tnum.getValue().c_str());
1644 1644 long long offset = QUtil::string_to_int(toffset.getValue().c_str());
1645   - if (num > m->xref_table.max_id()) {
  1645 + if (num > xref.max_id()) {
1646 1646 continue;
1647 1647 }
1648 1648 if (num == obj_stream_number) {
... ... @@ -1674,8 +1674,7 @@ QPDF::Objects::resolveObjectsInStream(int obj_stream_number)
1674 1674 m->last_object_description += "object ";
1675 1675 for (auto const& iter: offsets) {
1676 1676 QPDFObjGen og(iter.first, 0);
1677   - if (m->xref_table.type(og) == 2 &&
1678   - m->xref_table.stream_number(og.getObj()) == obj_stream_number) {
  1677 + if (xref.type(og) == 2 && xref.stream_number(og.getObj()) == obj_stream_number) {
1679 1678 int offset = iter.second;
1680 1679 input->seek(offset, SEEK_SET);
1681 1680 QPDFObjectHandle oh = readObjectInStream(input, iter.first);
... ... @@ -1687,7 +1686,7 @@ QPDF::Objects::resolveObjectsInStream(int obj_stream_number)
1687 1686 }
1688 1687  
1689 1688 void
1690   -QPDF::Objects::update_table(QPDFObjGen og, const std::shared_ptr<QPDFObject>& object)
  1689 +Objects::update_table(QPDFObjGen og, const std::shared_ptr<QPDFObject>& object)
1691 1690 {
1692 1691 object->setObjGen(&qpdf, og);
1693 1692 if (cached(og)) {
... ... @@ -1699,19 +1698,19 @@ QPDF::Objects::update_table(QPDFObjGen og, const std::shared_ptr<QPDFObject>& ob
1699 1698 }
1700 1699  
1701 1700 bool
1702   -QPDF::Objects::cached(QPDFObjGen og)
  1701 +Objects::cached(QPDFObjGen og)
1703 1702 {
1704 1703 return obj_cache.count(og) != 0;
1705 1704 }
1706 1705  
1707 1706 bool
1708   -QPDF::Objects::unresolved(QPDFObjGen og)
  1707 +Objects::unresolved(QPDFObjGen og)
1709 1708 {
1710 1709 return !cached(og) || obj_cache[og].object->isUnresolved();
1711 1710 }
1712 1711  
1713 1712 QPDFObjGen
1714   -QPDF::Objects::next_id()
  1713 +Objects::next_id()
1715 1714 {
1716 1715 int max_objid = toI(qpdf.getObjectCount());
1717 1716 if (max_objid == std::numeric_limits<int>::max()) {
... ... @@ -1721,7 +1720,7 @@ QPDF::Objects::next_id()
1721 1720 }
1722 1721  
1723 1722 QPDFObjectHandle
1724   -QPDF::Objects::make_indirect(std::shared_ptr<QPDFObject> const& obj)
  1723 +Objects::make_indirect(std::shared_ptr<QPDFObject> const& obj)
1725 1724 {
1726 1725 QPDFObjGen next{next_id()};
1727 1726 obj_cache[next] = ObjCache(obj);
... ... @@ -1729,14 +1728,14 @@ QPDF::Objects::make_indirect(std::shared_ptr<QPDFObject> const& obj)
1729 1728 }
1730 1729  
1731 1730 std::shared_ptr<QPDFObject>
1732   -QPDF::Objects::get_for_parser(int id, int gen, bool parse_pdf)
  1731 +Objects::get_for_parser(int id, int gen, bool parse_pdf)
1733 1732 {
1734 1733 // This method is called by the parser and therefore must not resolve any objects.
1735 1734 auto og = QPDFObjGen(id, gen);
1736 1735 if (auto iter = obj_cache.find(og); iter != obj_cache.end()) {
1737 1736 return iter->second.object;
1738 1737 }
1739   - if (m->xref_table.type(og) || !m->xref_table.initialized()) {
  1738 + if (xref.type(og) || !xref.initialized()) {
1740 1739 return obj_cache.insert({og, QPDF_Unresolved::create(&qpdf, og)}).first->second.object;
1741 1740 }
1742 1741 if (parse_pdf) {
... ... @@ -1746,15 +1745,14 @@ QPDF::Objects::get_for_parser(int id, int gen, bool parse_pdf)
1746 1745 }
1747 1746  
1748 1747 std::shared_ptr<QPDFObject>
1749   -QPDF::Objects::get_for_json(int id, int gen)
  1748 +Objects::get_for_json(int id, int gen)
1750 1749 {
1751 1750 auto og = QPDFObjGen(id, gen);
1752 1751 auto [it, inserted] = obj_cache.try_emplace(og);
1753 1752 auto& obj = it->second.object;
1754 1753 if (inserted) {
1755   - obj = (m->xref_table.initialized() && !m->xref_table.type(og))
1756   - ? QPDF_Null::create(&qpdf, og)
1757   - : QPDF_Unresolved::create(&qpdf, og);
  1754 + obj = (xref.initialized() && !xref.type(og)) ? QPDF_Null::create(&qpdf, og)
  1755 + : QPDF_Unresolved::create(&qpdf, og);
1758 1756 }
1759 1757 return obj;
1760 1758 }
... ... @@ -1770,7 +1768,7 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh)
1770 1768 }
1771 1769  
1772 1770 void
1773   -QPDF::Objects::erase(QPDFObjGen og)
  1771 +Objects::erase(QPDFObjGen og)
1774 1772 {
1775 1773 if (auto cached = obj_cache.find(og); cached != obj_cache.end()) {
1776 1774 // Take care of any object handles that may be floating around.
... ... @@ -1790,11 +1788,11 @@ QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2)
1790 1788 }
1791 1789  
1792 1790 size_t
1793   -QPDF::Objects::table_size()
  1791 +Objects::table_size()
1794 1792 {
1795 1793 // If obj_cache is dense, accommodate all object in tables,else accommodate only original
1796 1794 // objects.
1797   - auto max_xref = toI(m->xref_table.size());
  1795 + auto max_xref = toI(xref.size());
1798 1796 if (max_xref > 0) {
1799 1797 --max_xref;
1800 1798 }
... ... @@ -1813,20 +1811,20 @@ QPDF::Objects::table_size()
1813 1811 }
1814 1812  
1815 1813 std::vector<QPDFObjGen>
1816   -QPDF::Objects::compressible_vector()
  1814 +Objects::compressible_vector()
1817 1815 {
1818 1816 return compressible<QPDFObjGen>();
1819 1817 }
1820 1818  
1821 1819 std::vector<bool>
1822   -QPDF::Objects::compressible_set()
  1820 +Objects::compressible_set()
1823 1821 {
1824 1822 return compressible<bool>();
1825 1823 }
1826 1824  
1827 1825 template <typename T>
1828 1826 std::vector<T>
1829   -QPDF::Objects::compressible()
  1827 +Objects::compressible()
1830 1828 {
1831 1829 // Return a list of objects that are allowed to be in object streams. Walk through the objects
1832 1830 // by traversing the document from the root, including a traversal of the pages tree. This
... ... @@ -1835,14 +1833,14 @@ QPDF::Objects::compressible()
1835 1833 // iterating through the xref table since it avoids preserving orphaned items.
1836 1834  
1837 1835 // Exclude encryption dictionary, if any
1838   - QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt");
  1836 + QPDFObjectHandle encryption_dict = trailer().getKey("/Encrypt");
1839 1837 QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
1840 1838  
1841 1839 const size_t max_obj = qpdf.getObjectCount();
1842 1840 std::vector<bool> visited(max_obj, false);
1843 1841 std::vector<QPDFObjectHandle> queue;
1844 1842 queue.reserve(512);
1845   - queue.push_back(m->xref_table.trailer());
  1843 + queue.emplace_back(trailer());
1846 1844 std::vector<T> result;
1847 1845 if constexpr (std::is_same_v<T, QPDFObjGen>) {
1848 1846 result.reserve(obj_cache.size());
... ...
libqpdf/QPDF_optimization.cc
... ... @@ -79,9 +79,9 @@ QPDF::optimize(
79 79 }
80 80  
81 81 void
82   -QPDF::optimize(QPDF::Xref_table const& xref)
  82 +QPDF::optimize(QPDF::Objects const& objects)
83 83 {
84   - optimize_internal(xref, false, nullptr);
  84 + optimize_internal(objects, false, nullptr);
85 85 }
86 86  
87 87 template <typename T>
... ... @@ -121,13 +121,13 @@ QPDF::optimize_internal(
121 121 }
122 122  
123 123 // Traverse document-level items
124   - for (auto const& key: m->xref_table.trailer().getKeys()) {
  124 + for (auto const& key: m->objects.trailer().getKeys()) {
125 125 if (key == "/Root") {
126 126 // handled separately
127 127 } else {
128 128 updateObjectMaps(
129 129 ObjUser(ObjUser::ou_trailer_key, key),
130   - m->xref_table.trailer().getKey(key),
  130 + m->objects.trailer().getKey(key),
131 131 skip_stream_parameters);
132 132 }
133 133 }
... ... @@ -175,7 +175,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
175 175 // values for them.
176 176 std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
177 177 pushInheritedAttributesToPageInternal(
178   - m->xref_table.trailer().getKey("/Root").getKey("/Pages"),
  178 + m->objects.trailer().getKey("/Root").getKey("/Pages"),
179 179 key_ancestors,
180 180 allow_changes,
181 181 warn_skipped_keys);
... ... @@ -450,8 +450,9 @@ QPDF::filterCompressedObjects(QPDFWriter::ObjTable const& obj)
450 450 }
451 451  
452 452 void
453   -QPDF::filterCompressedObjects(QPDF::Xref_table const& xref)
  453 +QPDF::filterCompressedObjects(QPDF::Objects const& objects)
454 454 {
  455 + auto const& xref = objects.xref_table();
455 456 if (!xref.object_streams()) {
456 457 return;
457 458 }
... ...
libqpdf/qpdf/QPDF_objects.hh
... ... @@ -3,17 +3,410 @@
3 3  
4 4 #include <qpdf/QPDF.hh>
5 5  
  6 +#include <variant>
  7 +
6 8 // The Objects class is responsible for keeping track of all objects belonging to a QPDF instance,
7 9 // including loading it from an input source when required.
8 10 class QPDF::Objects
9 11 {
10 12 public:
11   - Objects(QPDF& qpdf, QPDF::Members* m) :
  13 + // Xref_table encapsulates the pdf's xref table and trailer.
  14 + class Xref_table
  15 + {
  16 + public:
  17 + Xref_table(Objects& objects) :
  18 + qpdf(objects.qpdf),
  19 + objects(objects),
  20 + file(objects.file)
  21 + {
  22 + tokenizer.allowEOF();
  23 + }
  24 +
  25 + void initialize();
  26 + void initialize_empty();
  27 + void initialize_json();
  28 + void reconstruct(QPDFExc& e);
  29 + void show();
  30 + bool resolve();
  31 +
  32 + QPDFObjectHandle
  33 + trailer() noexcept
  34 + {
  35 + return trailer_;
  36 + }
  37 +
  38 + QPDFObjectHandle const&
  39 + trailer() const noexcept
  40 + {
  41 + return trailer_;
  42 + }
  43 +
  44 + void
  45 + trailer(QPDFObjectHandle&& oh)
  46 + {
  47 + trailer_ = std::move(oh);
  48 + }
  49 +
  50 + // Returns 0 if og is not in table.
  51 + size_t
  52 + type(QPDFObjGen og) const
  53 + {
  54 + int id = og.getObj();
  55 + if (id < 1 || static_cast<size_t>(id) >= table.size()) {
  56 + return 0;
  57 + }
  58 + auto& e = table[static_cast<size_t>(id)];
  59 + return e.gen() == og.getGen() ? e.type() : 0;
  60 + }
  61 +
  62 + // Returns 0 if og is not in table.
  63 + size_t
  64 + type(size_t id) const noexcept
  65 + {
  66 + if (id >= table.size()) {
  67 + return 0;
  68 + }
  69 + return table[id].type();
  70 + }
  71 +
  72 + // Returns 0 if og is not in table.
  73 + qpdf_offset_t
  74 + offset(QPDFObjGen og) const noexcept
  75 + {
  76 + int id = og.getObj();
  77 + if (id < 1 || static_cast<size_t>(id) >= table.size()) {
  78 + return 0;
  79 + }
  80 + return table[static_cast<size_t>(id)].offset();
  81 + }
  82 +
  83 + // Returns 0 if id is not in table.
  84 + int
  85 + stream_number(int id) const noexcept
  86 + {
  87 + if (id < 1 || static_cast<size_t>(id) >= table.size()) {
  88 + return 0;
  89 + }
  90 + return table[static_cast<size_t>(id)].stream_number();
  91 + }
  92 +
  93 + int
  94 + stream_index(int id) const noexcept
  95 + {
  96 + if (id < 1 || static_cast<size_t>(id) >= table.size()) {
  97 + return 0;
  98 + }
  99 + return table[static_cast<size_t>(id)].stream_index();
  100 + }
  101 +
  102 + QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept;
  103 +
  104 + std::map<QPDFObjGen, QPDFXRefEntry> as_map() const;
  105 +
  106 + bool
  107 + object_streams() const noexcept
  108 + {
  109 + return object_streams_;
  110 + }
  111 +
  112 + // Return a vector of object id and stream number for each compressed object.
  113 + std::vector<std::pair<unsigned int, int>>
  114 + compressed_objects() const
  115 + {
  116 + if (!initialized()) {
  117 + throw std::logic_error("Xref_table::compressed_objects called before parsing.");
  118 + }
  119 +
  120 + std::vector<std::pair<unsigned int, int>> result;
  121 + result.reserve(table.size());
  122 +
  123 + unsigned int i{0};
  124 + for (auto const& item: table) {
  125 + if (item.type() == 2) {
  126 + result.emplace_back(i, item.stream_number());
  127 + }
  128 + ++i;
  129 + }
  130 + return result;
  131 + }
  132 +
  133 + // Temporary access to underlying table size
  134 + size_t
  135 + size() const noexcept
  136 + {
  137 + return table.size();
  138 + }
  139 +
  140 + void
  141 + ignore_streams(bool val) noexcept
  142 + {
  143 + ignore_streams_ = val;
  144 + }
  145 +
  146 + bool
  147 + initialized() const noexcept
  148 + {
  149 + return initialized_;
  150 + }
  151 +
  152 + void
  153 + attempt_recovery(bool val) noexcept
  154 + {
  155 + attempt_recovery_ = val;
  156 + }
  157 +
  158 + int
  159 + max_id() const noexcept
  160 + {
  161 + return max_id_;
  162 + }
  163 +
  164 + // For Linearization
  165 +
  166 + qpdf_offset_t
  167 + end_after_space(QPDFObjGen og)
  168 + {
  169 + auto& e = entry(toS(og.getObj()));
  170 + switch (e.type()) {
  171 + case 1:
  172 + return e.end_after_space_;
  173 + case 2:
  174 + {
  175 + auto es = entry(toS(e.stream_number()));
  176 + return es.type() == 1 ? es.end_after_space_ : 0;
  177 + }
  178 + default:
  179 + return 0;
  180 + }
  181 + }
  182 +
  183 + qpdf_offset_t
  184 + end_before_space(QPDFObjGen og)
  185 + {
  186 + auto& e = entry(toS(og.getObj()));
  187 + switch (e.type()) {
  188 + case 1:
  189 + return e.end_before_space_;
  190 + case 2:
  191 + {
  192 + auto es = entry(toS(e.stream_number()));
  193 + return es.type() == 1 ? es.end_before_space_ : 0;
  194 + }
  195 + default:
  196 + return 0;
  197 + }
  198 + }
  199 +
  200 + void
  201 + linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after)
  202 + {
  203 + if (type(id)) {
  204 + table[id].end_before_space_ = before;
  205 + table[id].end_after_space_ = after;
  206 + }
  207 + }
  208 +
  209 + bool
  210 + uncompressed_after_compressed() const noexcept
  211 + {
  212 + return uncompressed_after_compressed_;
  213 + }
  214 +
  215 + // Actual value from file
  216 + qpdf_offset_t
  217 + first_item_offset() const noexcept
  218 + {
  219 + return first_item_offset_;
  220 + }
  221 +
  222 + private:
  223 + // Object, count, offset of first entry
  224 + typedef std::tuple<int, int, qpdf_offset_t> Subsection;
  225 +
  226 + struct Uncompressed
  227 + {
  228 + Uncompressed(qpdf_offset_t offset) :
  229 + offset(offset)
  230 + {
  231 + }
  232 + qpdf_offset_t offset;
  233 + };
  234 +
  235 + struct Compressed
  236 + {
  237 + Compressed(int stream_number, int stream_index) :
  238 + stream_number(stream_number),
  239 + stream_index(stream_index)
  240 + {
  241 + }
  242 + int stream_number{0};
  243 + int stream_index{0};
  244 + };
  245 +
  246 + typedef std::variant<std::monostate, Uncompressed, Compressed> Xref;
  247 +
  248 + struct Entry
  249 + {
  250 + Entry() = default;
  251 +
  252 + Entry(int gen, Xref entry) :
  253 + gen_(gen),
  254 + entry(entry)
  255 + {
  256 + }
  257 +
  258 + int
  259 + gen() const noexcept
  260 + {
  261 + return gen_;
  262 + }
  263 +
  264 + size_t
  265 + type() const noexcept
  266 + {
  267 + return entry.index();
  268 + }
  269 +
  270 + qpdf_offset_t
  271 + offset() const noexcept
  272 + {
  273 + return type() == 1 ? std::get<1>(entry).offset : 0;
  274 + }
  275 +
  276 + int
  277 + stream_number() const noexcept
  278 + {
  279 + return type() == 2 ? std::get<2>(entry).stream_number : 0;
  280 + }
  281 +
  282 + int
  283 + stream_index() const noexcept
  284 + {
  285 + return type() == 2 ? std::get<2>(entry).stream_index : 0;
  286 + }
  287 +
  288 + int gen_{0};
  289 + Xref entry;
  290 + qpdf_offset_t end_before_space_{0};
  291 + qpdf_offset_t end_after_space_{0};
  292 + };
  293 +
  294 + Entry&
  295 + entry(size_t id)
  296 + {
  297 + return id < table.size() ? table[id] : table[0];
  298 + }
  299 +
  300 + void read(qpdf_offset_t offset);
  301 +
  302 + // Methods to parse tables
  303 + qpdf_offset_t process_section(qpdf_offset_t offset);
  304 + std::vector<Subsection> subsections(std::string& line);
  305 + std::vector<Subsection> bad_subsections(std::string& line, qpdf_offset_t offset);
  306 + Subsection subsection(std::string const& line);
  307 + std::tuple<bool, qpdf_offset_t, int, char> read_entry();
  308 + std::tuple<bool, qpdf_offset_t, int, char> read_bad_entry();
  309 +
  310 + // Methods to parse streams
  311 + qpdf_offset_t read_stream(qpdf_offset_t offset);
  312 + qpdf_offset_t process_stream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
  313 + std::pair<int, std::array<int, 3>>
  314 + process_W(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
  315 + std::pair<int, size_t> process_Size(
  316 + QPDFObjectHandle& dict,
  317 + int entry_size,
  318 + std::function<QPDFExc(std::string_view)> damaged);
  319 + std::pair<int, std::vector<std::pair<int, int>>> process_Index(
  320 + QPDFObjectHandle& dict,
  321 + int max_num_entries,
  322 + std::function<QPDFExc(std::string_view)> damaged);
  323 +
  324 + QPDFObjectHandle read_trailer();
  325 +
  326 + QPDFTokenizer::Token
  327 + read_token(size_t max_len = 0)
  328 + {
  329 + return tokenizer.readToken(*file, "", true, max_len);
  330 + }
  331 +
  332 + // Methods to insert table entries
  333 + void insert(int obj, int f0, qpdf_offset_t f1, int f2);
  334 + void insert_free(QPDFObjGen);
  335 +
  336 + QPDFExc
  337 + damaged_pdf(std::string const& msg)
  338 + {
  339 + return qpdf.damagedPDF("", 0, msg);
  340 + }
  341 +
  342 + QPDFExc
  343 + damaged_table(std::string const& msg)
  344 + {
  345 + return qpdf.damagedPDF("xref table", msg);
  346 + }
  347 +
  348 + void
  349 + warn_damaged(std::string const& msg)
  350 + {
  351 + qpdf.warn(damaged_pdf(msg));
  352 + }
  353 +
  354 + QPDF& qpdf;
  355 + QPDF::Objects& objects;
  356 + InputSource* const& file;
  357 + QPDFTokenizer tokenizer;
  358 +
  359 + std::vector<Entry> table;
  360 + QPDFObjectHandle trailer_;
  361 +
  362 + bool attempt_recovery_{true};
  363 + bool initialized_{false};
  364 + bool ignore_streams_{false};
  365 + bool reconstructed_{false};
  366 + bool object_streams_{false};
  367 + // Before the xref table is initialized, max_id_ is an upper bound on the possible object
  368 + // ids that could be present in the PDF file. Once the trailer has been read, max_id_ is set
  369 + // to the value of /Size. If the file is damaged, max_id_ becomes the maximum object id in
  370 + // the xref table after reconstruction.
  371 + int max_id_{std::numeric_limits<int>::max() - 1};
  372 +
  373 + // Linearization data
  374 + bool uncompressed_after_compressed_{false};
  375 + qpdf_offset_t first_item_offset_{0}; // actual value from file
  376 + }; // Xref_table;
  377 +
  378 + Objects(QPDF& qpdf, QPDF::Members* m, InputSource* const& file) :
12 379 qpdf(qpdf),
13   - m(m)
  380 + file(file),
  381 + m(m),
  382 + xref(*this)
14 383 {
15 384 }
16 385  
  386 + Xref_table&
  387 + xref_table() noexcept
  388 + {
  389 + return xref;
  390 + }
  391 +
  392 + Xref_table const&
  393 + xref_table() const noexcept
  394 + {
  395 + return xref;
  396 + }
  397 +
  398 + QPDFObjectHandle
  399 + trailer() noexcept
  400 + {
  401 + return xref.trailer();
  402 + }
  403 +
  404 + QPDFObjectHandle const&
  405 + trailer() const noexcept
  406 + {
  407 + return xref.trailer();
  408 + }
  409 +
17 410 std::map<QPDFObjGen, ObjCache> obj_cache;
18 411  
19 412 QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>& input, int obj);
... ... @@ -42,8 +435,6 @@ class QPDF::Objects
42 435 size_t table_size();
43 436  
44 437 private:
45   - friend class QPDF::Xref_table;
46   -
47 438 void erase(QPDFObjGen og);
48 439 bool cached(QPDFObjGen og);
49 440 bool unresolved(QPDFObjGen og);
... ... @@ -55,7 +446,9 @@ class QPDF::Objects
55 446 std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset);
56 447  
57 448 QPDF& qpdf;
  449 + InputSource* const& file;
58 450 QPDF::Members* m;
  451 + Xref_table xref;
59 452 }; // Objects
60 453  
61 454 #endif // QPDF_OBJECTS_HH
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -7,363 +7,6 @@
7 7  
8 8 #include <variant>
9 9  
10   -// Xref_table encapsulates the pdf's xref table and trailer.
11   -class QPDF::Xref_table
12   -{
13   - public:
14   - Xref_table(QPDF& qpdf, QPDF::Objects& objects, InputSource* const& file) :
15   - qpdf(qpdf),
16   - objects(objects),
17   - file(file)
18   - {
19   - tokenizer.allowEOF();
20   - }
21   -
22   - void initialize();
23   - void initialize_empty();
24   - void initialize_json();
25   - void reconstruct(QPDFExc& e);
26   - void show();
27   - bool resolve();
28   -
29   - QPDFObjectHandle
30   - trailer() const
31   - {
32   - return trailer_;
33   - }
34   -
35   - void
36   - trailer(QPDFObjectHandle&& oh)
37   - {
38   - trailer_ = std::move(oh);
39   - }
40   -
41   - // Returns 0 if og is not in table.
42   - size_t
43   - type(QPDFObjGen og) const
44   - {
45   - int id = og.getObj();
46   - if (id < 1 || static_cast<size_t>(id) >= table.size()) {
47   - return 0;
48   - }
49   - auto& e = table[static_cast<size_t>(id)];
50   - return e.gen() == og.getGen() ? e.type() : 0;
51   - }
52   -
53   - // Returns 0 if og is not in table.
54   - size_t
55   - type(size_t id) const noexcept
56   - {
57   - if (id >= table.size()) {
58   - return 0;
59   - }
60   - return table[id].type();
61   - }
62   -
63   - // Returns 0 if og is not in table.
64   - qpdf_offset_t
65   - offset(QPDFObjGen og) const noexcept
66   - {
67   - int id = og.getObj();
68   - if (id < 1 || static_cast<size_t>(id) >= table.size()) {
69   - return 0;
70   - }
71   - return table[static_cast<size_t>(id)].offset();
72   - }
73   -
74   - // Returns 0 if id is not in table.
75   - int
76   - stream_number(int id) const noexcept
77   - {
78   - if (id < 1 || static_cast<size_t>(id) >= table.size()) {
79   - return 0;
80   - }
81   - return table[static_cast<size_t>(id)].stream_number();
82   - }
83   -
84   - int
85   - stream_index(int id) const noexcept
86   - {
87   - if (id < 1 || static_cast<size_t>(id) >= table.size()) {
88   - return 0;
89   - }
90   - return table[static_cast<size_t>(id)].stream_index();
91   - }
92   -
93   - QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept;
94   -
95   - std::map<QPDFObjGen, QPDFXRefEntry> as_map() const;
96   -
97   - bool
98   - object_streams() const noexcept
99   - {
100   - return object_streams_;
101   - }
102   -
103   - // Return a vector of object id and stream number for each compressed object.
104   - std::vector<std::pair<unsigned int, int>>
105   - compressed_objects() const
106   - {
107   - if (!initialized()) {
108   - throw std::logic_error("Xref_table::compressed_objects called before parsing.");
109   - }
110   -
111   - std::vector<std::pair<unsigned int, int>> result;
112   - result.reserve(table.size());
113   -
114   - unsigned int i{0};
115   - for (auto const& item: table) {
116   - if (item.type() == 2) {
117   - result.emplace_back(i, item.stream_number());
118   - }
119   - ++i;
120   - }
121   - return result;
122   - }
123   -
124   - // Temporary access to underlying table size
125   - size_t
126   - size() const noexcept
127   - {
128   - return table.size();
129   - }
130   -
131   - void
132   - ignore_streams(bool val) noexcept
133   - {
134   - ignore_streams_ = val;
135   - }
136   -
137   - bool
138   - initialized() const noexcept
139   - {
140   - return initialized_;
141   - }
142   -
143   - void
144   - attempt_recovery(bool val) noexcept
145   - {
146   - attempt_recovery_ = val;
147   - }
148   -
149   - int
150   - max_id() const noexcept
151   - {
152   - return max_id_;
153   - }
154   -
155   - // For Linearization
156   -
157   - qpdf_offset_t
158   - end_after_space(QPDFObjGen og)
159   - {
160   - auto& e = entry(toS(og.getObj()));
161   - switch (e.type()) {
162   - case 1:
163   - return e.end_after_space_;
164   - case 2:
165   - {
166   - auto es = entry(toS(e.stream_number()));
167   - return es.type() == 1 ? es.end_after_space_ : 0;
168   - }
169   - default:
170   - return 0;
171   - }
172   - }
173   -
174   - qpdf_offset_t
175   - end_before_space(QPDFObjGen og)
176   - {
177   - auto& e = entry(toS(og.getObj()));
178   - switch (e.type()) {
179   - case 1:
180   - return e.end_before_space_;
181   - case 2:
182   - {
183   - auto es = entry(toS(e.stream_number()));
184   - return es.type() == 1 ? es.end_before_space_ : 0;
185   - }
186   - default:
187   - return 0;
188   - }
189   - }
190   -
191   - void
192   - linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after)
193   - {
194   - if (type(id)) {
195   - table[id].end_before_space_ = before;
196   - table[id].end_after_space_ = after;
197   - }
198   - }
199   -
200   - bool
201   - uncompressed_after_compressed() const noexcept
202   - {
203   - return uncompressed_after_compressed_;
204   - }
205   -
206   - // Actual value from file
207   - qpdf_offset_t
208   - first_item_offset() const noexcept
209   - {
210   - return first_item_offset_;
211   - }
212   -
213   - private:
214   - // Object, count, offset of first entry
215   - typedef std::tuple<int, int, qpdf_offset_t> Subsection;
216   -
217   - struct Uncompressed
218   - {
219   - Uncompressed(qpdf_offset_t offset) :
220   - offset(offset)
221   - {
222   - }
223   - qpdf_offset_t offset;
224   - };
225   -
226   - struct Compressed
227   - {
228   - Compressed(int stream_number, int stream_index) :
229   - stream_number(stream_number),
230   - stream_index(stream_index)
231   - {
232   - }
233   - int stream_number{0};
234   - int stream_index{0};
235   - };
236   -
237   - typedef std::variant<std::monostate, Uncompressed, Compressed> Xref;
238   -
239   - struct Entry
240   - {
241   - Entry() = default;
242   -
243   - Entry(int gen, Xref entry) :
244   - gen_(gen),
245   - entry(entry)
246   - {
247   - }
248   -
249   - int
250   - gen() const noexcept
251   - {
252   - return gen_;
253   - }
254   -
255   - size_t
256   - type() const noexcept
257   - {
258   - return entry.index();
259   - }
260   -
261   - qpdf_offset_t
262   - offset() const noexcept
263   - {
264   - return type() == 1 ? std::get<1>(entry).offset : 0;
265   - }
266   -
267   - int
268   - stream_number() const noexcept
269   - {
270   - return type() == 2 ? std::get<2>(entry).stream_number : 0;
271   - }
272   -
273   - int
274   - stream_index() const noexcept
275   - {
276   - return type() == 2 ? std::get<2>(entry).stream_index : 0;
277   - }
278   -
279   - int gen_{0};
280   - Xref entry;
281   - qpdf_offset_t end_before_space_{0};
282   - qpdf_offset_t end_after_space_{0};
283   - };
284   -
285   - Entry&
286   - entry(size_t id)
287   - {
288   - return id < table.size() ? table[id] : table[0];
289   - }
290   -
291   - void read(qpdf_offset_t offset);
292   -
293   - // Methods to parse tables
294   - qpdf_offset_t process_section(qpdf_offset_t offset);
295   - std::vector<Subsection> subsections(std::string& line);
296   - std::vector<Subsection> bad_subsections(std::string& line, qpdf_offset_t offset);
297   - Subsection subsection(std::string const& line);
298   - std::tuple<bool, qpdf_offset_t, int, char> read_entry();
299   - std::tuple<bool, qpdf_offset_t, int, char> read_bad_entry();
300   -
301   - // Methods to parse streams
302   - qpdf_offset_t read_stream(qpdf_offset_t offset);
303   - qpdf_offset_t process_stream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
304   - std::pair<int, std::array<int, 3>>
305   - process_W(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
306   - std::pair<int, size_t> process_Size(
307   - QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged);
308   - std::pair<int, std::vector<std::pair<int, int>>> process_Index(
309   - QPDFObjectHandle& dict,
310   - int max_num_entries,
311   - std::function<QPDFExc(std::string_view)> damaged);
312   -
313   - QPDFObjectHandle read_trailer();
314   -
315   - QPDFTokenizer::Token
316   - read_token(size_t max_len = 0)
317   - {
318   - return tokenizer.readToken(*file, "", true, max_len);
319   - }
320   -
321   - // Methods to insert table entries
322   - void insert(int obj, int f0, qpdf_offset_t f1, int f2);
323   - void insert_free(QPDFObjGen);
324   -
325   - QPDFExc
326   - damaged_pdf(std::string const& msg)
327   - {
328   - return qpdf.damagedPDF("", 0, msg);
329   - }
330   -
331   - QPDFExc
332   - damaged_table(std::string const& msg)
333   - {
334   - return qpdf.damagedPDF("xref table", msg);
335   - }
336   -
337   - void
338   - warn_damaged(std::string const& msg)
339   - {
340   - qpdf.warn(damaged_pdf(msg));
341   - }
342   -
343   - QPDF& qpdf;
344   - QPDF::Objects& objects;
345   - InputSource* const& file;
346   - QPDFTokenizer tokenizer;
347   -
348   - std::vector<Entry> table;
349   - QPDFObjectHandle trailer_;
350   -
351   - bool attempt_recovery_{true};
352   - bool initialized_{false};
353   - bool ignore_streams_{false};
354   - bool reconstructed_{false};
355   - bool object_streams_{false};
356   - // Before the xref table is initialized, max_id_ is an upper bound on the possible object ids
357   - // that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the
358   - // value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref
359   - // table after reconstruction.
360   - int max_id_{std::numeric_limits<int>::max() - 1};
361   -
362   - // Linearization data
363   - bool uncompressed_after_compressed_{false};
364   - qpdf_offset_t first_item_offset_{0}; // actual value from file
365   -};
366   -
367 10 // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.
368 11 class QPDF::StreamCopier
369 12 {
... ... @@ -740,7 +383,6 @@ class QPDF::Members
740 383 std::shared_ptr<EncryptionParameters> encp;
741 384 std::string pdf_version;
742 385 Objects objects;
743   - Xref_table xref_table;
744 386 std::set<QPDFObjGen> resolving;
745 387 std::vector<QPDFObjectHandle> all_pages;
746 388 bool invalid_page_found{false};
... ... @@ -901,10 +543,10 @@ class QPDF::Writer
901 543 return qpdf.objects().compressible_set();
902 544 }
903 545  
904   - static Xref_table const&
  546 + static Objects::Xref_table const&
905 547 getXRefTable(QPDF& qpdf)
906 548 {
907   - return qpdf.m->xref_table;
  549 + return qpdf.objects().xref_table();
908 550 }
909 551  
910 552 static size_t
... ...