Commit 8791b5f8d0ea9287f458784dc27039a2f55637b7

Authored by m-holger
1 parent 3b97c9bd

In QPDFWriter replace map obj_renumber with a new object table obj

include/qpdf/QPDF.hh
@@ -765,6 +765,12 @@ class QPDF @@ -765,6 +765,12 @@ class QPDF
765 { 765 {
766 return qpdf.getCompressibleObjGens(); 766 return qpdf.getCompressibleObjGens();
767 } 767 }
  768 +
  769 + static size_t
  770 + tableSize(QPDF& qpdf)
  771 + {
  772 + return qpdf.tableSize(); // forwards to QPDF::tableSize(), declared under "For QPDFWriter:"
  773 + }
768 }; 774 };
769 775
770 // The Resolver class is restricted to QPDFObject so that only it can resolve indirect 776 // The Resolver class is restricted to QPDFObject so that only it can resolve indirect
@@ -1083,6 +1089,8 @@ class QPDF @@ -1083,6 +1089,8 @@ class QPDF
1083 1089
1084 // For QPDFWriter: 1090 // For QPDFWriter:
1085 1091
  1092 + size_t tableSize();
  1093 +
1086 // Get lists of all objects in order according to the part of a linearized file that they belong 1094 // Get lists of all objects in order according to the part of a linearized file that they belong
1087 // to. 1095 // to.
1088 void getLinearizedParts( 1096 void getLinearizedParts(
include/qpdf/QPDFWriter.hh
@@ -437,6 +437,10 @@ class QPDFWriter @@ -437,6 +437,10 @@ class QPDFWriter
437 QPDF_DLL 437 QPDF_DLL
438 std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable(); 438 std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable();
439 439
  440 + // The following structs / classes are not part of the public API.
  441 + struct Object;
  442 + class ObjTable;
  443 +
440 private: 444 private:
441 // flags used by unparseObject 445 // flags used by unparseObject
442 static int const f_stream = 1 << 0; 446 static int const f_stream = 1 << 0;
@@ -550,6 +554,7 @@ class QPDFWriter @@ -550,6 +554,7 @@ class QPDFWriter
550 void writeLinearized(); 554 void writeLinearized();
551 void enqueuePart(std::vector<QPDFObjectHandle>& part); 555 void enqueuePart(std::vector<QPDFObjectHandle>& part);
552 void writeEncryptionDictionary(); 556 void writeEncryptionDictionary();
  557 + void initializeTables(size_t extra = 0);
553 void doWriteSetup(); 558 void doWriteSetup();
554 void writeHeader(); 559 void writeHeader();
555 void writeHintStream(int hint_id); 560 void writeHintStream(int hint_id);
@@ -605,97 +610,9 @@ class QPDFWriter @@ -605,97 +610,9 @@ class QPDFWriter
605 void computeDeterministicIDData(); 610 void computeDeterministicIDData();
606 611
607 void discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out); 612 void discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out);
  613 + void discardGeneration(std::map<int, int>& out);
608 614
609 - class Members  
610 - {  
611 - friend class QPDFWriter;  
612 -  
613 - public:  
614 - QPDF_DLL  
615 - ~Members();  
616 -  
617 - private:  
618 - Members(QPDF& pdf);  
619 - Members(Members const&) = delete;  
620 -  
621 - QPDF& pdf;  
622 - QPDFObjGen root_og{-1, 0};  
623 - char const* filename{"unspecified"};  
624 - FILE* file{nullptr};  
625 - bool close_file{false};  
626 - Pl_Buffer* buffer_pipeline{nullptr};  
627 - Buffer* output_buffer{nullptr};  
628 - bool normalize_content_set{false};  
629 - bool normalize_content{false};  
630 - bool compress_streams{true};  
631 - bool compress_streams_set{false};  
632 - qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_none};  
633 - bool stream_decode_level_set{false};  
634 - bool recompress_flate{false};  
635 - bool qdf_mode{false};  
636 - bool preserve_unreferenced_objects{false};  
637 - bool newline_before_endstream{false};  
638 - bool static_id{false};  
639 - bool suppress_original_object_ids{false};  
640 - bool direct_stream_lengths{true};  
641 - bool encrypted{false};  
642 - bool preserve_encryption{true};  
643 - bool linearized{false};  
644 - bool pclm{false};  
645 - qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};  
646 - std::string encryption_key;  
647 - bool encrypt_metadata{true};  
648 - bool encrypt_use_aes{false};  
649 - std::map<std::string, std::string> encryption_dictionary;  
650 - int encryption_V{0};  
651 - int encryption_R{0};  
652 -  
653 - std::string id1; // for /ID key of  
654 - std::string id2; // trailer dictionary  
655 - std::string final_pdf_version;  
656 - int final_extension_level{0};  
657 - std::string min_pdf_version;  
658 - int min_extension_level{0};  
659 - std::string forced_pdf_version;  
660 - int forced_extension_level{0};  
661 - std::string extra_header_text;  
662 - int encryption_dict_objid{0};  
663 - std::string cur_data_key;  
664 - std::list<std::shared_ptr<Pipeline>> to_delete;  
665 - Pl_Count* pipeline{nullptr};  
666 - std::vector<QPDFObjectHandle> object_queue;  
667 - size_t object_queue_front{0};  
668 - std::map<QPDFObjGen, int> obj_renumber;  
669 - std::map<int, QPDFXRefEntry> xref;  
670 - std::map<int, qpdf_offset_t> lengths;  
671 - int next_objid{1};  
672 - int cur_stream_length_id{0};  
673 - size_t cur_stream_length{0};  
674 - bool added_newline{false};  
675 - int max_ostream_index{0};  
676 - std::set<QPDFObjGen> normalized_streams;  
677 - std::map<QPDFObjGen, int> page_object_to_seq;  
678 - std::map<QPDFObjGen, int> contents_to_page_seq;  
679 - std::map<QPDFObjGen, int> object_to_object_stream;  
680 - std::map<int, std::set<QPDFObjGen>> object_stream_to_objects;  
681 - std::list<Pipeline*> pipeline_stack;  
682 - unsigned long long next_stack_id{0};  
683 - bool deterministic_id{false};  
684 - Pl_MD5* md5_pipeline{nullptr};  
685 - std::string deterministic_id_data;  
686 - bool did_write_setup{false};  
687 -  
688 - // For linearization only  
689 - std::string lin_pass1_filename;  
690 - std::map<int, int> obj_renumber_no_gen;  
691 - std::map<int, int> object_to_object_stream_no_gen;  
692 -  
693 - // For progress reporting  
694 - std::shared_ptr<ProgressReporter> progress_reporter;  
695 - int events_expected{0};  
696 - int events_seen{0};  
697 - int next_progress_report{0};  
698 - }; 615 + class Members;
699 616
700 // Keep all member variables inside the Members object, which we dynamically allocate. This 617 // Keep all member variables inside the Members object, which we dynamically allocate. This
701 // makes it possible to add new private members without breaking binary compatibility. 618 // makes it possible to add new private members without breaking binary compatibility.
libqpdf/QPDF.cc
@@ -2377,6 +2377,19 @@ QPDF::getXRefTable() @@ -2377,6 +2377,19 @@ QPDF::getXRefTable()
2377 return m->xref_table; 2377 return m->xref_table;
2378 } 2378 }
2379 2379
  2380 +size_t
  2381 +QPDF::tableSize()
  2382 +{
  2383 + // If obj_cache is dense, accommodate all object in tables,else accommodate only original
  2384 + // objects.
  2385 + auto max_xref = m->xref_table.size() ? m->xref_table.crbegin()->first.getObj() : 0;
  2386 + auto max_obj = m->obj_cache.size() ? m->obj_cache.crbegin()->first.getObj() : 0;
  2387 + if (max_obj < 1.1 * std::max(toI(m->obj_cache.size()), max_xref)) {
  2388 + return toS(++max_obj);
  2389 + }
  2390 + return toS(++max_xref);
  2391 +}
  2392 +
2380 void 2393 void
2381 QPDF::getObjectStreamData(std::map<int, int>& omap) 2394 QPDF::getObjectStreamData(std::map<int, int>& omap)
2382 { 2395 {
libqpdf/QPDFWriter.cc
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 2
3 #include <qpdf/qpdf-config.h> // include early for large file support 3 #include <qpdf/qpdf-config.h> // include early for large file support
4 4
5 -#include <qpdf/QPDFWriter.hh> 5 +#include <qpdf/QPDFWriter_private.hh>
6 6
7 #include <qpdf/MD5.hh> 7 #include <qpdf/MD5.hh>
8 #include <qpdf/Pl_AES_PDF.hh> 8 #include <qpdf/Pl_AES_PDF.hh>
@@ -1064,7 +1064,7 @@ QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og) @@ -1064,7 +1064,7 @@ QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og)
1064 1064
1065 // Reserve numbers for the objects that belong to this object stream. 1065 // Reserve numbers for the objects that belong to this object stream.
1066 for (auto const& iter: m->object_stream_to_objects[objid]) { 1066 for (auto const& iter: m->object_stream_to_objects[objid]) {
1067 - m->obj_renumber[iter] = m->next_objid++; 1067 + m->obj[iter].renumber = m->next_objid++;
1068 } 1068 }
1069 } 1069 }
1070 1070
@@ -1093,18 +1093,19 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) @@ -1093,18 +1093,19 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
1093 } 1093 }
1094 1094
1095 QPDFObjGen og = object.getObjGen(); 1095 QPDFObjGen og = object.getObjGen();
  1096 + auto& renumber = m->obj[og].renumber;
1096 1097
1097 - if (m->obj_renumber.count(og) == 0) { 1098 + if (renumber == 0) {
1098 if (m->object_to_object_stream.count(og)) { 1099 if (m->object_to_object_stream.count(og)) {
1099 // This is in an object stream. Don't process it here. Instead, enqueue the object 1100 // This is in an object stream. Don't process it here. Instead, enqueue the object
1100 // stream. Object streams always have generation 0. 1101 // stream. Object streams always have generation 0.
1101 int stream_id = m->object_to_object_stream[og]; 1102 int stream_id = m->object_to_object_stream[og];
1102 // Detect loops by storing invalid object ID 0, which will get overwritten later. 1103 // Detect loops by storing invalid object ID 0, which will get overwritten later.
1103 - m->obj_renumber[og] = 0; 1104 + renumber = -1;
1104 enqueueObject(m->pdf.getObjectByID(stream_id, 0)); 1105 enqueueObject(m->pdf.getObjectByID(stream_id, 0));
1105 } else { 1106 } else {
1106 m->object_queue.push_back(object); 1107 m->object_queue.push_back(object);
1107 - m->obj_renumber[og] = m->next_objid++; 1108 + renumber = m->next_objid++;
1108 1109
1109 if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) { 1110 if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) {
1110 // For linearized files, uncompressed objects go at end, and we take care of 1111 // For linearized files, uncompressed objects go at end, and we take care of
@@ -1117,7 +1118,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) @@ -1117,7 +1118,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
1117 ++m->next_objid; 1118 ++m->next_objid;
1118 } 1119 }
1119 } 1120 }
1120 - } else if (m->obj_renumber[og] == 0) { 1121 + } else if (renumber == -1) {
1121 // This can happen if a specially constructed file indicates that an object stream is 1122 // This can happen if a specially constructed file indicates that an object stream is
1122 // inside itself. 1123 // inside itself.
1123 QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream"); 1124 QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream");
@@ -1147,9 +1148,7 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags) @@ -1147,9 +1148,7 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
1147 enqueueObject(child); 1148 enqueueObject(child);
1148 } 1149 }
1149 if (child.isIndirect()) { 1150 if (child.isIndirect()) {
1150 - QPDFObjGen old_og = child.getObjGen();  
1151 - int new_id = m->obj_renumber[old_og];  
1152 - writeString(std::to_string(new_id)); 1151 + writeString(std::to_string(m->obj[child].renumber));
1153 writeString(" 0 R"); 1152 writeString(" 0 R");
1154 } else { 1153 } else {
1155 unparseObject(child, level, flags); 1154 unparseObject(child, level, flags);
@@ -1527,9 +1526,8 @@ QPDFWriter::unparseObject( @@ -1527,9 +1526,8 @@ QPDFWriter::unparseObject(
1527 writeString(">>"); 1526 writeString(">>");
1528 } else if (tc == ::ot_stream) { 1527 } else if (tc == ::ot_stream) {
1529 // Write stream data to a buffer. 1528 // Write stream data to a buffer.
1530 - int new_id = m->obj_renumber[old_og];  
1531 if (!m->direct_stream_lengths) { 1529 if (!m->direct_stream_lengths) {
1532 - m->cur_stream_length_id = new_id + 1; 1530 + m->cur_stream_length_id = m->obj[old_og].renumber + 1;
1533 } 1531 }
1534 1532
1535 flags |= f_stream; 1533 flags |= f_stream;
@@ -1626,7 +1624,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1626,7 +1624,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1626 QPDFObjGen old_og = object.getObjGen(); 1624 QPDFObjGen old_og = object.getObjGen();
1627 qpdf_assert_debug(old_og.getGen() == 0); 1625 qpdf_assert_debug(old_og.getGen() == 0);
1628 int old_id = old_og.getObj(); 1626 int old_id = old_og.getObj();
1629 - int new_id = m->obj_renumber[old_og]; 1627 + int new_stream_id = m->obj[old_og].renumber;
1630 1628
1631 std::vector<qpdf_offset_t> offsets; 1629 std::vector<qpdf_offset_t> offsets;
1632 qpdf_offset_t first = 0; 1630 qpdf_offset_t first = 0;
@@ -1670,7 +1668,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1670,7 +1668,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1670 int count = -1; 1668 int count = -1;
1671 for (auto const& obj: m->object_stream_to_objects[old_id]) { 1669 for (auto const& obj: m->object_stream_to_objects[old_id]) {
1672 ++count; 1670 ++count;
1673 - int new_obj = m->obj_renumber[obj]; 1671 + int new_obj = m->obj[obj].renumber;
1674 if (first_obj == -1) { 1672 if (first_obj == -1) {
1675 first_obj = new_obj; 1673 first_obj = new_obj;
1676 } 1674 }
@@ -1706,13 +1704,13 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1706,13 +1704,13 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1706 } 1704 }
1707 writeObject(obj_to_write, count); 1705 writeObject(obj_to_write, count);
1708 1706
1709 - m->xref[new_obj] = QPDFXRefEntry(new_id, count); 1707 + m->xref[new_obj] = QPDFXRefEntry(new_stream_id, count);
1710 } 1708 }
1711 } 1709 }
1712 1710
1713 // Write the object 1711 // Write the object
1714 - openObject(new_id);  
1715 - setDataKey(new_id); 1712 + openObject(new_stream_id);
  1713 + setDataKey(new_stream_id);
1716 writeString("<<"); 1714 writeString("<<");
1717 writeStringQDF("\n "); 1715 writeStringQDF("\n ");
1718 writeString(" /Type /ObjStm"); 1716 writeString(" /Type /ObjStm");
@@ -1754,7 +1752,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1754,7 +1752,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1754 } 1752 }
1755 writeString("endstream"); 1753 writeString("endstream");
1756 m->cur_data_key.clear(); 1754 m->cur_data_key.clear();
1757 - closeObject(new_id); 1755 + closeObject(new_stream_id);
1758 } 1756 }
1759 1757
1760 void 1758 void
@@ -1769,7 +1767,7 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) @@ -1769,7 +1767,7 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1769 } 1767 }
1770 1768
1771 indicateProgress(false, false); 1769 indicateProgress(false, false);
1772 - int new_id = m->obj_renumber[old_og]; 1770 + auto new_id = m->obj[old_og].renumber;
1773 if (m->qdf_mode) { 1771 if (m->qdf_mode) {
1774 if (m->page_object_to_seq.count(old_og)) { 1772 if (m->page_object_to_seq.count(old_og)) {
1775 writeString("%% Page "); 1773 writeString("%% Page ");
@@ -1979,6 +1977,10 @@ QPDFWriter::generateObjectStreams() @@ -1979,6 +1977,10 @@ QPDFWriter::generateObjectStreams()
1979 1977
1980 std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf); 1978 std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf);
1981 size_t n_object_streams = (eligible.size() + 99U) / 100U; 1979 size_t n_object_streams = (eligible.size() + 99U) / 100U;
  1980 +
  1981 + // Initialize object table for all existing objects plus some headroom for objects created
  1982 + // during writing.
  1983 + initializeTables(2U * n_object_streams);
1982 if (n_object_streams == 0) { 1984 if (n_object_streams == 0) {
1983 return; 1985 return;
1984 } 1986 }
@@ -2056,6 +2058,13 @@ QPDFWriter::prepareFileForWrite() @@ -2056,6 +2058,13 @@ QPDFWriter::prepareFileForWrite()
2056 } 2058 }
2057 2059
2058 void 2060 void
  2061 +QPDFWriter::initializeTables(size_t extra)
  2062 +{
  2063 + auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra;
  2064 + m->obj.initialize(size);
  2065 +}
  2066 +
  2067 +void
2059 QPDFWriter::doWriteSetup() 2068 QPDFWriter::doWriteSetup()
2060 { 2069 {
2061 if (m->did_write_setup) { 2070 if (m->did_write_setup) {
@@ -2124,10 +2133,13 @@ QPDFWriter::doWriteSetup() @@ -2124,10 +2133,13 @@ QPDFWriter::doWriteSetup()
2124 2133
2125 switch (m->object_stream_mode) { 2134 switch (m->object_stream_mode) {
2126 case qpdf_o_disable: 2135 case qpdf_o_disable:
2127 - // no action required 2136 + // Initialize object table for all existing objects plus some headroom for objects created
  2137 + // during writing.
  2138 + initializeTables();
2128 break; 2139 break;
2129 2140
2130 case qpdf_o_preserve: 2141 case qpdf_o_preserve:
  2142 + initializeTables();
2131 preserveObjectStreams(); 2143 preserveObjectStreams();
2132 break; 2144 break;
2133 2145
@@ -2215,7 +2227,7 @@ QPDFWriter::write() @@ -2215,7 +2227,7 @@ QPDFWriter::write()
2215 QPDFObjGen 2227 QPDFObjGen
2216 QPDFWriter::getRenumberedObjGen(QPDFObjGen og) 2228 QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
2217 { 2229 {
2218 - return QPDFObjGen(m->obj_renumber[og], 0); 2230 + return QPDFObjGen(m->obj[og].renumber, 0);
2219 } 2231 }
2220 2232
2221 std::map<QPDFObjGen, QPDFXRefEntry> 2233 std::map<QPDFObjGen, QPDFXRefEntry>
@@ -2534,6 +2546,26 @@ QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, @@ -2534,6 +2546,26 @@ QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int,
2534 } 2546 }
2535 2547
2536 void 2548 void
  2549 +QPDFWriter::discardGeneration(std::map<int, int>& out)
  2550 +{
  2551 + // Rebuild 'out' as a map from object number to renumbered object id, taken from the object
  2552 + // table m->obj. The table is indexed by object number only, so generations are already
  2553 + // discarded; no check for duplicate object numbers is performed in this overload.
  2554 + //
  2555 + // Background: there are deep assumptions in the linearization code in QPDF that there is only
  2556 + // one object with each object number; i.e., you can't have two objects with the same object
  2557 + // number and different generations. This is a pretty safe assumption because Adobe Reader and
  2558 + // Acrobat can't actually handle this case.
  2559 +
  2560 + out.clear();
  2561 + m->obj.forEach([&out](auto id, auto const& item) -> void {
  2562 + if (item.renumber > 0) { // skip ids with no new number assigned (0) or the -1 marker
  2563 + out[id] = item.renumber;
  2564 + }
  2565 + });
  2566 +}
  2567 +
  2568 +void
2537 QPDFWriter::writeLinearized() 2569 QPDFWriter::writeLinearized()
2538 { 2570 {
2539 // Optimize file and enqueue objects in order 2571 // Optimize file and enqueue objects in order
@@ -2690,7 +2722,7 @@ QPDFWriter::writeLinearized() @@ -2690,7 +2722,7 @@ QPDFWriter::writeLinearized()
2690 writeString("<<"); 2722 writeString("<<");
2691 if (pass == 2) { 2723 if (pass == 2) {
2692 std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages(); 2724 std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages();
2693 - int first_page_object = m->obj_renumber[pages.at(0).getObjGen()]; 2725 + int first_page_object = m->obj[pages.at(0)].renumber;
2694 int npages = QIntC::to_int(pages.size()); 2726 int npages = QIntC::to_int(pages.size());
2695 2727
2696 writeString(" /Linearized 1 /L "); 2728 writeString(" /Linearized 1 /L ");
@@ -2855,7 +2887,7 @@ QPDFWriter::writeLinearized() @@ -2855,7 +2887,7 @@ QPDFWriter::writeLinearized()
2855 writeString(std::to_string(first_xref_offset)); 2887 writeString(std::to_string(first_xref_offset));
2856 writeString("\n%%EOF\n"); 2888 writeString("\n%%EOF\n");
2857 2889
2858 - discardGeneration(m->obj_renumber, m->obj_renumber_no_gen); 2890 + discardGeneration(m->obj_renumber_no_gen);
2859 2891
2860 if (pass == 1) { 2892 if (pass == 1) {
2861 if (m->deterministic_id) { 2893 if (m->deterministic_id) {
libqpdf/qpdf/ObjTable.hh 0 → 100644
  1 +#ifndef OBJTABLE_HH
  2 +#define OBJTABLE_HH
  3 +
  4 +#include <qpdf/QPDFObjGen.hh>
  5 +#include <qpdf/QPDFObjectHandle.hh>
  6 +
  7 +#include "qpdf/QIntC.hh"
  8 +#include <limits>
  9 +
  10 +// A table of objects indexed by object id. This is intended as a more efficient replacement for
  11 +// std::map<QPDFObjGen, T> containers.
  12 +//
  13 +// The table is implemented as a std::vector, with the object id implicitly represented by the index
  14 +// of the object. This has a number of implications, including:
  15 +// - operations that change the index of existing elements such as insertion and deletions are not
  16 +// permitted.
  17 +// - operations that extend the table may invalidate iterators and references to objects.
  18 +//
  19 +// The provided overloads of the access operator[] are safe. For out of bounds access they will
  20 +// either extend the table or throw a runtime error.
  21 +//
  22 +// ObjTable has a map 'sparse_elements' to deal with very sparse / extremely large object tables
  23 +// (usually as the result of invalid dangling references). This map may contain objects not found in
  24 +// the xref table of the original pdf if there are dangling references with an id significantly
  25 +// larger than the largest valid object id found in original pdf.
  26 +
  27 +template <class T>
  28 +class ObjTable: public std::vector<T>
  29 +{
  30 + public:
  31 + ObjTable() = default;
  32 + ObjTable(const ObjTable&) = delete;
  33 + ObjTable(ObjTable&&) = delete;
  34 + ObjTable& operator[](const ObjTable&) = delete;
  35 + ObjTable& operator[](ObjTable&&) = delete;
  36 +
  37 + // Remove unchecked access.
  38 + T& operator[](unsigned long idx) = delete;
  39 + T const& operator[](unsigned long idx) const = delete;
  40 +
  41 + inline T const&
  42 + operator[](int idx) const
  43 + {
  44 + return element(static_cast<size_t>(idx));
  45 + }
  46 +
  47 + inline T const&
  48 + operator[](QPDFObjGen og) const
  49 + {
  50 + return element(static_cast<size_t>(og.getObj()));
  51 + }
  52 +
  53 + inline T const&
  54 + operator[](QPDFObjectHandle oh) const
  55 + {
  56 + return element(static_cast<size_t>(oh.getObjectID()));
  57 + }
  58 +
  59 + inline bool
  60 + contains(size_t idx) const
  61 + {
  62 + return idx < std::vector<T>::size() || sparse_elements.count(idx);
  63 + }
  64 +
  65 + inline bool
  66 + contains(QPDFObjectHandle oh) const
  67 + {
  68 + return contains(static_cast<size_t>(oh.getObjectID()));
  69 + }
  70 +
  71 + protected:
  72 + inline T&
  73 + operator[](int id)
  74 + {
  75 + return element(static_cast<size_t>(id));
  76 + }
  77 +
  78 + inline T&
  79 + operator[](QPDFObjGen og)
  80 + {
  81 + return element(static_cast<size_t>(og.getObj()));
  82 + }
  83 +
  84 + inline T&
  85 + operator[](QPDFObjectHandle oh)
  86 + {
  87 + return element(static_cast<size_t>(oh.getObjectID()));
  88 + }
  89 +
  90 + inline T&
  91 + operator[](unsigned int id)
  92 + {
  93 + return element(id);
  94 + }
  95 +
  96 + void
  97 + initialize(size_t idx)
  98 + {
  99 + if (std::vector<T>::size() > 0 || sparse_elements.size() > 0) {
  100 + throw ::std::logic_error("ObjTable accessed before initialization");
  101 + } else if (
  102 + idx >= static_cast<size_t>(std::numeric_limits<int>::max()) ||
  103 + idx >= std::vector<T>::max_size()) {
  104 + throw std::runtime_error("Invalid maximum object id initializing ObjTable.");
  105 + } else {
  106 + std::vector<T>::resize(++idx);
  107 + }
  108 + }
  109 +
  110 + inline void
  111 + forEach(std::function<void(int, const T&)> fn)
  112 + {
  113 + int i = 0;
  114 + for (auto const& item: *this) {
  115 + fn(i++, item);
  116 + }
  117 + for (auto const& [id, item]: sparse_elements) {
  118 + fn(QIntC::to_int(id), item);
  119 + }
  120 + }
  121 +
  122 + private:
  123 + std::map<size_t, T> sparse_elements;
  124 +
  125 + inline T&
  126 + element(size_t idx)
  127 + {
  128 + if (idx < std::vector<T>::size()) {
  129 + return std::vector<T>::operator[](idx);
  130 + } else if (idx < static_cast<size_t>(std::numeric_limits<int>::max())) {
  131 + return sparse_elements[idx];
  132 + }
  133 + throw std::runtime_error("Invalid object id accessing ObjTable.");
  134 + return element(0); // doesn't return
  135 + }
  136 +
  137 + inline T const&
  138 + element(size_t idx) const
  139 + {
  140 + if (idx < std::vector<T>::size()) {
  141 + return std::vector<T>::operator[](idx);
  142 + } else if (idx < static_cast<size_t>(std::numeric_limits<int>::max())) {
  143 + return sparse_elements.at(idx);
  144 + }
  145 + throw std::runtime_error("Invalid object id accessing ObjTable.");
  146 + return element(0); // doesn't return
  147 + }
  148 +};
  149 +
  150 +#endif // OBJTABLE_HH
libqpdf/qpdf/QPDFWriter_private.hh 0 → 100644
  1 +#ifndef QPDFWRITER_PRIVATE_HH
  2 +#define QPDFWRITER_PRIVATE_HH
  3 +
  4 +#include <qpdf/QPDFWriter.hh>
  5 +
  6 +#include <qpdf/ObjTable.hh>
  7 +
  8 +// This file is intended for inclusion by QPDFWriter, QPDF, QPDF_optimization and QPDF_linearization
  9 +// only.
  10 +
  11 +struct QPDFWriter::Object
  12 +{
  13 + int renumber{0}; // object id used in the output; 0 = none assigned yet, -1 = loop-detection marker
  14 +};
  15 +
  16 +class QPDFWriter::ObjTable: public ::ObjTable<QPDFWriter::Object>
  17 +{
  18 + friend class QPDFWriter; // lets QPDFWriter use the protected mutable operator[], initialize, forEach
  19 +};
  20 +
  21 +class QPDFWriter::Members
  22 +{
  23 + friend class QPDFWriter;
  24 +
  25 + public:
  26 + QPDF_DLL
  27 + ~Members();
  28 +
  29 + private:
  30 + Members(QPDF& pdf);
  31 + Members(Members const&) = delete;
  32 +
  33 + QPDF& pdf;
  34 + QPDFObjGen root_og{-1, 0};
  35 + char const* filename{"unspecified"};
  36 + FILE* file{nullptr};
  37 + bool close_file{false};
  38 + Pl_Buffer* buffer_pipeline{nullptr};
  39 + Buffer* output_buffer{nullptr};
  40 + bool normalize_content_set{false};
  41 + bool normalize_content{false};
  42 + bool compress_streams{true};
  43 + bool compress_streams_set{false};
  44 + qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_none};
  45 + bool stream_decode_level_set{false};
  46 + bool recompress_flate{false};
  47 + bool qdf_mode{false};
  48 + bool preserve_unreferenced_objects{false};
  49 + bool newline_before_endstream{false};
  50 + bool static_id{false};
  51 + bool suppress_original_object_ids{false};
  52 + bool direct_stream_lengths{true};
  53 + bool encrypted{false};
  54 + bool preserve_encryption{true};
  55 + bool linearized{false};
  56 + bool pclm{false};
  57 + qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};
  58 + std::string encryption_key;
  59 + bool encrypt_metadata{true};
  60 + bool encrypt_use_aes{false};
  61 + std::map<std::string, std::string> encryption_dictionary;
  62 + int encryption_V{0};
  63 + int encryption_R{0};
  64 +
  65 + std::string id1; // for /ID key of
  66 + std::string id2; // trailer dictionary
  67 + std::string final_pdf_version;
  68 + int final_extension_level{0};
  69 + std::string min_pdf_version;
  70 + int min_extension_level{0};
  71 + std::string forced_pdf_version;
  72 + int forced_extension_level{0};
  73 + std::string extra_header_text;
  74 + int encryption_dict_objid{0};
  75 + std::string cur_data_key;
  76 + std::list<std::shared_ptr<Pipeline>> to_delete;
  77 + Pl_Count* pipeline{nullptr};
  78 + std::vector<QPDFObjectHandle> object_queue;
  79 + size_t object_queue_front{0};
  80 + QPDFWriter::ObjTable obj; // per-object write data (currently the renumbered id), indexed by object id
  81 + std::map<int, QPDFXRefEntry> xref;
  82 + std::map<int, qpdf_offset_t> lengths;
  83 + int next_objid{1};
  84 + int cur_stream_length_id{0};
  85 + size_t cur_stream_length{0};
  86 + bool added_newline{false};
  87 + int max_ostream_index{0};
  88 + std::set<QPDFObjGen> normalized_streams;
  89 + std::map<QPDFObjGen, int> page_object_to_seq;
  90 + std::map<QPDFObjGen, int> contents_to_page_seq;
  91 + std::map<QPDFObjGen, int> object_to_object_stream;
  92 + std::map<int, std::set<QPDFObjGen>> object_stream_to_objects;
  93 + std::list<Pipeline*> pipeline_stack;
  94 + unsigned long long next_stack_id{0};
  95 + bool deterministic_id{false};
  96 + Pl_MD5* md5_pipeline{nullptr};
  97 + std::string deterministic_id_data;
  98 + bool did_write_setup{false};
  99 +
  100 + // For linearization only
  101 + std::string lin_pass1_filename;
  102 + std::map<int, int> obj_renumber_no_gen;
  103 + std::map<int, int> object_to_object_stream_no_gen;
  104 +
  105 + // For progress reporting
  106 + std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
  107 + int events_expected{0};
  108 + int events_seen{0};
  109 + int next_progress_report{0};
  110 +};
  111 +
  112 +#endif // QPDFWRITER_PRIVATE_HH
libtests/CMakeLists.txt
@@ -23,6 +23,7 @@ set(TEST_PROGRAMS @@ -23,6 +23,7 @@ set(TEST_PROGRAMS
23 md5 23 md5
24 nntree 24 nntree
25 numrange 25 numrange
  26 + obj_table
26 pdf_version 27 pdf_version
27 pl_function 28 pl_function
28 pointer_holder 29 pointer_holder
libtests/obj_table.cc 0 → 100644
  1 +#include <qpdf/ObjTable.hh>
  2 +
  3 +struct Test
  4 +{
  5 + int value{0};
  6 +};
  7 +
  8 +class Table: public ObjTable<Test>
  9 +{
  10 + public:
  11 + Table()
  12 + {
  13 + initialize(5);
  14 + }
  15 +
  16 + void
  17 + test()
  18 + {
  19 + for (int i = 0; i < 10; ++i) {
  20 + (*this)[i].value = 2 * i;
  21 + (*this)[1000 + i].value = 2 * (1000 + i);
  22 + }
  23 +
  24 + forEach([](auto i, auto const& item) -> void {
  25 + std::cout << std::to_string(i) << " : " << std::to_string(item.value) << "\n";
  26 + });
  27 +
  28 + std::cout << "2000 : " << std::to_string((*this)[2000].value) << "\n";
  29 + }
  30 +};
  31 +
  32 +int
  33 +main()
  34 +{
  35 + Table().test();
  36 +
  37 + std::cout << "object table tests done\n";
  38 + return 0;
  39 +}
libtests/qtest/obj_table.test 0 → 100644
  1 +#!/usr/bin/env perl
  2 +require 5.008;
  3 +use warnings;
  4 +use strict;
  5 +
  6 +chdir("obj_table") or die "chdir testdir failed: $!\n"; # directory that holds obj_table.out
  7 +
  8 +require TestDriver;
  9 +
  10 +my $td = new TestDriver('object table');
  11 +
  12 +$td->runtest("obj_table",
  13 + {$td->COMMAND => "obj_table"},
  14 + {$td->FILE => "obj_table.out", # presumably the expected output of the command -- confirm
  15 + $td->EXIT_STATUS => 0},
  16 + $td->NORMALIZE_NEWLINES);
  17 +
  18 +$td->report(1); # NOTE(review): argument looks like the expected number of tests -- confirm
libtests/qtest/obj_table/obj_table.out 0 → 100644
  1 +0 : 0
  2 +1 : 2
  3 +2 : 4
  4 +3 : 6
  5 +4 : 8
  6 +5 : 10
  7 +6 : 12
  8 +7 : 14
  9 +8 : 16
  10 +9 : 18
  11 +1000 : 2000
  12 +1001 : 2002
  13 +1002 : 2004
  14 +1003 : 2006
  15 +1004 : 2008
  16 +1005 : 2010
  17 +1006 : 2012
  18 +1007 : 2014
  19 +1008 : 2016
  20 +1009 : 2018
  21 +2000 : 0
  22 +object table tests done