Commit 0bb1458f3899d524b1dc76e0542be08b75e5e9e4

Authored by Jay Berkenbilt
Committed by GitHub
2 parents 69b9bc72 aa2e0d23

Merge pull request #1161 from m-holger/writer

Tune QPDFWriter
include/qpdf/QPDF.hh
@@ -41,6 +41,7 @@ @@ -41,6 +41,7 @@
41 #include <qpdf/QPDFObjectHandle.hh> 41 #include <qpdf/QPDFObjectHandle.hh>
42 #include <qpdf/QPDFStreamFilter.hh> 42 #include <qpdf/QPDFStreamFilter.hh>
43 #include <qpdf/QPDFTokenizer.hh> 43 #include <qpdf/QPDFTokenizer.hh>
  44 +#include <qpdf/QPDFWriter.hh>
44 #include <qpdf/QPDFXRefEntry.hh> 45 #include <qpdf/QPDFXRefEntry.hh>
45 46
46 class QPDF_Stream; 47 class QPDF_Stream;
@@ -727,43 +728,62 @@ class QPDF @@ -727,43 +728,62 @@ class QPDF
727 728
728 private: 729 private:
729 static void 730 static void
  731 + optimize(
  732 + QPDF& qpdf,
  733 + QPDFWriter::ObjTable const& obj,
  734 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
  735 + {
  736 + return qpdf.optimize(obj, skip_stream_parameters);
  737 + }
  738 +
  739 + static void
730 getLinearizedParts( 740 getLinearizedParts(
731 QPDF& qpdf, 741 QPDF& qpdf,
732 - std::map<int, int> const& object_stream_data, 742 + QPDFWriter::ObjTable const& obj,
733 std::vector<QPDFObjectHandle>& part4, 743 std::vector<QPDFObjectHandle>& part4,
734 std::vector<QPDFObjectHandle>& part6, 744 std::vector<QPDFObjectHandle>& part6,
735 std::vector<QPDFObjectHandle>& part7, 745 std::vector<QPDFObjectHandle>& part7,
736 std::vector<QPDFObjectHandle>& part8, 746 std::vector<QPDFObjectHandle>& part8,
737 std::vector<QPDFObjectHandle>& part9) 747 std::vector<QPDFObjectHandle>& part9)
738 { 748 {
739 - qpdf.getLinearizedParts(object_stream_data, part4, part6, part7, part8, part9); 749 + qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
740 } 750 }
741 751
742 static void 752 static void
743 generateHintStream( 753 generateHintStream(
744 QPDF& qpdf, 754 QPDF& qpdf,
745 - std::map<int, QPDFXRefEntry> const& xref,  
746 - std::map<int, qpdf_offset_t> const& lengths,  
747 - std::map<int, int> const& obj_renumber, 755 + QPDFWriter::NewObjTable const& new_obj,
  756 + QPDFWriter::ObjTable const& obj,
748 std::shared_ptr<Buffer>& hint_stream, 757 std::shared_ptr<Buffer>& hint_stream,
749 int& S, 758 int& S,
750 int& O, 759 int& O,
751 bool compressed) 760 bool compressed)
752 { 761 {
753 - return qpdf.generateHintStream(  
754 - xref, lengths, obj_renumber, hint_stream, S, O, compressed); 762 + return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
755 } 763 }
756 764
757 - static void  
758 - getObjectStreamData(QPDF& qpdf, std::map<int, int>& omap) 765 + static std::vector<QPDFObjGen>
  766 + getCompressibleObjGens(QPDF& qpdf)
759 { 767 {
760 - qpdf.getObjectStreamData(omap); 768 + return qpdf.getCompressibleObjVector();
761 } 769 }
762 770
763 - static std::vector<QPDFObjGen>  
764 - getCompressibleObjGens(QPDF& qpdf) 771 + static std::vector<bool>
  772 + getCompressibleObjSet(QPDF& qpdf)
765 { 773 {
766 - return qpdf.getCompressibleObjGens(); 774 + return qpdf.getCompressibleObjSet();
  775 + }
  776 +
  777 + static std::map<QPDFObjGen, QPDFXRefEntry> const&
  778 + getXRefTable(QPDF& qpdf)
  779 + {
  780 + return qpdf.getXRefTableInternal();
  781 + }
  782 +
  783 + static size_t
  784 + tableSize(QPDF& qpdf)
  785 + {
  786 + return qpdf.tableSize();
767 } 787 }
768 }; 788 };
769 789
@@ -1083,10 +1103,21 @@ class QPDF @@ -1083,10 +1103,21 @@ class QPDF
1083 1103
1084 // For QPDFWriter: 1104 // For QPDFWriter:
1085 1105
  1106 + std::map<QPDFObjGen, QPDFXRefEntry> const& getXRefTableInternal();
  1107 + template <typename T>
  1108 + void optimize_internal(
  1109 + T const& object_stream_data,
  1110 + bool allow_changes = true,
  1111 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr);
  1112 + void optimize(
  1113 + QPDFWriter::ObjTable const& obj,
  1114 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
  1115 + size_t tableSize();
  1116 +
1086 // Get lists of all objects in order according to the part of a linearized file that they belong 1117 // Get lists of all objects in order according to the part of a linearized file that they belong
1087 // to. 1118 // to.
1088 void getLinearizedParts( 1119 void getLinearizedParts(
1089 - std::map<int, int> const& object_stream_data, 1120 + QPDFWriter::ObjTable const& obj,
1090 std::vector<QPDFObjectHandle>& part4, 1121 std::vector<QPDFObjectHandle>& part4,
1091 std::vector<QPDFObjectHandle>& part6, 1122 std::vector<QPDFObjectHandle>& part6,
1092 std::vector<QPDFObjectHandle>& part7, 1123 std::vector<QPDFObjectHandle>& part7,
@@ -1094,19 +1125,18 @@ class QPDF @@ -1094,19 +1125,18 @@ class QPDF
1094 std::vector<QPDFObjectHandle>& part9); 1125 std::vector<QPDFObjectHandle>& part9);
1095 1126
1096 void generateHintStream( 1127 void generateHintStream(
1097 - std::map<int, QPDFXRefEntry> const& xref,  
1098 - std::map<int, qpdf_offset_t> const& lengths,  
1099 - std::map<int, int> const& obj_renumber, 1128 + QPDFWriter::NewObjTable const& new_obj,
  1129 + QPDFWriter::ObjTable const& obj,
1100 std::shared_ptr<Buffer>& hint_stream, 1130 std::shared_ptr<Buffer>& hint_stream,
1101 int& S, 1131 int& S,
1102 int& O, 1132 int& O,
1103 bool compressed); 1133 bool compressed);
1104 1134
1105 - // Map object to object stream that contains it  
1106 - void getObjectStreamData(std::map<int, int>&);  
1107 -  
1108 // Get a list of objects that would be permitted in an object stream. 1135 // Get a list of objects that would be permitted in an object stream.
1109 - std::vector<QPDFObjGen> getCompressibleObjGens(); 1136 + template <typename T>
  1137 + std::vector<T> getCompressibleObjGens();
  1138 + std::vector<QPDFObjGen> getCompressibleObjVector();
  1139 + std::vector<bool> getCompressibleObjSet();
1110 1140
1111 // methods to support page handling 1141 // methods to support page handling
1112 1142
@@ -1352,6 +1382,7 @@ class QPDF @@ -1352,6 +1382,7 @@ class QPDF
1352 qpdf_offset_t getLinearizationOffset(QPDFObjGen const&); 1382 qpdf_offset_t getLinearizationOffset(QPDFObjGen const&);
1353 QPDFObjectHandle 1383 QPDFObjectHandle
1354 getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data); 1384 getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
  1385 + QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj);
1355 int lengthNextN(int first_object, int n); 1386 int lengthNextN(int first_object, int n);
1356 void 1387 void
1357 checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj); 1388 checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
@@ -1362,28 +1393,23 @@ class QPDF @@ -1362,28 +1393,23 @@ class QPDF
1362 void dumpHSharedObject(); 1393 void dumpHSharedObject();
1363 void dumpHGeneric(HGeneric&); 1394 void dumpHGeneric(HGeneric&);
1364 qpdf_offset_t adjusted_offset(qpdf_offset_t offset); 1395 qpdf_offset_t adjusted_offset(qpdf_offset_t offset);
1365 - void calculateLinearizationData(std::map<int, int> const& object_stream_data); 1396 + template <typename T>
  1397 + void calculateLinearizationData(T const& object_stream_data);
  1398 + template <typename T>
1366 void pushOutlinesToPart( 1399 void pushOutlinesToPart(
1367 std::vector<QPDFObjectHandle>& part, 1400 std::vector<QPDFObjectHandle>& part,
1368 std::set<QPDFObjGen>& lc_outlines, 1401 std::set<QPDFObjGen>& lc_outlines,
1369 - std::map<int, int> const& object_stream_data); 1402 + T const& object_stream_data);
1370 int outputLengthNextN( 1403 int outputLengthNextN(
1371 int in_object, 1404 int in_object,
1372 int n, 1405 int n,
1373 - std::map<int, qpdf_offset_t> const& lengths,  
1374 - std::map<int, int> const& obj_renumber);  
1375 - void calculateHPageOffset(  
1376 - std::map<int, QPDFXRefEntry> const& xref,  
1377 - std::map<int, qpdf_offset_t> const& lengths,  
1378 - std::map<int, int> const& obj_renumber);  
1379 - void calculateHSharedObject(  
1380 - std::map<int, QPDFXRefEntry> const& xref,  
1381 - std::map<int, qpdf_offset_t> const& lengths,  
1382 - std::map<int, int> const& obj_renumber);  
1383 - void calculateHOutline(  
1384 - std::map<int, QPDFXRefEntry> const& xref,  
1385 - std::map<int, qpdf_offset_t> const& lengths,  
1386 - std::map<int, int> const& obj_renumber); 1406 + QPDFWriter::NewObjTable const& new_obj,
  1407 + QPDFWriter::ObjTable const& obj);
  1408 + void
  1409 + calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
  1410 + void
  1411 + calculateHSharedObject(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
  1412 + void calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
1387 void writeHPageOffset(BitWriter&); 1413 void writeHPageOffset(BitWriter&);
1388 void writeHSharedObject(BitWriter&); 1414 void writeHSharedObject(BitWriter&);
1389 void writeHGeneric(BitWriter&, HGeneric&); 1415 void writeHGeneric(BitWriter&, HGeneric&);
@@ -1407,6 +1433,7 @@ class QPDF @@ -1407,6 +1433,7 @@ class QPDF
1407 QPDFObjGen::set& visited, 1433 QPDFObjGen::set& visited,
1408 bool top); 1434 bool top);
1409 void filterCompressedObjects(std::map<int, int> const& object_stream_data); 1435 void filterCompressedObjects(std::map<int, int> const& object_stream_data);
  1436 + void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data);
1410 1437
1411 // JSON import 1438 // JSON import
1412 void importJSON(std::shared_ptr<InputSource>, bool must_be_complete); 1439 void importJSON(std::shared_ptr<InputSource>, bool must_be_complete);
include/qpdf/QPDFWriter.hh
@@ -437,6 +437,12 @@ class QPDFWriter @@ -437,6 +437,12 @@ class QPDFWriter
437 QPDF_DLL 437 QPDF_DLL
438 std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable(); 438 std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable();
439 439
  440 + // The following structs / classes are not part of the public API.
  441 + struct Object;
  442 + struct NewObject;
  443 + class ObjTable;
  444 + class NewObjTable;
  445 +
440 private: 446 private:
441 // flags used by unparseObject 447 // flags used by unparseObject
442 static int const f_stream = 1 << 0; 448 static int const f_stream = 1 << 0;
@@ -550,6 +556,7 @@ class QPDFWriter @@ -550,6 +556,7 @@ class QPDFWriter
550 void writeLinearized(); 556 void writeLinearized();
551 void enqueuePart(std::vector<QPDFObjectHandle>& part); 557 void enqueuePart(std::vector<QPDFObjectHandle>& part);
552 void writeEncryptionDictionary(); 558 void writeEncryptionDictionary();
  559 + void initializeTables(size_t extra = 0);
553 void doWriteSetup(); 560 void doWriteSetup();
554 void writeHeader(); 561 void writeHeader();
555 void writeHintStream(int hint_id); 562 void writeHintStream(int hint_id);
@@ -604,98 +611,7 @@ class QPDFWriter @@ -604,98 +611,7 @@ class QPDFWriter
604 void pushMD5Pipeline(PipelinePopper&); 611 void pushMD5Pipeline(PipelinePopper&);
605 void computeDeterministicIDData(); 612 void computeDeterministicIDData();
606 613
607 - void discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out);  
608 -  
609 - class Members  
610 - {  
611 - friend class QPDFWriter;  
612 -  
613 - public:  
614 - QPDF_DLL  
615 - ~Members();  
616 -  
617 - private:  
618 - Members(QPDF& pdf);  
619 - Members(Members const&) = delete;  
620 -  
621 - QPDF& pdf;  
622 - QPDFObjGen root_og{-1, 0};  
623 - char const* filename{"unspecified"};  
624 - FILE* file{nullptr};  
625 - bool close_file{false};  
626 - Pl_Buffer* buffer_pipeline{nullptr};  
627 - Buffer* output_buffer{nullptr};  
628 - bool normalize_content_set{false};  
629 - bool normalize_content{false};  
630 - bool compress_streams{true};  
631 - bool compress_streams_set{false};  
632 - qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_none};  
633 - bool stream_decode_level_set{false};  
634 - bool recompress_flate{false};  
635 - bool qdf_mode{false};  
636 - bool preserve_unreferenced_objects{false};  
637 - bool newline_before_endstream{false};  
638 - bool static_id{false};  
639 - bool suppress_original_object_ids{false};  
640 - bool direct_stream_lengths{true};  
641 - bool encrypted{false};  
642 - bool preserve_encryption{true};  
643 - bool linearized{false};  
644 - bool pclm{false};  
645 - qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};  
646 - std::string encryption_key;  
647 - bool encrypt_metadata{true};  
648 - bool encrypt_use_aes{false};  
649 - std::map<std::string, std::string> encryption_dictionary;  
650 - int encryption_V{0};  
651 - int encryption_R{0};  
652 -  
653 - std::string id1; // for /ID key of  
654 - std::string id2; // trailer dictionary  
655 - std::string final_pdf_version;  
656 - int final_extension_level{0};  
657 - std::string min_pdf_version;  
658 - int min_extension_level{0};  
659 - std::string forced_pdf_version;  
660 - int forced_extension_level{0};  
661 - std::string extra_header_text;  
662 - int encryption_dict_objid{0};  
663 - std::string cur_data_key;  
664 - std::list<std::shared_ptr<Pipeline>> to_delete;  
665 - Pl_Count* pipeline{nullptr};  
666 - std::vector<QPDFObjectHandle> object_queue;  
667 - size_t object_queue_front{0};  
668 - std::map<QPDFObjGen, int> obj_renumber;  
669 - std::map<int, QPDFXRefEntry> xref;  
670 - std::map<int, qpdf_offset_t> lengths;  
671 - int next_objid{1};  
672 - int cur_stream_length_id{0};  
673 - size_t cur_stream_length{0};  
674 - bool added_newline{false};  
675 - int max_ostream_index{0};  
676 - std::set<QPDFObjGen> normalized_streams;  
677 - std::map<QPDFObjGen, int> page_object_to_seq;  
678 - std::map<QPDFObjGen, int> contents_to_page_seq;  
679 - std::map<QPDFObjGen, int> object_to_object_stream;  
680 - std::map<int, std::set<QPDFObjGen>> object_stream_to_objects;  
681 - std::list<Pipeline*> pipeline_stack;  
682 - unsigned long long next_stack_id{0};  
683 - bool deterministic_id{false};  
684 - Pl_MD5* md5_pipeline{nullptr};  
685 - std::string deterministic_id_data;  
686 - bool did_write_setup{false};  
687 -  
688 - // For linearization only  
689 - std::string lin_pass1_filename;  
690 - std::map<int, int> obj_renumber_no_gen;  
691 - std::map<int, int> object_to_object_stream_no_gen;  
692 -  
693 - // For progress reporting  
694 - std::shared_ptr<ProgressReporter> progress_reporter;  
695 - int events_expected{0};  
696 - int events_seen{0};  
697 - int next_progress_report{0};  
698 - }; 614 + class Members;
699 615
700 // Keep all member variables inside the Members object, which we dynamically allocate. This 616 // Keep all member variables inside the Members object, which we dynamically allocate. This
701 // makes it possible to add new private members without breaking binary compatibility. 617 // makes it possible to add new private members without breaking binary compatibility.
libqpdf/QPDF.cc
@@ -2370,6 +2370,12 @@ QPDF::getRoot() @@ -2370,6 +2370,12 @@ QPDF::getRoot()
2370 std::map<QPDFObjGen, QPDFXRefEntry> 2370 std::map<QPDFObjGen, QPDFXRefEntry>
2371 QPDF::getXRefTable() 2371 QPDF::getXRefTable()
2372 { 2372 {
  2373 + return getXRefTableInternal();
  2374 +}
  2375 +
  2376 +std::map<QPDFObjGen, QPDFXRefEntry> const&
  2377 +QPDF::getXRefTableInternal()
  2378 +{
2373 if (!m->parsed) { 2379 if (!m->parsed) {
2374 throw std::logic_error("QPDF::getXRefTable called before parsing."); 2380 throw std::logic_error("QPDF::getXRefTable called before parsing.");
2375 } 2381 }
@@ -2377,19 +2383,33 @@ QPDF::getXRefTable() @@ -2377,19 +2383,33 @@ QPDF::getXRefTable()
2377 return m->xref_table; 2383 return m->xref_table;
2378 } 2384 }
2379 2385
2380 -void  
2381 -QPDF::getObjectStreamData(std::map<int, int>& omap) 2386 +size_t
  2387 +QPDF::tableSize()
2382 { 2388 {
2383 - for (auto const& iter: m->xref_table) {  
2384 - QPDFObjGen const& og = iter.first;  
2385 - QPDFXRefEntry const& entry = iter.second;  
2386 - if (entry.getType() == 2) {  
2387 - omap[og.getObj()] = entry.getObjStreamNumber();  
2388 - } 2389 + // If obj_cache is dense, accommodate all object in tables,else accommodate only original
  2390 + // objects.
  2391 + auto max_xref = m->xref_table.size() ? m->xref_table.crbegin()->first.getObj() : 0;
  2392 + auto max_obj = m->obj_cache.size() ? m->obj_cache.crbegin()->first.getObj() : 0;
  2393 + if (max_obj < 1.1 * std::max(toI(m->obj_cache.size()), max_xref)) {
  2394 + return toS(++max_obj);
2389 } 2395 }
  2396 + return toS(++max_xref);
2390 } 2397 }
2391 2398
2392 std::vector<QPDFObjGen> 2399 std::vector<QPDFObjGen>
  2400 +QPDF::getCompressibleObjVector()
  2401 +{
  2402 + return getCompressibleObjGens<QPDFObjGen>();
  2403 +}
  2404 +
  2405 +std::vector<bool>
  2406 +QPDF::getCompressibleObjSet()
  2407 +{
  2408 + return getCompressibleObjGens<bool>();
  2409 +}
  2410 +
  2411 +template <typename T>
  2412 +std::vector<T>
2393 QPDF::getCompressibleObjGens() 2413 QPDF::getCompressibleObjGens()
2394 { 2414 {
2395 // Return a list of objects that are allowed to be in object streams. Walk through the objects 2415 // Return a list of objects that are allowed to be in object streams. Walk through the objects
@@ -2407,7 +2427,14 @@ QPDF::getCompressibleObjGens() @@ -2407,7 +2427,14 @@ QPDF::getCompressibleObjGens()
2407 std::vector<QPDFObjectHandle> queue; 2427 std::vector<QPDFObjectHandle> queue;
2408 queue.reserve(512); 2428 queue.reserve(512);
2409 queue.push_back(m->trailer); 2429 queue.push_back(m->trailer);
2410 - std::vector<QPDFObjGen> result; 2430 + std::vector<T> result;
  2431 + if constexpr (std::is_same_v<T, QPDFObjGen>) {
  2432 + result.reserve(m->obj_cache.size());
  2433 + } else if constexpr (std::is_same_v<T, bool>) {
  2434 + result.resize(max_obj + 1U, false);
  2435 + } else {
  2436 + throw std::logic_error("Unsupported type in QPDF::getCompressibleObjGens");
  2437 + }
2411 while (!queue.empty()) { 2438 while (!queue.empty()) {
2412 auto obj = queue.back(); 2439 auto obj = queue.back();
2413 queue.pop_back(); 2440 queue.pop_back();
@@ -2439,7 +2466,11 @@ QPDF::getCompressibleObjGens() @@ -2439,7 +2466,11 @@ QPDF::getCompressibleObjGens()
2439 } else if (!(obj.isStream() || 2466 } else if (!(obj.isStream() ||
2440 (obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") && 2467 (obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") &&
2441 obj.hasKey("/Contents")))) { 2468 obj.hasKey("/Contents")))) {
2442 - result.push_back(og); 2469 + if constexpr (std::is_same_v<T, QPDFObjGen>) {
  2470 + result.push_back(og);
  2471 + } else if constexpr (std::is_same_v<T, bool>) {
  2472 + result[id + 1U] = true;
  2473 + }
2443 } 2474 }
2444 } 2475 }
2445 if (obj.isStream()) { 2476 if (obj.isStream()) {
libqpdf/QPDFWriter.cc
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 2
3 #include <qpdf/qpdf-config.h> // include early for large file support 3 #include <qpdf/qpdf-config.h> // include early for large file support
4 4
5 -#include <qpdf/QPDFWriter.hh> 5 +#include <qpdf/QPDFWriter_private.hh>
6 6
7 #include <qpdf/MD5.hh> 7 #include <qpdf/MD5.hh>
8 #include <qpdf/Pl_AES_PDF.hh> 8 #include <qpdf/Pl_AES_PDF.hh>
@@ -1038,7 +1038,7 @@ QPDFWriter::openObject(int objid) @@ -1038,7 +1038,7 @@ QPDFWriter::openObject(int objid)
1038 if (objid == 0) { 1038 if (objid == 0) {
1039 objid = m->next_objid++; 1039 objid = m->next_objid++;
1040 } 1040 }
1041 - m->xref[objid] = QPDFXRefEntry(m->pipeline->getCount()); 1041 + m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount());
1042 writeString(std::to_string(objid)); 1042 writeString(std::to_string(objid));
1043 writeString(" 0 obj\n"); 1043 writeString(" 0 obj\n");
1044 return objid; 1044 return objid;
@@ -1050,7 +1050,8 @@ QPDFWriter::closeObject(int objid) @@ -1050,7 +1050,8 @@ QPDFWriter::closeObject(int objid)
1050 // Write a newline before endobj as it makes the file easier to repair. 1050 // Write a newline before endobj as it makes the file easier to repair.
1051 writeString("\nendobj\n"); 1051 writeString("\nendobj\n");
1052 writeStringQDF("\n"); 1052 writeStringQDF("\n");
1053 - m->lengths[objid] = m->pipeline->getCount() - m->xref[objid].getOffset(); 1053 + auto& new_obj = m->new_obj[objid];
  1054 + new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset();
1054 } 1055 }
1055 1056
1056 void 1057 void
@@ -1064,7 +1065,7 @@ QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og) @@ -1064,7 +1065,7 @@ QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og)
1064 1065
1065 // Reserve numbers for the objects that belong to this object stream. 1066 // Reserve numbers for the objects that belong to this object stream.
1066 for (auto const& iter: m->object_stream_to_objects[objid]) { 1067 for (auto const& iter: m->object_stream_to_objects[objid]) {
1067 - m->obj_renumber[iter] = m->next_objid++; 1068 + m->obj[iter].renumber = m->next_objid++;
1068 } 1069 }
1069 } 1070 }
1070 1071
@@ -1093,18 +1094,18 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) @@ -1093,18 +1094,18 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
1093 } 1094 }
1094 1095
1095 QPDFObjGen og = object.getObjGen(); 1096 QPDFObjGen og = object.getObjGen();
  1097 + auto& obj = m->obj[og];
1096 1098
1097 - if (m->obj_renumber.count(og) == 0) {  
1098 - if (m->object_to_object_stream.count(og)) { 1099 + if (obj.renumber == 0) {
  1100 + if (obj.object_stream > 0) {
1099 // This is in an object stream. Don't process it here. Instead, enqueue the object 1101 // This is in an object stream. Don't process it here. Instead, enqueue the object
1100 // stream. Object streams always have generation 0. 1102 // stream. Object streams always have generation 0.
1101 - int stream_id = m->object_to_object_stream[og];  
1102 - // Detect loops by storing invalid object ID 0, which will get overwritten later.  
1103 - m->obj_renumber[og] = 0;  
1104 - enqueueObject(m->pdf.getObjectByID(stream_id, 0)); 1103 + // Detect loops by storing invalid object ID -1, which will get overwritten later.
  1104 + obj.renumber = -1;
  1105 + enqueueObject(m->pdf.getObject(obj.object_stream, 0));
1105 } else { 1106 } else {
1106 m->object_queue.push_back(object); 1107 m->object_queue.push_back(object);
1107 - m->obj_renumber[og] = m->next_objid++; 1108 + obj.renumber = m->next_objid++;
1108 1109
1109 if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) { 1110 if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) {
1110 // For linearized files, uncompressed objects go at end, and we take care of 1111 // For linearized files, uncompressed objects go at end, and we take care of
@@ -1117,7 +1118,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) @@ -1117,7 +1118,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
1117 ++m->next_objid; 1118 ++m->next_objid;
1118 } 1119 }
1119 } 1120 }
1120 - } else if (m->obj_renumber[og] == 0) { 1121 + } else if (obj.renumber == -1) {
1121 // This can happen if a specially constructed file indicates that an object stream is 1122 // This can happen if a specially constructed file indicates that an object stream is
1122 // inside itself. 1123 // inside itself.
1123 QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream"); 1124 QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream");
@@ -1147,9 +1148,7 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags) @@ -1147,9 +1148,7 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
1147 enqueueObject(child); 1148 enqueueObject(child);
1148 } 1149 }
1149 if (child.isIndirect()) { 1150 if (child.isIndirect()) {
1150 - QPDFObjGen old_og = child.getObjGen();  
1151 - int new_id = m->obj_renumber[old_og];  
1152 - writeString(std::to_string(new_id)); 1151 + writeString(std::to_string(m->obj[child].renumber));
1153 writeString(" 0 R"); 1152 writeString(" 0 R");
1154 } else { 1153 } else {
1155 unparseObject(child, level, flags); 1154 unparseObject(child, level, flags);
@@ -1527,9 +1526,8 @@ QPDFWriter::unparseObject( @@ -1527,9 +1526,8 @@ QPDFWriter::unparseObject(
1527 writeString(">>"); 1526 writeString(">>");
1528 } else if (tc == ::ot_stream) { 1527 } else if (tc == ::ot_stream) {
1529 // Write stream data to a buffer. 1528 // Write stream data to a buffer.
1530 - int new_id = m->obj_renumber[old_og];  
1531 if (!m->direct_stream_lengths) { 1529 if (!m->direct_stream_lengths) {
1532 - m->cur_stream_length_id = new_id + 1; 1530 + m->cur_stream_length_id = m->obj[old_og].renumber + 1;
1533 } 1531 }
1534 1532
1535 flags |= f_stream; 1533 flags |= f_stream;
@@ -1626,7 +1624,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1626,7 +1624,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1626 QPDFObjGen old_og = object.getObjGen(); 1624 QPDFObjGen old_og = object.getObjGen();
1627 qpdf_assert_debug(old_og.getGen() == 0); 1625 qpdf_assert_debug(old_og.getGen() == 0);
1628 int old_id = old_og.getObj(); 1626 int old_id = old_og.getObj();
1629 - int new_id = m->obj_renumber[old_og]; 1627 + int new_stream_id = m->obj[old_og].renumber;
1630 1628
1631 std::vector<qpdf_offset_t> offsets; 1629 std::vector<qpdf_offset_t> offsets;
1632 qpdf_offset_t first = 0; 1630 qpdf_offset_t first = 0;
@@ -1670,7 +1668,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1670,7 +1668,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1670 int count = -1; 1668 int count = -1;
1671 for (auto const& obj: m->object_stream_to_objects[old_id]) { 1669 for (auto const& obj: m->object_stream_to_objects[old_id]) {
1672 ++count; 1670 ++count;
1673 - int new_obj = m->obj_renumber[obj]; 1671 + int new_obj = m->obj[obj].renumber;
1674 if (first_obj == -1) { 1672 if (first_obj == -1) {
1675 first_obj = new_obj; 1673 first_obj = new_obj;
1676 } 1674 }
@@ -1706,13 +1704,13 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1706,13 +1704,13 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1706 } 1704 }
1707 writeObject(obj_to_write, count); 1705 writeObject(obj_to_write, count);
1708 1706
1709 - m->xref[new_obj] = QPDFXRefEntry(new_id, count); 1707 + m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count);
1710 } 1708 }
1711 } 1709 }
1712 1710
1713 // Write the object 1711 // Write the object
1714 - openObject(new_id);  
1715 - setDataKey(new_id); 1712 + openObject(new_stream_id);
  1713 + setDataKey(new_stream_id);
1716 writeString("<<"); 1714 writeString("<<");
1717 writeStringQDF("\n "); 1715 writeStringQDF("\n ");
1718 writeString(" /Type /ObjStm"); 1716 writeString(" /Type /ObjStm");
@@ -1754,7 +1752,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1754,7 +1752,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1754 } 1752 }
1755 writeString("endstream"); 1753 writeString("endstream");
1756 m->cur_data_key.clear(); 1754 m->cur_data_key.clear();
1757 - closeObject(new_id); 1755 + closeObject(new_stream_id);
1758 } 1756 }
1759 1757
1760 void 1758 void
@@ -1769,7 +1767,7 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) @@ -1769,7 +1767,7 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
1769 } 1767 }
1770 1768
1771 indicateProgress(false, false); 1769 indicateProgress(false, false);
1772 - int new_id = m->obj_renumber[old_og]; 1770 + auto new_id = m->obj[old_og].renumber;
1773 if (m->qdf_mode) { 1771 if (m->qdf_mode) {
1774 if (m->page_object_to_seq.count(old_og)) { 1772 if (m->page_object_to_seq.count(old_og)) {
1775 writeString("%% Page "); 1773 writeString("%% Page ");
@@ -1938,11 +1936,7 @@ QPDFWriter::initializeSpecialStreams() @@ -1938,11 +1936,7 @@ QPDFWriter::initializeSpecialStreams()
1938 void 1936 void
1939 QPDFWriter::preserveObjectStreams() 1937 QPDFWriter::preserveObjectStreams()
1940 { 1938 {
1941 - std::map<int, int> omap;  
1942 - QPDF::Writer::getObjectStreamData(m->pdf, omap);  
1943 - if (omap.empty()) {  
1944 - return;  
1945 - } 1939 + auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
1946 // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object 1940 // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
1947 // streams out of old objects that have generation numbers greater than zero. However in an 1941 // streams out of old objects that have generation numbers greater than zero. However in an
1948 // existing PDF, all object stream objects and all objects in them must have generation 0 1942 // existing PDF, all object stream objects and all objects in them must have generation 0
@@ -1950,20 +1944,43 @@ QPDFWriter::preserveObjectStreams() @@ -1950,20 +1944,43 @@ QPDFWriter::preserveObjectStreams()
1950 // that are not allowed to be in object streams. In addition to removing objects that were 1944 // that are not allowed to be in object streams. In addition to removing objects that were
1951 // erroneously included in object streams in the source PDF, it also prevents unreferenced 1945 // erroneously included in object streams in the source PDF, it also prevents unreferenced
1952 // objects from being included. 1946 // objects from being included.
1953 - std::set<QPDFObjGen> eligible;  
1954 - if (!m->preserve_unreferenced_objects) {  
1955 - std::vector<QPDFObjGen> eligible_v = QPDF::Writer::getCompressibleObjGens(m->pdf);  
1956 - eligible = std::set<QPDFObjGen>(eligible_v.begin(), eligible_v.end());  
1957 - }  
1958 - QTC::TC("qpdf", "QPDFWriter preserve object streams", m->preserve_unreferenced_objects ? 0 : 1);  
1959 - for (auto iter: omap) {  
1960 - QPDFObjGen og(iter.first, 0);  
1961 - if (eligible.count(og) || m->preserve_unreferenced_objects) {  
1962 - m->object_to_object_stream[og] = iter.second;  
1963 - } else {  
1964 - QTC::TC("qpdf", "QPDFWriter exclude from object stream"); 1947 + auto iter = xref.cbegin();
  1948 + auto end = xref.cend();
  1949 +
  1950 + // Start by scanning for first compressed object in case we don't have any object streams to
  1951 + // process.
  1952 + for (; iter != end; ++iter) {
  1953 + if (iter->second.getType() == 2) {
  1954 + // Pdf contains object streams.
  1955 + QTC::TC(
  1956 + "qpdf",
  1957 + "QPDFWriter preserve object streams",
  1958 + m->preserve_unreferenced_objects ? 0 : 1);
  1959 +
  1960 + if (m->preserve_unreferenced_objects) {
  1961 + for (; iter != end; ++iter) {
  1962 + if (iter->second.getType() == 2) {
  1963 + m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
  1964 + }
  1965 + }
  1966 + } else {
  1967 + auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
  1968 + for (; iter != end; ++iter) {
  1969 + if (iter->second.getType() == 2) {
  1970 + auto id = static_cast<size_t>(iter->first.getObj());
  1971 + if (id < eligible.size() && eligible[id]) {
  1972 + m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
  1973 + } else {
  1974 + QTC::TC("qpdf", "QPDFWriter exclude from object stream");
  1975 + }
  1976 + }
  1977 + }
  1978 + }
  1979 + return;
1965 } 1980 }
1966 } 1981 }
  1982 + // No compressed objects found.
  1983 + m->obj.streams_empty = true;
1967 } 1984 }
1968 1985
1969 void 1986 void
@@ -1979,7 +1996,10 @@ QPDFWriter::generateObjectStreams() @@ -1979,7 +1996,10 @@ QPDFWriter::generateObjectStreams()
1979 1996
1980 std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf); 1997 std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf);
1981 size_t n_object_streams = (eligible.size() + 99U) / 100U; 1998 size_t n_object_streams = (eligible.size() + 99U) / 100U;
  1999 +
  2000 + initializeTables(2U * n_object_streams);
1982 if (n_object_streams == 0) { 2001 if (n_object_streams == 0) {
  2002 + m->obj.streams_empty = true;
1983 return; 2003 return;
1984 } 2004 }
1985 size_t n_per = eligible.size() / n_object_streams; 2005 size_t n_per = eligible.size() / n_object_streams;
@@ -1987,20 +2007,18 @@ QPDFWriter::generateObjectStreams() @@ -1987,20 +2007,18 @@ QPDFWriter::generateObjectStreams()
1987 ++n_per; 2007 ++n_per;
1988 } 2008 }
1989 unsigned int n = 0; 2009 unsigned int n = 0;
1990 - int cur_ostream = 0;  
1991 - for (auto const& iter: eligible) {  
1992 - if ((n % n_per) == 0) {  
1993 - if (n > 0) {  
1994 - QTC::TC("qpdf", "QPDFWriter generate >1 ostream");  
1995 - } 2010 + int cur_ostream = m->pdf.newIndirectNull().getObjectID();
  2011 + for (auto const& item: eligible) {
  2012 + if (n == n_per) {
  2013 + QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
1996 n = 0; 2014 n = 0;
1997 - }  
1998 - if (n == 0) {  
1999 // Construct a new null object as the "original" object stream. The rest of the code 2015 // Construct a new null object as the "original" object stream. The rest of the code
2000 // knows that this means we're creating the object stream from scratch. 2016 // knows that this means we're creating the object stream from scratch.
2001 - cur_ostream = m->pdf.makeIndirectObject(QPDFObjectHandle::newNull()).getObjectID(); 2017 + cur_ostream = m->pdf.newIndirectNull().getObjectID();
2002 } 2018 }
2003 - m->object_to_object_stream[iter] = cur_ostream; 2019 + auto& obj = m->obj[item];
  2020 + obj.object_stream = cur_ostream;
  2021 + obj.gen = item.getGen();
2004 ++n; 2022 ++n;
2005 } 2023 }
2006 } 2024 }
@@ -2056,6 +2074,14 @@ QPDFWriter::prepareFileForWrite() @@ -2056,6 +2074,14 @@ QPDFWriter::prepareFileForWrite()
2056 } 2074 }
2057 2075
2058 void 2076 void
  2077 +QPDFWriter::initializeTables(size_t extra)
  2078 +{
  2079 + auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra;
  2080 + m->obj.initialize(size);
  2081 + m->new_obj.initialize(size);
  2082 +}
  2083 +
  2084 +void
2059 QPDFWriter::doWriteSetup() 2085 QPDFWriter::doWriteSetup()
2060 { 2086 {
2061 if (m->did_write_setup) { 2087 if (m->did_write_setup) {
@@ -2124,10 +2150,12 @@ QPDFWriter::doWriteSetup() @@ -2124,10 +2150,12 @@ QPDFWriter::doWriteSetup()
2124 2150
2125 switch (m->object_stream_mode) { 2151 switch (m->object_stream_mode) {
2126 case qpdf_o_disable: 2152 case qpdf_o_disable:
2127 - // no action required 2153 + initializeTables();
  2154 + m->obj.streams_empty = true;
2128 break; 2155 break;
2129 2156
2130 case qpdf_o_preserve: 2157 case qpdf_o_preserve:
  2158 + initializeTables();
2131 preserveObjectStreams(); 2159 preserveObjectStreams();
2132 break; 2160 break;
2133 2161
@@ -2138,39 +2166,45 @@ QPDFWriter::doWriteSetup() @@ -2138,39 +2166,45 @@ QPDFWriter::doWriteSetup()
2138 // no default so gcc will warn for missing case tag 2166 // no default so gcc will warn for missing case tag
2139 } 2167 }
2140 2168
2141 - if (m->linearized) {  
2142 - // Page dictionaries are not allowed to be compressed objects.  
2143 - for (auto& page: m->pdf.getAllPages()) {  
2144 - QPDFObjGen og = page.getObjGen();  
2145 - if (m->object_to_object_stream.count(og)) {  
2146 - QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");  
2147 - m->object_to_object_stream.erase(og); 2169 + if (!m->obj.streams_empty) {
  2170 + if (m->linearized) {
  2171 + // Page dictionaries are not allowed to be compressed objects.
  2172 + for (auto& page: m->pdf.getAllPages()) {
  2173 + if (m->obj[page].object_stream > 0) {
  2174 + QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
  2175 + m->obj[page].object_stream = 0;
  2176 + }
2148 } 2177 }
2149 } 2178 }
2150 - }  
2151 2179
2152 - if (m->linearized || m->encrypted) {  
2153 - // The document catalog is not allowed to be compressed in linearized files either. It also  
2154 - // appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to handle  
2155 - // encrypted files with compressed document catalogs, so we disable them in that case as  
2156 - // well.  
2157 - if (m->object_to_object_stream.count(m->root_og)) {  
2158 - QTC::TC("qpdf", "QPDFWriter uncompressing root");  
2159 - m->object_to_object_stream.erase(m->root_og); 2180 + if (m->linearized || m->encrypted) {
  2181 + // The document catalog is not allowed to be compressed in linearized files either. It
  2182 + // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
  2183 + // handle encrypted files with compressed document catalogs, so we disable them in that
  2184 + // case as well.
  2185 + if (m->obj[m->root_og].object_stream > 0) {
  2186 + QTC::TC("qpdf", "QPDFWriter uncompressing root");
  2187 + m->obj[m->root_og].object_stream = 0;
  2188 + }
2160 } 2189 }
2161 - }  
2162 2190
2163 - // Generate reverse mapping from object stream to objects  
2164 - for (auto const& iter: m->object_to_object_stream) {  
2165 - QPDFObjGen const& obj = iter.first;  
2166 - int stream = iter.second;  
2167 - m->object_stream_to_objects[stream].insert(obj);  
2168 - m->max_ostream_index = std::max(  
2169 - m->max_ostream_index, QIntC::to_int(m->object_stream_to_objects[stream].size()) - 1);  
2170 - } 2191 + // Generate reverse mapping from object stream to objects
  2192 + m->obj.forEach([this](auto id, auto const& item) -> void {
  2193 + if (item.object_stream > 0) {
  2194 + auto& vec = m->object_stream_to_objects[item.object_stream];
  2195 + vec.emplace_back(id, item.gen);
  2196 + if (m->max_ostream_index < vec.size()) {
  2197 + ++m->max_ostream_index;
  2198 + }
  2199 + }
  2200 + });
  2201 + --m->max_ostream_index;
2171 2202
2172 - if (!m->object_stream_to_objects.empty()) {  
2173 - setMinimumPDFVersion("1.5"); 2203 + if (m->object_stream_to_objects.empty()) {
  2204 + m->obj.streams_empty = true;
  2205 + } else {
  2206 + setMinimumPDFVersion("1.5");
  2207 + }
2174 } 2208 }
2175 2209
2176 setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel()); 2210 setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel());
@@ -2215,7 +2249,7 @@ QPDFWriter::write() @@ -2215,7 +2249,7 @@ QPDFWriter::write()
2215 QPDFObjGen 2249 QPDFObjGen
2216 QPDFWriter::getRenumberedObjGen(QPDFObjGen og) 2250 QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
2217 { 2251 {
2218 - return QPDFObjGen(m->obj_renumber[og], 0); 2252 + return QPDFObjGen(m->obj[og].renumber, 0);
2219 } 2253 }
2220 2254
2221 std::map<QPDFObjGen, QPDFXRefEntry> 2255 std::map<QPDFObjGen, QPDFXRefEntry>
@@ -2223,12 +2257,12 @@ QPDFWriter::getWrittenXRefTable() @@ -2223,12 +2257,12 @@ QPDFWriter::getWrittenXRefTable()
2223 { 2257 {
2224 std::map<QPDFObjGen, QPDFXRefEntry> result; 2258 std::map<QPDFObjGen, QPDFXRefEntry> result;
2225 2259
2226 - for (auto const& iter: m->xref) {  
2227 - if (iter.first != 0 && iter.second.getType() != 0) {  
2228 - result[QPDFObjGen(iter.first, 0)] = iter.second; 2260 + auto it = result.begin();
  2261 + m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void {
  2262 + if (item.xref.getType() != 0) {
  2263 + it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref);
2229 } 2264 }
2230 - }  
2231 - 2265 + });
2232 return result; 2266 return result;
2233 } 2267 }
2234 2268
@@ -2290,8 +2324,7 @@ QPDFWriter::writeHintStream(int hint_id) @@ -2290,8 +2324,7 @@ QPDFWriter::writeHintStream(int hint_id)
2290 int S = 0; 2324 int S = 0;
2291 int O = 0; 2325 int O = 0;
2292 bool compressed = (m->compress_streams && !m->qdf_mode); 2326 bool compressed = (m->compress_streams && !m->qdf_mode);
2293 - QPDF::Writer::generateHintStream(  
2294 - m->pdf, m->xref, m->lengths, m->obj_renumber_no_gen, hint_buffer, S, O, compressed); 2327 + QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed);
2295 2328
2296 openObject(hint_id); 2329 openObject(hint_id);
2297 setDataKey(hint_id); 2330 setDataKey(hint_id);
@@ -2364,7 +2397,7 @@ QPDFWriter::writeXRefTable( @@ -2364,7 +2397,7 @@ QPDFWriter::writeXRefTable(
2364 } else { 2397 } else {
2365 qpdf_offset_t offset = 0; 2398 qpdf_offset_t offset = 0;
2366 if (!suppress_offsets) { 2399 if (!suppress_offsets) {
2367 - offset = m->xref[i].getOffset(); 2400 + offset = m->new_obj[i].xref.getOffset();
2368 if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { 2401 if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) {
2369 offset += hint_length; 2402 offset += hint_length;
2370 } 2403 }
@@ -2411,13 +2444,13 @@ QPDFWriter::writeXRefStream( @@ -2411,13 +2444,13 @@ QPDFWriter::writeXRefStream(
2411 unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id)); 2444 unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id));
2412 2445
2413 // field 2 contains object stream indices 2446 // field 2 contains object stream indices
2414 - unsigned int f2_size = bytesNeeded(m->max_ostream_index); 2447 + unsigned int f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index));
2415 2448
2416 unsigned int esize = 1 + f1_size + f2_size; 2449 unsigned int esize = 1 + f1_size + f2_size;
2417 2450
2418 // Must store in xref table in advance of writing the actual data rather than waiting for 2451 // Must store in xref table in advance of writing the actual data rather than waiting for
2419 // openObject to do it. 2452 // openObject to do it.
2420 - m->xref[xref_id] = QPDFXRefEntry(m->pipeline->getCount()); 2453 + m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount());
2421 2454
2422 Pipeline* p = pushPipeline(new Pl_Buffer("xref stream")); 2455 Pipeline* p = pushPipeline(new Pl_Buffer("xref stream"));
2423 bool compressed = false; 2456 bool compressed = false;
@@ -2435,7 +2468,7 @@ QPDFWriter::writeXRefStream( @@ -2435,7 +2468,7 @@ QPDFWriter::writeXRefStream(
2435 PipelinePopper pp_xref(this, &xref_data); 2468 PipelinePopper pp_xref(this, &xref_data);
2436 activatePipelineStack(pp_xref); 2469 activatePipelineStack(pp_xref);
2437 for (int i = first; i <= last; ++i) { 2470 for (int i = first; i <= last; ++i) {
2438 - QPDFXRefEntry& e = m->xref[i]; 2471 + QPDFXRefEntry& e = m->new_obj[i].xref;
2439 switch (e.getType()) { 2472 switch (e.getType()) {
2440 case 0: 2473 case 0:
2441 writeBinary(0, 1); 2474 writeBinary(0, 1);
@@ -2507,39 +2540,10 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes) @@ -2507,39 +2540,10 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2507 } 2540 }
2508 2541
2509 void 2542 void
2510 -QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out)  
2511 -{  
2512 - // There are deep assumptions in the linearization code in QPDF that there is only one object  
2513 - // with each object number; i.e., you can't have two objects with the same object number and  
2514 - // different generations. This is a pretty safe assumption because Adobe Reader and Acrobat  
2515 - // can't actually handle this case. There is not much if any code in QPDF outside linearization  
2516 - // that assumes this, but the linearization code as currently implemented would do weird things  
2517 - // if we found such a case. In order to avoid breaking ABI changes in QPDF, we will first  
2518 - // assert that this condition holds. Then we can create new maps for QPDF that throw away  
2519 - // generation numbers.  
2520 -  
2521 - out.clear();  
2522 - for (auto const& iter: in) {  
2523 - if (out.count(iter.first.getObj())) {  
2524 - throw std::runtime_error("QPDF cannot currently linearize files that contain"  
2525 - " multiple objects with the same object ID and different"  
2526 - " generations. If you see this error message, please file"  
2527 - " a bug report and attach the file if possible. As a"  
2528 - " workaround, first convert the file with qpdf without"  
2529 - " linearizing, and then linearize the result of that"  
2530 - " conversion.");  
2531 - }  
2532 - out[iter.first.getObj()] = iter.second;  
2533 - }  
2534 -}  
2535 -  
2536 -void  
2537 QPDFWriter::writeLinearized() 2543 QPDFWriter::writeLinearized()
2538 { 2544 {
2539 // Optimize file and enqueue objects in order 2545 // Optimize file and enqueue objects in order
2540 2546
2541 - discardGeneration(m->object_to_object_stream, m->object_to_object_stream_no_gen);  
2542 -  
2543 auto skip_stream_parameters = [this](QPDFObjectHandle& stream) { 2547 auto skip_stream_parameters = [this](QPDFObjectHandle& stream) {
2544 bool compress_stream; 2548 bool compress_stream;
2545 bool is_metadata; 2549 bool is_metadata;
@@ -2550,15 +2554,14 @@ QPDFWriter::writeLinearized() @@ -2550,15 +2554,14 @@ QPDFWriter::writeLinearized()
2550 } 2554 }
2551 }; 2555 };
2552 2556
2553 - m->pdf.optimize(m->object_to_object_stream_no_gen, true, skip_stream_parameters); 2557 + QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters);
2554 2558
2555 std::vector<QPDFObjectHandle> part4; 2559 std::vector<QPDFObjectHandle> part4;
2556 std::vector<QPDFObjectHandle> part6; 2560 std::vector<QPDFObjectHandle> part6;
2557 std::vector<QPDFObjectHandle> part7; 2561 std::vector<QPDFObjectHandle> part7;
2558 std::vector<QPDFObjectHandle> part8; 2562 std::vector<QPDFObjectHandle> part8;
2559 std::vector<QPDFObjectHandle> part9; 2563 std::vector<QPDFObjectHandle> part9;
2560 - QPDF::Writer::getLinearizedParts(  
2561 - m->pdf, m->object_to_object_stream_no_gen, part4, part6, part7, part8, part9); 2564 + QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9);
2562 2565
2563 // Object number sequence: 2566 // Object number sequence:
2564 // 2567 //
@@ -2582,7 +2585,7 @@ QPDFWriter::writeLinearized() @@ -2582,7 +2585,7 @@ QPDFWriter::writeLinearized()
2582 int after_second_half = 1 + second_half_uncompressed; 2585 int after_second_half = 1 + second_half_uncompressed;
2583 m->next_objid = after_second_half; 2586 m->next_objid = after_second_half;
2584 int second_half_xref = 0; 2587 int second_half_xref = 0;
2585 - bool need_xref_stream = (!m->object_to_object_stream.empty()); 2588 + bool need_xref_stream = !m->obj.streams_empty;
2586 if (need_xref_stream) { 2589 if (need_xref_stream) {
2587 second_half_xref = m->next_objid++; 2590 second_half_xref = m->next_objid++;
2588 } 2591 }
@@ -2690,14 +2693,14 @@ QPDFWriter::writeLinearized() @@ -2690,14 +2693,14 @@ QPDFWriter::writeLinearized()
2690 writeString("<<"); 2693 writeString("<<");
2691 if (pass == 2) { 2694 if (pass == 2) {
2692 std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages(); 2695 std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages();
2693 - int first_page_object = m->obj_renumber[pages.at(0).getObjGen()]; 2696 + int first_page_object = m->obj[pages.at(0)].renumber;
2694 int npages = QIntC::to_int(pages.size()); 2697 int npages = QIntC::to_int(pages.size());
2695 2698
2696 writeString(" /Linearized 1 /L "); 2699 writeString(" /Linearized 1 /L ");
2697 writeString(std::to_string(file_size + hint_length)); 2700 writeString(std::to_string(file_size + hint_length));
2698 // Implementation note 121 states that a space is mandatory after this open bracket. 2701 // Implementation note 121 states that a space is mandatory after this open bracket.
2699 writeString(" /H [ "); 2702 writeString(" /H [ ");
2700 - writeString(std::to_string(m->xref[hint_id].getOffset())); 2703 + writeString(std::to_string(m->new_obj[hint_id].xref.getOffset()));
2701 writeString(" "); 2704 writeString(" ");
2702 writeString(std::to_string(hint_length)); 2705 writeString(std::to_string(hint_length));
2703 writeString(" ] /O "); 2706 writeString(" ] /O ");
@@ -2724,7 +2727,7 @@ QPDFWriter::writeLinearized() @@ -2724,7 +2727,7 @@ QPDFWriter::writeLinearized()
2724 qpdf_offset_t first_xref_offset = m->pipeline->getCount(); 2727 qpdf_offset_t first_xref_offset = m->pipeline->getCount();
2725 qpdf_offset_t hint_offset = 0; 2728 qpdf_offset_t hint_offset = 0;
2726 if (pass == 2) { 2729 if (pass == 2) {
2727 - hint_offset = m->xref[hint_id].getOffset(); 2730 + hint_offset = m->new_obj[hint_id].xref.getOffset();
2728 } 2731 }
2729 if (need_xref_stream) { 2732 if (need_xref_stream) {
2730 // Must pad here too. 2733 // Must pad here too.
@@ -2795,7 +2798,7 @@ QPDFWriter::writeLinearized() @@ -2795,7 +2798,7 @@ QPDFWriter::writeLinearized()
2795 writeEncryptionDictionary(); 2798 writeEncryptionDictionary();
2796 } 2799 }
2797 if (pass == 1) { 2800 if (pass == 1) {
2798 - m->xref[hint_id] = QPDFXRefEntry(m->pipeline->getCount()); 2801 + m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount());
2799 } else { 2802 } else {
2800 // Part 5: hint stream 2803 // Part 5: hint stream
2801 writeBuffer(hint_buffer); 2804 writeBuffer(hint_buffer);
@@ -2855,8 +2858,6 @@ QPDFWriter::writeLinearized() @@ -2855,8 +2858,6 @@ QPDFWriter::writeLinearized()
2855 writeString(std::to_string(first_xref_offset)); 2858 writeString(std::to_string(first_xref_offset));
2856 writeString("\n%%EOF\n"); 2859 writeString("\n%%EOF\n");
2857 2860
2858 - discardGeneration(m->obj_renumber, m->obj_renumber_no_gen);  
2859 -  
2860 if (pass == 1) { 2861 if (pass == 1) {
2861 if (m->deterministic_id) { 2862 if (m->deterministic_id) {
2862 QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1); 2863 QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
@@ -2870,7 +2871,7 @@ QPDFWriter::writeLinearized() @@ -2870,7 +2871,7 @@ QPDFWriter::writeLinearized()
2870 pp_pass1 = nullptr; 2871 pp_pass1 = nullptr;
2871 2872
2872 // Save hint offset since it will be set to zero by calling openObject. 2873 // Save hint offset since it will be set to zero by calling openObject.
2873 - qpdf_offset_t hint_offset1 = m->xref[hint_id].getOffset(); 2874 + qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset();
2874 2875
2875 // Write hint stream to a buffer 2876 // Write hint stream to a buffer
2876 { 2877 {
@@ -2882,7 +2883,7 @@ QPDFWriter::writeLinearized() @@ -2882,7 +2883,7 @@ QPDFWriter::writeLinearized()
2882 hint_length = QIntC::to_offset(hint_buffer->getSize()); 2883 hint_length = QIntC::to_offset(hint_buffer->getSize());
2883 2884
2884 // Restore hint offset 2885 // Restore hint offset
2885 - m->xref[hint_id] = QPDFXRefEntry(hint_offset1); 2886 + m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
2886 if (lin_pass1_file) { 2887 if (lin_pass1_file) {
2887 // Write some debugging information 2888 // Write some debugging information
2888 fprintf( 2889 fprintf(
libqpdf/QPDF_linearization.cc
@@ -9,6 +9,7 @@ @@ -9,6 +9,7 @@
9 #include <qpdf/Pl_Flate.hh> 9 #include <qpdf/Pl_Flate.hh>
10 #include <qpdf/QPDFExc.hh> 10 #include <qpdf/QPDFExc.hh>
11 #include <qpdf/QPDFLogger.hh> 11 #include <qpdf/QPDFLogger.hh>
  12 +#include <qpdf/QPDFWriter_private.hh>
12 #include <qpdf/QTC.hh> 13 #include <qpdf/QTC.hh>
13 #include <qpdf/QUtil.hh> 14 #include <qpdf/QUtil.hh>
14 15
@@ -585,6 +586,17 @@ QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map<int, int> const& obj @@ -585,6 +586,17 @@ QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map<int, int> const& obj
585 } 586 }
586 } 587 }
587 588
  589 +QPDFObjectHandle
  590 +QPDF::getUncompressedObject(QPDFObjectHandle& oh, QPDFWriter::ObjTable const& obj)
  591 +{
  592 + if (obj.contains(oh)) {
  593 + if (auto id = obj[oh].object_stream; id > 0) {
  594 + return oh.isNull() ? oh : getObject(id, 0);
  595 + }
  596 + }
  597 + return oh;
  598 +}
  599 +
588 int 600 int
589 QPDF::lengthNextN(int first_object, int n) 601 QPDF::lengthNextN(int first_object, int n)
590 { 602 {
@@ -959,8 +971,9 @@ QPDF::dumpHGeneric(HGeneric& t) @@ -959,8 +971,9 @@ QPDF::dumpHGeneric(HGeneric& t)
959 << "group_length: " << t.group_length << "\n"; 971 << "group_length: " << t.group_length << "\n";
960 } 972 }
961 973
  974 +template <typename T>
962 void 975 void
963 -QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) 976 +QPDF::calculateLinearizationData(T const& object_stream_data)
964 { 977 {
965 // This function calculates the ordering of objects, divides them into the appropriate parts, 978 // This function calculates the ordering of objects, divides them into the appropriate parts,
966 // and computes some values for the linearization parameter dictionary and hint tables. The 979 // and computes some values for the linearization parameter dictionary and hint tables. The
@@ -1402,11 +1415,12 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) @@ -1402,11 +1415,12 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
1402 } 1415 }
1403 } 1416 }
1404 1417
  1418 +template <typename T>
1405 void 1419 void
1406 QPDF::pushOutlinesToPart( 1420 QPDF::pushOutlinesToPart(
1407 std::vector<QPDFObjectHandle>& part, 1421 std::vector<QPDFObjectHandle>& part,
1408 std::set<QPDFObjGen>& lc_outlines, 1422 std::set<QPDFObjGen>& lc_outlines,
1409 - std::map<int, int> const& object_stream_data) 1423 + T const& object_stream_data)
1410 { 1424 {
1411 QPDFObjectHandle root = getRoot(); 1425 QPDFObjectHandle root = getRoot();
1412 QPDFObjectHandle outlines = root.getKey("/Outlines"); 1426 QPDFObjectHandle outlines = root.getKey("/Outlines");
@@ -1433,14 +1447,14 @@ QPDF::pushOutlinesToPart( @@ -1433,14 +1447,14 @@ QPDF::pushOutlinesToPart(
1433 1447
1434 void 1448 void
1435 QPDF::getLinearizedParts( 1449 QPDF::getLinearizedParts(
1436 - std::map<int, int> const& object_stream_data, 1450 + QPDFWriter::ObjTable const& obj,
1437 std::vector<QPDFObjectHandle>& part4, 1451 std::vector<QPDFObjectHandle>& part4,
1438 std::vector<QPDFObjectHandle>& part6, 1452 std::vector<QPDFObjectHandle>& part6,
1439 std::vector<QPDFObjectHandle>& part7, 1453 std::vector<QPDFObjectHandle>& part7,
1440 std::vector<QPDFObjectHandle>& part8, 1454 std::vector<QPDFObjectHandle>& part8,
1441 std::vector<QPDFObjectHandle>& part9) 1455 std::vector<QPDFObjectHandle>& part9)
1442 { 1456 {
1443 - calculateLinearizationData(object_stream_data); 1457 + calculateLinearizationData(obj);
1444 part4 = m->part4; 1458 part4 = m->part4;
1445 part6 = m->part6; 1459 part6 = m->part6;
1446 part7 = m->part7; 1460 part7 = m->part7;
@@ -1456,33 +1470,29 @@ nbits(int val) @@ -1456,33 +1470,29 @@ nbits(int val)
1456 1470
1457 int 1471 int
1458 QPDF::outputLengthNextN( 1472 QPDF::outputLengthNextN(
1459 - int in_object,  
1460 - int n,  
1461 - std::map<int, qpdf_offset_t> const& lengths,  
1462 - std::map<int, int> const& obj_renumber) 1473 + int in_object, int n, QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj)
1463 { 1474 {
1464 // Figure out the length of a series of n consecutive objects in the output file starting with 1475 // Figure out the length of a series of n consecutive objects in the output file starting with
1465 // whatever object in_object from the input file mapped to. 1476 // whatever object in_object from the input file mapped to.
1466 1477
1467 - if (obj_renumber.count(in_object) == 0) { 1478 + int first = obj[in_object].renumber;
  1479 + int last = first + n;
  1480 + if (first <= 0) {
1468 stopOnError("found object that is not renumbered while writing linearization data"); 1481 stopOnError("found object that is not renumbered while writing linearization data");
1469 } 1482 }
1470 - int first = (*(obj_renumber.find(in_object))).second;  
1471 - int length = 0;  
1472 - for (int i = 0; i < n; ++i) {  
1473 - if (lengths.count(first + i) == 0) { 1483 + qpdf_offset_t length = 0;
  1484 + for (int i = first; i < last; ++i) {
  1485 + auto l = new_obj[i].length;
  1486 + if (l == 0) {
1474 stopOnError("found item with unknown length while writing linearization data"); 1487 stopOnError("found item with unknown length while writing linearization data");
1475 } 1488 }
1476 - length += toI((*(lengths.find(first + toI(i)))).second); 1489 + length += l;
1477 } 1490 }
1478 - return length; 1491 + return toI(length);
1479 } 1492 }
1480 1493
1481 void 1494 void
1482 -QPDF::calculateHPageOffset(  
1483 - std::map<int, QPDFXRefEntry> const& xref,  
1484 - std::map<int, qpdf_offset_t> const& lengths,  
1485 - std::map<int, int> const& obj_renumber) 1495 +QPDF::calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj)
1486 { 1496 {
1487 // Page Offset Hint Table 1497 // Page Offset Hint Table
1488 1498
@@ -1497,8 +1507,7 @@ QPDF::calculateHPageOffset( @@ -1497,8 +1507,7 @@ QPDF::calculateHPageOffset(
1497 1507
1498 int min_nobjects = cphe.at(0).nobjects; 1508 int min_nobjects = cphe.at(0).nobjects;
1499 int max_nobjects = min_nobjects; 1509 int max_nobjects = min_nobjects;
1500 - int min_length =  
1501 - outputLengthNextN(pages.at(0).getObjectID(), min_nobjects, lengths, obj_renumber); 1510 + int min_length = outputLengthNextN(pages.at(0).getObjectID(), min_nobjects, new_obj, obj);
1502 int max_length = min_length; 1511 int max_length = min_length;
1503 int max_shared = cphe.at(0).nshared_objects; 1512 int max_shared = cphe.at(0).nshared_objects;
1504 1513
@@ -1515,7 +1524,7 @@ QPDF::calculateHPageOffset( @@ -1515,7 +1524,7 @@ QPDF::calculateHPageOffset(
1515 // assignments. 1524 // assignments.
1516 1525
1517 int nobjects = cphe.at(i).nobjects; 1526 int nobjects = cphe.at(i).nobjects;
1518 - int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, lengths, obj_renumber); 1527 + int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, new_obj, obj);
1519 int nshared = cphe.at(i).nshared_objects; 1528 int nshared = cphe.at(i).nshared_objects;
1520 1529
1521 min_nobjects = std::min(min_nobjects, nobjects); 1530 min_nobjects = std::min(min_nobjects, nobjects);
@@ -1530,9 +1539,7 @@ QPDF::calculateHPageOffset( @@ -1530,9 +1539,7 @@ QPDF::calculateHPageOffset(
1530 } 1539 }
1531 1540
1532 ph.min_nobjects = min_nobjects; 1541 ph.min_nobjects = min_nobjects;
1533 - int in_page0_id = pages.at(0).getObjectID();  
1534 - int out_page0_id = (*(obj_renumber.find(in_page0_id))).second;  
1535 - ph.first_page_offset = (*(xref.find(out_page0_id))).second.getOffset(); 1542 + ph.first_page_offset = new_obj[obj[pages.at(0)].renumber].xref.getOffset();
1536 ph.nbits_delta_nobjects = nbits(max_nobjects - min_nobjects); 1543 ph.nbits_delta_nobjects = nbits(max_nobjects - min_nobjects);
1537 ph.min_page_length = min_length; 1544 ph.min_page_length = min_length;
1538 ph.nbits_delta_page_length = nbits(max_length - min_length); 1545 ph.nbits_delta_page_length = nbits(max_length - min_length);
@@ -1567,9 +1574,7 @@ QPDF::calculateHPageOffset( @@ -1567,9 +1574,7 @@ QPDF::calculateHPageOffset(
1567 1574
1568 void 1575 void
1569 QPDF::calculateHSharedObject( 1576 QPDF::calculateHSharedObject(
1570 - std::map<int, QPDFXRefEntry> const& xref,  
1571 - std::map<int, qpdf_offset_t> const& lengths,  
1572 - std::map<int, int> const& obj_renumber) 1577 + QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj)
1573 { 1578 {
1574 CHSharedObject& cso = m->c_shared_object_data; 1579 CHSharedObject& cso = m->c_shared_object_data;
1575 std::vector<CHSharedObjectEntry>& csoe = cso.entries; 1580 std::vector<CHSharedObjectEntry>& csoe = cso.entries;
@@ -1577,12 +1582,12 @@ QPDF::calculateHSharedObject( @@ -1577,12 +1582,12 @@ QPDF::calculateHSharedObject(
1577 std::vector<HSharedObjectEntry>& soe = so.entries; 1582 std::vector<HSharedObjectEntry>& soe = so.entries;
1578 soe.clear(); 1583 soe.clear();
1579 1584
1580 - int min_length = outputLengthNextN(csoe.at(0).object, 1, lengths, obj_renumber); 1585 + int min_length = outputLengthNextN(csoe.at(0).object, 1, new_obj, obj);
1581 int max_length = min_length; 1586 int max_length = min_length;
1582 1587
1583 for (size_t i = 0; i < toS(cso.nshared_total); ++i) { 1588 for (size_t i = 0; i < toS(cso.nshared_total); ++i) {
1584 // Assign absolute numbers to deltas; adjust later 1589 // Assign absolute numbers to deltas; adjust later
1585 - int length = outputLengthNextN(csoe.at(i).object, 1, lengths, obj_renumber); 1590 + int length = outputLengthNextN(csoe.at(i).object, 1, new_obj, obj);
1586 min_length = std::min(min_length, length); 1591 min_length = std::min(min_length, length);
1587 max_length = std::max(max_length, length); 1592 max_length = std::max(max_length, length);
1588 soe.emplace_back(); 1593 soe.emplace_back();
@@ -1595,8 +1600,9 @@ QPDF::calculateHSharedObject( @@ -1595,8 +1600,9 @@ QPDF::calculateHSharedObject(
1595 so.nshared_total = cso.nshared_total; 1600 so.nshared_total = cso.nshared_total;
1596 so.nshared_first_page = cso.nshared_first_page; 1601 so.nshared_first_page = cso.nshared_first_page;
1597 if (so.nshared_total > so.nshared_first_page) { 1602 if (so.nshared_total > so.nshared_first_page) {
1598 - so.first_shared_obj = (*(obj_renumber.find(cso.first_shared_obj))).second;  
1599 - so.first_shared_offset = (*(xref.find(so.first_shared_obj))).second.getOffset(); 1603 + so.first_shared_obj = obj[cso.first_shared_obj].renumber;
  1604 + so.min_group_length = min_length;
  1605 + so.first_shared_offset = new_obj[so.first_shared_obj].xref.getOffset();
1600 } 1606 }
1601 so.min_group_length = min_length; 1607 so.min_group_length = min_length;
1602 so.nbits_delta_group_length = nbits(max_length - min_length); 1608 so.nbits_delta_group_length = nbits(max_length - min_length);
@@ -1611,10 +1617,7 @@ QPDF::calculateHSharedObject( @@ -1611,10 +1617,7 @@ QPDF::calculateHSharedObject(
1611 } 1617 }
1612 1618
1613 void 1619 void
1614 -QPDF::calculateHOutline(  
1615 - std::map<int, QPDFXRefEntry> const& xref,  
1616 - std::map<int, qpdf_offset_t> const& lengths,  
1617 - std::map<int, int> const& obj_renumber) 1620 +QPDF::calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj)
1618 { 1621 {
1619 HGeneric& cho = m->c_outline_data; 1622 HGeneric& cho = m->c_outline_data;
1620 1623
@@ -1624,10 +1627,10 @@ QPDF::calculateHOutline( @@ -1624,10 +1627,10 @@ QPDF::calculateHOutline(
1624 1627
1625 HGeneric& ho = m->outline_hints; 1628 HGeneric& ho = m->outline_hints;
1626 1629
1627 - ho.first_object = (*(obj_renumber.find(cho.first_object))).second;  
1628 - ho.first_object_offset = (*(xref.find(ho.first_object))).second.getOffset(); 1630 + ho.first_object = obj[cho.first_object].renumber;
  1631 + ho.first_object_offset = new_obj[ho.first_object].xref.getOffset();
1629 ho.nobjects = cho.nobjects; 1632 ho.nobjects = cho.nobjects;
1630 - ho.group_length = outputLengthNextN(cho.first_object, ho.nobjects, lengths, obj_renumber); 1633 + ho.group_length = outputLengthNextN(cho.first_object, ho.nobjects, new_obj, obj);
1631 } 1634 }
1632 1635
1633 template <class T, class int_type> 1636 template <class T, class int_type>
@@ -1756,18 +1759,17 @@ QPDF::writeHGeneric(BitWriter& w, HGeneric& t) @@ -1756,18 +1759,17 @@ QPDF::writeHGeneric(BitWriter& w, HGeneric& t)
1756 1759
1757 void 1760 void
1758 QPDF::generateHintStream( 1761 QPDF::generateHintStream(
1759 - std::map<int, QPDFXRefEntry> const& xref,  
1760 - std::map<int, qpdf_offset_t> const& lengths,  
1761 - std::map<int, int> const& obj_renumber, 1762 + QPDFWriter::NewObjTable const& new_obj,
  1763 + QPDFWriter::ObjTable const& obj,
1762 std::shared_ptr<Buffer>& hint_buffer, 1764 std::shared_ptr<Buffer>& hint_buffer,
1763 int& S, 1765 int& S,
1764 int& O, 1766 int& O,
1765 bool compressed) 1767 bool compressed)
1766 { 1768 {
1767 // Populate actual hint table values 1769 // Populate actual hint table values
1768 - calculateHPageOffset(xref, lengths, obj_renumber);  
1769 - calculateHSharedObject(xref, lengths, obj_renumber);  
1770 - calculateHOutline(xref, lengths, obj_renumber); 1770 + calculateHPageOffset(new_obj, obj);
  1771 + calculateHSharedObject(new_obj, obj);
  1772 + calculateHOutline(new_obj, obj);
1771 1773
1772 // Write the hint stream itself into a compressed memory buffer. Write through a counter so we 1774 // Write the hint stream itself into a compressed memory buffer. Write through a counter so we
1773 // can get offsets. 1775 // can get offsets.
libqpdf/QPDF_optimization.cc
@@ -5,6 +5,7 @@ @@ -5,6 +5,7 @@
5 #include <qpdf/QPDF.hh> 5 #include <qpdf/QPDF.hh>
6 6
7 #include <qpdf/QPDFExc.hh> 7 #include <qpdf/QPDFExc.hh>
  8 +#include <qpdf/QPDFWriter_private.hh>
8 #include <qpdf/QPDF_Array.hh> 9 #include <qpdf/QPDF_Array.hh>
9 #include <qpdf/QPDF_Dictionary.hh> 10 #include <qpdf/QPDF_Dictionary.hh>
10 #include <qpdf/QTC.hh> 11 #include <qpdf/QTC.hh>
@@ -59,6 +60,23 @@ QPDF::optimize( @@ -59,6 +60,23 @@ QPDF::optimize(
59 bool allow_changes, 60 bool allow_changes,
60 std::function<int(QPDFObjectHandle&)> skip_stream_parameters) 61 std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
61 { 62 {
  63 + optimize_internal(object_stream_data, allow_changes, skip_stream_parameters);
  64 +}
  65 +
  66 +void
  67 +QPDF::optimize(
  68 + QPDFWriter::ObjTable const& obj, std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
  69 +{
  70 + optimize_internal(obj, true, skip_stream_parameters);
  71 +}
  72 +
  73 +template <typename T>
  74 +void
  75 +QPDF::optimize_internal(
  76 + T const& object_stream_data,
  77 + bool allow_changes,
  78 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
  79 +{
62 if (!m->obj_user_to_objects.empty()) { 80 if (!m->obj_user_to_objects.empty()) {
63 // already optimized 81 // already optimized
64 return; 82 return;
@@ -379,3 +397,45 @@ QPDF::filterCompressedObjects(std::map&lt;int, int&gt; const&amp; object_stream_data) @@ -379,3 +397,45 @@ QPDF::filterCompressedObjects(std::map&lt;int, int&gt; const&amp; object_stream_data)
379 m->obj_user_to_objects = t_obj_user_to_objects; 397 m->obj_user_to_objects = t_obj_user_to_objects;
380 m->object_to_obj_users = t_object_to_obj_users; 398 m->object_to_obj_users = t_object_to_obj_users;
381 } 399 }
  400 +
  401 +void
  402 +QPDF::filterCompressedObjects(QPDFWriter::ObjTable const& obj)
  403 +{
  404 + if (obj.getStreamsEmpty()) {
  405 + return;
  406 + }
  407 +
  408 + // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed
  409 + // objects. If something is a user of a compressed object, then it is really a user of the
  410 + // object stream that contains it.
  411 +
  412 + std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects;
  413 + std::map<QPDFObjGen, std::set<ObjUser>> t_object_to_obj_users;
  414 +
  415 + for (auto const& i1: m->obj_user_to_objects) {
  416 + ObjUser const& ou = i1.first;
  417 + // Loop over objects.
  418 + for (auto const& og: i1.second) {
  419 + if (auto const& i2 = obj[og].object_stream; i2 <= 0) {
  420 + t_obj_user_to_objects[ou].insert(og);
  421 + } else {
  422 + t_obj_user_to_objects[ou].insert(QPDFObjGen(i2, 0));
  423 + }
  424 + }
  425 + }
  426 +
  427 + for (auto const& i1: m->object_to_obj_users) {
  428 + QPDFObjGen const& og = i1.first;
  429 + // Loop over obj_users.
  430 + for (auto const& ou: i1.second) {
  431 + if (auto i2 = obj[og].object_stream; i2 <= 0) {
  432 + t_object_to_obj_users[og].insert(ou);
  433 + } else {
  434 + t_object_to_obj_users[QPDFObjGen(i2, 0)].insert(ou);
  435 + }
  436 + }
  437 + }
  438 +
  439 + m->obj_user_to_objects = t_obj_user_to_objects;
  440 + m->object_to_obj_users = t_object_to_obj_users;
  441 +}
libqpdf/qpdf/ObjTable.hh 0 → 100644
  1 +#ifndef OBJTABLE_HH
  2 +#define OBJTABLE_HH
  3 +
  4 +#include <qpdf/QPDFObjGen.hh>
  5 +#include <qpdf/QPDFObjectHandle.hh>
  6 +
  7 +#include "qpdf/QIntC.hh"
  8 +#include <limits>
  9 +
  10 +// A table of objects indexed by object id. This is intended as a more efficient replacement for
  11 +// std::map<QPDFObjGen, T> containers.
  12 +//
  13 +// The table is implemented as a std::vector, with the object id implicitly represented by the index
  14 +// of the object. This has a number of implications, including:
  15 +// - operations that change the index of existing elements such as insertion and deletions are not
  16 +// permitted.
  17 +// - operations that extend the table may invalidate iterators and references to objects.
  18 +//
  19 +// The provided overloads of the access operator[] are safe. For out of bounds access they will
  20 +// either extend the table or throw a runtime error.
  21 +//
  22 +// ObjTable has a map 'sparse_elements' to deal with very sparse / extremely large object tables
  23 +// (usually as the result of invalid dangling references). This map may contain objects not found in
  24 +// the xref table of the original pdf if there are dangling references with an id significantly
  25 +// larger than the largest valid object id found in original pdf.
  26 +
  27 +template <class T>
  28 +class ObjTable: public std::vector<T>
  29 +{
  30 + public:
  31 + ObjTable() = default;
  32 + ObjTable(const ObjTable&) = delete;
  33 + ObjTable(ObjTable&&) = delete;
  34 + ObjTable& operator[](const ObjTable&) = delete;
  35 + ObjTable& operator[](ObjTable&&) = delete;
  36 +
  37 + // Remove unchecked access.
  38 + T& operator[](unsigned long idx) = delete;
  39 + T const& operator[](unsigned long idx) const = delete;
  40 +
  41 + inline T const&
  42 + operator[](int idx) const
  43 + {
  44 + return element(static_cast<size_t>(idx));
  45 + }
  46 +
  47 + inline T const&
  48 + operator[](QPDFObjGen og) const
  49 + {
  50 + return element(static_cast<size_t>(og.getObj()));
  51 + }
  52 +
  53 + inline T const&
  54 + operator[](QPDFObjectHandle oh) const
  55 + {
  56 + return element(static_cast<size_t>(oh.getObjectID()));
  57 + }
  58 +
  59 + inline bool
  60 + contains(size_t idx) const
  61 + {
  62 + return idx < std::vector<T>::size() || sparse_elements.count(idx);
  63 + }
  64 +
  65 + inline bool
  66 + contains(QPDFObjectHandle oh) const
  67 + {
  68 + return contains(static_cast<size_t>(oh.getObjectID()));
  69 + }
  70 +
  71 + protected:
  72 + inline T&
  73 + operator[](int id)
  74 + {
  75 + return element(static_cast<size_t>(id));
  76 + }
  77 +
  78 + inline T&
  79 + operator[](QPDFObjGen og)
  80 + {
  81 + return element(static_cast<size_t>(og.getObj()));
  82 + }
  83 +
  84 + inline T&
  85 + operator[](QPDFObjectHandle oh)
  86 + {
  87 + return element(static_cast<size_t>(oh.getObjectID()));
  88 + }
  89 +
  90 + inline T&
  91 + operator[](unsigned int id)
  92 + {
  93 + return element(id);
  94 + }
  95 +
  96 + void
  97 + initialize(size_t idx)
  98 + {
  99 + if (std::vector<T>::size() > 0 || sparse_elements.size() > 0) {
  100 + throw ::std::logic_error("ObjTable accessed before initialization");
  101 + } else if (
  102 + idx >= static_cast<size_t>(std::numeric_limits<int>::max()) ||
  103 + idx >= std::vector<T>::max_size()) {
  104 + throw std::runtime_error("Invalid maximum object id initializing ObjTable.");
  105 + } else {
  106 + std::vector<T>::resize(++idx);
  107 + }
  108 + }
  109 +
  110 + inline void
  111 + forEach(std::function<void(int, const T&)> fn)
  112 + {
  113 + int i = 0;
  114 + for (auto const& item: *this) {
  115 + fn(i++, item);
  116 + }
  117 + for (auto const& [id, item]: sparse_elements) {
  118 + fn(QIntC::to_int(id), item);
  119 + }
  120 + }
  121 +
  122 + private:
  123 + std::map<size_t, T> sparse_elements;
  124 +
  125 + inline T&
  126 + element(size_t idx)
  127 + {
  128 + if (idx < std::vector<T>::size()) {
  129 + return std::vector<T>::operator[](idx);
  130 + } else if (idx < static_cast<size_t>(std::numeric_limits<int>::max())) {
  131 + return sparse_elements[idx];
  132 + }
  133 + throw std::runtime_error("Invalid object id accessing ObjTable.");
  134 + return element(0); // doesn't return
  135 + }
  136 +
  137 + inline T const&
  138 + element(size_t idx) const
  139 + {
  140 + if (idx < std::vector<T>::size()) {
  141 + return std::vector<T>::operator[](idx);
  142 + } else if (idx < static_cast<size_t>(std::numeric_limits<int>::max())) {
  143 + return sparse_elements.at(idx);
  144 + }
  145 + throw std::runtime_error("Invalid object id accessing ObjTable.");
  146 + return element(0); // doesn't return
  147 + }
  148 +};
  149 +
  150 +#endif // OBJTABLE_HH
libqpdf/qpdf/QPDFWriter_private.hh 0 → 100644
  1 +#ifndef QPDFWRITER_PRIVATE_HH
  2 +#define QPDFWRITER_PRIVATE_HH
  3 +
  4 +#include <qpdf/QPDFWriter.hh>
  5 +
  6 +#include <qpdf/ObjTable.hh>
  7 +
  8 +// This file is intended for inclusion by QPDFWriter, QPDF, QPDF_optimization and QPDF_linearization
  9 +// only.
  10 +
  11 +struct QPDFWriter::Object
  12 +{
  13 + int renumber{0};
  14 + int gen{0};
  15 + int object_stream{0};
  16 +};
  17 +
  18 +struct QPDFWriter::NewObject
  19 +{
  20 + QPDFXRefEntry xref;
  21 + qpdf_offset_t length{0};
  22 +};
  23 +
  24 +class QPDFWriter::ObjTable: public ::ObjTable<QPDFWriter::Object>
  25 +{
  26 + friend class QPDFWriter;
  27 +
  28 + public:
  29 + bool
  30 + getStreamsEmpty() const noexcept
  31 + {
  32 + return streams_empty;
  33 + }
  34 +
  35 + private:
  36 + // For performance, set by QPDFWriter rather than tracked by ObjTable.
  37 + bool streams_empty{false};
  38 +};
  39 +
  40 +class QPDFWriter::NewObjTable: public ::ObjTable<QPDFWriter::NewObject>
  41 +{
  42 + friend class QPDFWriter;
  43 +};
  44 +
  45 +class QPDFWriter::Members
  46 +{
  47 + friend class QPDFWriter;
  48 +
  49 + public:
  50 + QPDF_DLL
  51 + ~Members();
  52 +
  53 + private:
  54 + Members(QPDF& pdf);
  55 + Members(Members const&) = delete;
  56 +
  57 + QPDF& pdf;
  58 + QPDFObjGen root_og{-1, 0};
  59 + char const* filename{"unspecified"};
  60 + FILE* file{nullptr};
  61 + bool close_file{false};
  62 + Pl_Buffer* buffer_pipeline{nullptr};
  63 + Buffer* output_buffer{nullptr};
  64 + bool normalize_content_set{false};
  65 + bool normalize_content{false};
  66 + bool compress_streams{true};
  67 + bool compress_streams_set{false};
  68 + qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_none};
  69 + bool stream_decode_level_set{false};
  70 + bool recompress_flate{false};
  71 + bool qdf_mode{false};
  72 + bool preserve_unreferenced_objects{false};
  73 + bool newline_before_endstream{false};
  74 + bool static_id{false};
  75 + bool suppress_original_object_ids{false};
  76 + bool direct_stream_lengths{true};
  77 + bool encrypted{false};
  78 + bool preserve_encryption{true};
  79 + bool linearized{false};
  80 + bool pclm{false};
  81 + qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};
  82 + std::string encryption_key;
  83 + bool encrypt_metadata{true};
  84 + bool encrypt_use_aes{false};
  85 + std::map<std::string, std::string> encryption_dictionary;
  86 + int encryption_V{0};
  87 + int encryption_R{0};
  88 +
  89 + std::string id1; // for /ID key of
  90 + std::string id2; // trailer dictionary
  91 + std::string final_pdf_version;
  92 + int final_extension_level{0};
  93 + std::string min_pdf_version;
  94 + int min_extension_level{0};
  95 + std::string forced_pdf_version;
  96 + int forced_extension_level{0};
  97 + std::string extra_header_text;
  98 + int encryption_dict_objid{0};
  99 + std::string cur_data_key;
  100 + std::list<std::shared_ptr<Pipeline>> to_delete;
  101 + Pl_Count* pipeline{nullptr};
  102 + std::vector<QPDFObjectHandle> object_queue;
  103 + size_t object_queue_front{0};
  104 + QPDFWriter::ObjTable obj;
  105 + QPDFWriter::NewObjTable new_obj;
  106 + int next_objid{1};
  107 + int cur_stream_length_id{0};
  108 + size_t cur_stream_length{0};
  109 + bool added_newline{false};
  110 + size_t max_ostream_index{0};
  111 + std::set<QPDFObjGen> normalized_streams;
  112 + std::map<QPDFObjGen, int> page_object_to_seq;
  113 + std::map<QPDFObjGen, int> contents_to_page_seq;
  114 + std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
  115 + std::list<Pipeline*> pipeline_stack;
  116 + unsigned long long next_stack_id{0};
  117 + bool deterministic_id{false};
  118 + Pl_MD5* md5_pipeline{nullptr};
  119 + std::string deterministic_id_data;
  120 + bool did_write_setup{false};
  121 +
  122 + // For linearization only
  123 + std::string lin_pass1_filename;
  124 +
  125 + // For progress reporting
  126 + std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
  127 + int events_expected{0};
  128 + int events_seen{0};
  129 + int next_progress_report{0};
  130 +};
  131 +
  132 +#endif // QPDFWRITER_PRIVATE_HH
libtests/CMakeLists.txt
@@ -23,6 +23,7 @@ set(TEST_PROGRAMS @@ -23,6 +23,7 @@ set(TEST_PROGRAMS
23 md5 23 md5
24 nntree 24 nntree
25 numrange 25 numrange
  26 + obj_table
26 pdf_version 27 pdf_version
27 pl_function 28 pl_function
28 pointer_holder 29 pointer_holder
libtests/obj_table.cc 0 → 100644
  1 +#include <qpdf/ObjTable.hh>
  2 +
// Minimal payload type for exercising ObjTable; value defaults to 0.
struct Test
{
    int value = 0;
};
  7 +
  8 +class Table: public ObjTable<Test>
  9 +{
  10 + public:
  11 + Table()
  12 + {
  13 + initialize(5);
  14 + }
  15 +
  16 + void
  17 + test()
  18 + {
  19 + for (int i = 0; i < 10; ++i) {
  20 + (*this)[i].value = 2 * i;
  21 + (*this)[1000 + i].value = 2 * (1000 + i);
  22 + }
  23 +
  24 + forEach([](auto i, auto const& item) -> void {
  25 + std::cout << std::to_string(i) << " : " << std::to_string(item.value) << "\n";
  26 + });
  27 +
  28 + std::cout << "2000 : " << std::to_string((*this)[2000].value) << "\n";
  29 + }
  30 +};
  31 +
  32 +int
  33 +main()
  34 +{
  35 + Table().test();
  36 +
  37 + std::cout << "object table tests done\n";
  38 + return 0;
  39 +}
libtests/qtest/obj_table.test 0 → 100644
#!/usr/bin/env perl
require 5.008;
use warnings;
use strict;

# Run from the directory that holds the expected output.
chdir("obj_table") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('object table');

# Run the obj_table program and compare its output and exit status with obj_table.out.
$td->runtest(
    "obj_table",
    { $td->COMMAND => "obj_table" },
    { $td->FILE => "obj_table.out", $td->EXIT_STATUS => 0 },
    $td->NORMALIZE_NEWLINES);

# One test is expected to have run.
$td->report(1);
libtests/qtest/obj_table/obj_table.out 0 → 100644
  1 +0 : 0
  2 +1 : 2
  3 +2 : 4
  4 +3 : 6
  5 +4 : 8
  6 +5 : 10
  7 +6 : 12
  8 +7 : 14
  9 +8 : 16
  10 +9 : 18
  11 +1000 : 2000
  12 +1001 : 2002
  13 +1002 : 2004
  14 +1003 : 2006
  15 +1004 : 2008
  16 +1005 : 2010
  17 +1006 : 2012
  18 +1007 : 2014
  19 +1008 : 2016
  20 +1009 : 2018
  21 +2000 : 0
  22 +object table tests done