Commit 0bb1458f3899d524b1dc76e0542be08b75e5e9e4
Committed by GitHub
Merge pull request #1161 from m-holger/writer
Tune QPDFWriter
Showing 12 changed files with 715 additions and 316 deletions.
include/qpdf/QPDF.hh
| @@ -41,6 +41,7 @@ | @@ -41,6 +41,7 @@ | ||
| 41 | #include <qpdf/QPDFObjectHandle.hh> | 41 | #include <qpdf/QPDFObjectHandle.hh> |
| 42 | #include <qpdf/QPDFStreamFilter.hh> | 42 | #include <qpdf/QPDFStreamFilter.hh> |
| 43 | #include <qpdf/QPDFTokenizer.hh> | 43 | #include <qpdf/QPDFTokenizer.hh> |
| 44 | +#include <qpdf/QPDFWriter.hh> | ||
| 44 | #include <qpdf/QPDFXRefEntry.hh> | 45 | #include <qpdf/QPDFXRefEntry.hh> |
| 45 | 46 | ||
| 46 | class QPDF_Stream; | 47 | class QPDF_Stream; |
| @@ -727,43 +728,62 @@ class QPDF | @@ -727,43 +728,62 @@ class QPDF | ||
| 727 | 728 | ||
| 728 | private: | 729 | private: |
| 729 | static void | 730 | static void |
| 731 | + optimize( | ||
| 732 | + QPDF& qpdf, | ||
| 733 | + QPDFWriter::ObjTable const& obj, | ||
| 734 | + std::function<int(QPDFObjectHandle&)> skip_stream_parameters) | ||
| 735 | + { | ||
| 736 | + return qpdf.optimize(obj, skip_stream_parameters); | ||
| 737 | + } | ||
| 738 | + | ||
| 739 | + static void | ||
| 730 | getLinearizedParts( | 740 | getLinearizedParts( |
| 731 | QPDF& qpdf, | 741 | QPDF& qpdf, |
| 732 | - std::map<int, int> const& object_stream_data, | 742 | + QPDFWriter::ObjTable const& obj, |
| 733 | std::vector<QPDFObjectHandle>& part4, | 743 | std::vector<QPDFObjectHandle>& part4, |
| 734 | std::vector<QPDFObjectHandle>& part6, | 744 | std::vector<QPDFObjectHandle>& part6, |
| 735 | std::vector<QPDFObjectHandle>& part7, | 745 | std::vector<QPDFObjectHandle>& part7, |
| 736 | std::vector<QPDFObjectHandle>& part8, | 746 | std::vector<QPDFObjectHandle>& part8, |
| 737 | std::vector<QPDFObjectHandle>& part9) | 747 | std::vector<QPDFObjectHandle>& part9) |
| 738 | { | 748 | { |
| 739 | - qpdf.getLinearizedParts(object_stream_data, part4, part6, part7, part8, part9); | 749 | + qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9); |
| 740 | } | 750 | } |
| 741 | 751 | ||
| 742 | static void | 752 | static void |
| 743 | generateHintStream( | 753 | generateHintStream( |
| 744 | QPDF& qpdf, | 754 | QPDF& qpdf, |
| 745 | - std::map<int, QPDFXRefEntry> const& xref, | ||
| 746 | - std::map<int, qpdf_offset_t> const& lengths, | ||
| 747 | - std::map<int, int> const& obj_renumber, | 755 | + QPDFWriter::NewObjTable const& new_obj, |
| 756 | + QPDFWriter::ObjTable const& obj, | ||
| 748 | std::shared_ptr<Buffer>& hint_stream, | 757 | std::shared_ptr<Buffer>& hint_stream, |
| 749 | int& S, | 758 | int& S, |
| 750 | int& O, | 759 | int& O, |
| 751 | bool compressed) | 760 | bool compressed) |
| 752 | { | 761 | { |
| 753 | - return qpdf.generateHintStream( | ||
| 754 | - xref, lengths, obj_renumber, hint_stream, S, O, compressed); | 762 | + return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed); |
| 755 | } | 763 | } |
| 756 | 764 | ||
| 757 | - static void | ||
| 758 | - getObjectStreamData(QPDF& qpdf, std::map<int, int>& omap) | 765 | + static std::vector<QPDFObjGen> |
| 766 | + getCompressibleObjGens(QPDF& qpdf) | ||
| 759 | { | 767 | { |
| 760 | - qpdf.getObjectStreamData(omap); | 768 | + return qpdf.getCompressibleObjVector(); |
| 761 | } | 769 | } |
| 762 | 770 | ||
| 763 | - static std::vector<QPDFObjGen> | ||
| 764 | - getCompressibleObjGens(QPDF& qpdf) | 771 | + static std::vector<bool> |
| 772 | + getCompressibleObjSet(QPDF& qpdf) | ||
| 765 | { | 773 | { |
| 766 | - return qpdf.getCompressibleObjGens(); | 774 | + return qpdf.getCompressibleObjSet(); |
| 775 | + } | ||
| 776 | + | ||
| 777 | + static std::map<QPDFObjGen, QPDFXRefEntry> const& | ||
| 778 | + getXRefTable(QPDF& qpdf) | ||
| 779 | + { | ||
| 780 | + return qpdf.getXRefTableInternal(); | ||
| 781 | + } | ||
| 782 | + | ||
| 783 | + static size_t | ||
| 784 | + tableSize(QPDF& qpdf) | ||
| 785 | + { | ||
| 786 | + return qpdf.tableSize(); | ||
| 767 | } | 787 | } |
| 768 | }; | 788 | }; |
| 769 | 789 | ||
| @@ -1083,10 +1103,21 @@ class QPDF | @@ -1083,10 +1103,21 @@ class QPDF | ||
| 1083 | 1103 | ||
| 1084 | // For QPDFWriter: | 1104 | // For QPDFWriter: |
| 1085 | 1105 | ||
| 1106 | + std::map<QPDFObjGen, QPDFXRefEntry> const& getXRefTableInternal(); | ||
| 1107 | + template <typename T> | ||
| 1108 | + void optimize_internal( | ||
| 1109 | + T const& object_stream_data, | ||
| 1110 | + bool allow_changes = true, | ||
| 1111 | + std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr); | ||
| 1112 | + void optimize( | ||
| 1113 | + QPDFWriter::ObjTable const& obj, | ||
| 1114 | + std::function<int(QPDFObjectHandle&)> skip_stream_parameters); | ||
| 1115 | + size_t tableSize(); | ||
| 1116 | + | ||
| 1086 | // Get lists of all objects in order according to the part of a linearized file that they belong | 1117 | // Get lists of all objects in order according to the part of a linearized file that they belong |
| 1087 | // to. | 1118 | // to. |
| 1088 | void getLinearizedParts( | 1119 | void getLinearizedParts( |
| 1089 | - std::map<int, int> const& object_stream_data, | 1120 | + QPDFWriter::ObjTable const& obj, |
| 1090 | std::vector<QPDFObjectHandle>& part4, | 1121 | std::vector<QPDFObjectHandle>& part4, |
| 1091 | std::vector<QPDFObjectHandle>& part6, | 1122 | std::vector<QPDFObjectHandle>& part6, |
| 1092 | std::vector<QPDFObjectHandle>& part7, | 1123 | std::vector<QPDFObjectHandle>& part7, |
| @@ -1094,19 +1125,18 @@ class QPDF | @@ -1094,19 +1125,18 @@ class QPDF | ||
| 1094 | std::vector<QPDFObjectHandle>& part9); | 1125 | std::vector<QPDFObjectHandle>& part9); |
| 1095 | 1126 | ||
| 1096 | void generateHintStream( | 1127 | void generateHintStream( |
| 1097 | - std::map<int, QPDFXRefEntry> const& xref, | ||
| 1098 | - std::map<int, qpdf_offset_t> const& lengths, | ||
| 1099 | - std::map<int, int> const& obj_renumber, | 1128 | + QPDFWriter::NewObjTable const& new_obj, |
| 1129 | + QPDFWriter::ObjTable const& obj, | ||
| 1100 | std::shared_ptr<Buffer>& hint_stream, | 1130 | std::shared_ptr<Buffer>& hint_stream, |
| 1101 | int& S, | 1131 | int& S, |
| 1102 | int& O, | 1132 | int& O, |
| 1103 | bool compressed); | 1133 | bool compressed); |
| 1104 | 1134 | ||
| 1105 | - // Map object to object stream that contains it | ||
| 1106 | - void getObjectStreamData(std::map<int, int>&); | ||
| 1107 | - | ||
| 1108 | // Get a list of objects that would be permitted in an object stream. | 1135 | // Get a list of objects that would be permitted in an object stream. |
| 1109 | - std::vector<QPDFObjGen> getCompressibleObjGens(); | 1136 | + template <typename T> |
| 1137 | + std::vector<T> getCompressibleObjGens(); | ||
| 1138 | + std::vector<QPDFObjGen> getCompressibleObjVector(); | ||
| 1139 | + std::vector<bool> getCompressibleObjSet(); | ||
| 1110 | 1140 | ||
| 1111 | // methods to support page handling | 1141 | // methods to support page handling |
| 1112 | 1142 | ||
| @@ -1352,6 +1382,7 @@ class QPDF | @@ -1352,6 +1382,7 @@ class QPDF | ||
| 1352 | qpdf_offset_t getLinearizationOffset(QPDFObjGen const&); | 1382 | qpdf_offset_t getLinearizationOffset(QPDFObjGen const&); |
| 1353 | QPDFObjectHandle | 1383 | QPDFObjectHandle |
| 1354 | getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data); | 1384 | getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data); |
| 1385 | + QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj); | ||
| 1355 | int lengthNextN(int first_object, int n); | 1386 | int lengthNextN(int first_object, int n); |
| 1356 | void | 1387 | void |
| 1357 | checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj); | 1388 | checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj); |
| @@ -1362,28 +1393,23 @@ class QPDF | @@ -1362,28 +1393,23 @@ class QPDF | ||
| 1362 | void dumpHSharedObject(); | 1393 | void dumpHSharedObject(); |
| 1363 | void dumpHGeneric(HGeneric&); | 1394 | void dumpHGeneric(HGeneric&); |
| 1364 | qpdf_offset_t adjusted_offset(qpdf_offset_t offset); | 1395 | qpdf_offset_t adjusted_offset(qpdf_offset_t offset); |
| 1365 | - void calculateLinearizationData(std::map<int, int> const& object_stream_data); | 1396 | + template <typename T> |
| 1397 | + void calculateLinearizationData(T const& object_stream_data); | ||
| 1398 | + template <typename T> | ||
| 1366 | void pushOutlinesToPart( | 1399 | void pushOutlinesToPart( |
| 1367 | std::vector<QPDFObjectHandle>& part, | 1400 | std::vector<QPDFObjectHandle>& part, |
| 1368 | std::set<QPDFObjGen>& lc_outlines, | 1401 | std::set<QPDFObjGen>& lc_outlines, |
| 1369 | - std::map<int, int> const& object_stream_data); | 1402 | + T const& object_stream_data); |
| 1370 | int outputLengthNextN( | 1403 | int outputLengthNextN( |
| 1371 | int in_object, | 1404 | int in_object, |
| 1372 | int n, | 1405 | int n, |
| 1373 | - std::map<int, qpdf_offset_t> const& lengths, | ||
| 1374 | - std::map<int, int> const& obj_renumber); | ||
| 1375 | - void calculateHPageOffset( | ||
| 1376 | - std::map<int, QPDFXRefEntry> const& xref, | ||
| 1377 | - std::map<int, qpdf_offset_t> const& lengths, | ||
| 1378 | - std::map<int, int> const& obj_renumber); | ||
| 1379 | - void calculateHSharedObject( | ||
| 1380 | - std::map<int, QPDFXRefEntry> const& xref, | ||
| 1381 | - std::map<int, qpdf_offset_t> const& lengths, | ||
| 1382 | - std::map<int, int> const& obj_renumber); | ||
| 1383 | - void calculateHOutline( | ||
| 1384 | - std::map<int, QPDFXRefEntry> const& xref, | ||
| 1385 | - std::map<int, qpdf_offset_t> const& lengths, | ||
| 1386 | - std::map<int, int> const& obj_renumber); | 1406 | + QPDFWriter::NewObjTable const& new_obj, |
| 1407 | + QPDFWriter::ObjTable const& obj); | ||
| 1408 | + void | ||
| 1409 | + calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); | ||
| 1410 | + void | ||
| 1411 | + calculateHSharedObject(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); | ||
| 1412 | + void calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); | ||
| 1387 | void writeHPageOffset(BitWriter&); | 1413 | void writeHPageOffset(BitWriter&); |
| 1388 | void writeHSharedObject(BitWriter&); | 1414 | void writeHSharedObject(BitWriter&); |
| 1389 | void writeHGeneric(BitWriter&, HGeneric&); | 1415 | void writeHGeneric(BitWriter&, HGeneric&); |
| @@ -1407,6 +1433,7 @@ class QPDF | @@ -1407,6 +1433,7 @@ class QPDF | ||
| 1407 | QPDFObjGen::set& visited, | 1433 | QPDFObjGen::set& visited, |
| 1408 | bool top); | 1434 | bool top); |
| 1409 | void filterCompressedObjects(std::map<int, int> const& object_stream_data); | 1435 | void filterCompressedObjects(std::map<int, int> const& object_stream_data); |
| 1436 | + void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data); | ||
| 1410 | 1437 | ||
| 1411 | // JSON import | 1438 | // JSON import |
| 1412 | void importJSON(std::shared_ptr<InputSource>, bool must_be_complete); | 1439 | void importJSON(std::shared_ptr<InputSource>, bool must_be_complete); |
include/qpdf/QPDFWriter.hh
| @@ -437,6 +437,12 @@ class QPDFWriter | @@ -437,6 +437,12 @@ class QPDFWriter | ||
| 437 | QPDF_DLL | 437 | QPDF_DLL |
| 438 | std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable(); | 438 | std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable(); |
| 439 | 439 | ||
| 440 | + // The following structs / classes are not part of the public API. | ||
| 441 | + struct Object; | ||
| 442 | + struct NewObject; | ||
| 443 | + class ObjTable; | ||
| 444 | + class NewObjTable; | ||
| 445 | + | ||
| 440 | private: | 446 | private: |
| 441 | // flags used by unparseObject | 447 | // flags used by unparseObject |
| 442 | static int const f_stream = 1 << 0; | 448 | static int const f_stream = 1 << 0; |
| @@ -550,6 +556,7 @@ class QPDFWriter | @@ -550,6 +556,7 @@ class QPDFWriter | ||
| 550 | void writeLinearized(); | 556 | void writeLinearized(); |
| 551 | void enqueuePart(std::vector<QPDFObjectHandle>& part); | 557 | void enqueuePart(std::vector<QPDFObjectHandle>& part); |
| 552 | void writeEncryptionDictionary(); | 558 | void writeEncryptionDictionary(); |
| 559 | + void initializeTables(size_t extra = 0); | ||
| 553 | void doWriteSetup(); | 560 | void doWriteSetup(); |
| 554 | void writeHeader(); | 561 | void writeHeader(); |
| 555 | void writeHintStream(int hint_id); | 562 | void writeHintStream(int hint_id); |
| @@ -604,98 +611,7 @@ class QPDFWriter | @@ -604,98 +611,7 @@ class QPDFWriter | ||
| 604 | void pushMD5Pipeline(PipelinePopper&); | 611 | void pushMD5Pipeline(PipelinePopper&); |
| 605 | void computeDeterministicIDData(); | 612 | void computeDeterministicIDData(); |
| 606 | 613 | ||
| 607 | - void discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out); | ||
| 608 | - | ||
| 609 | - class Members | ||
| 610 | - { | ||
| 611 | - friend class QPDFWriter; | ||
| 612 | - | ||
| 613 | - public: | ||
| 614 | - QPDF_DLL | ||
| 615 | - ~Members(); | ||
| 616 | - | ||
| 617 | - private: | ||
| 618 | - Members(QPDF& pdf); | ||
| 619 | - Members(Members const&) = delete; | ||
| 620 | - | ||
| 621 | - QPDF& pdf; | ||
| 622 | - QPDFObjGen root_og{-1, 0}; | ||
| 623 | - char const* filename{"unspecified"}; | ||
| 624 | - FILE* file{nullptr}; | ||
| 625 | - bool close_file{false}; | ||
| 626 | - Pl_Buffer* buffer_pipeline{nullptr}; | ||
| 627 | - Buffer* output_buffer{nullptr}; | ||
| 628 | - bool normalize_content_set{false}; | ||
| 629 | - bool normalize_content{false}; | ||
| 630 | - bool compress_streams{true}; | ||
| 631 | - bool compress_streams_set{false}; | ||
| 632 | - qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_none}; | ||
| 633 | - bool stream_decode_level_set{false}; | ||
| 634 | - bool recompress_flate{false}; | ||
| 635 | - bool qdf_mode{false}; | ||
| 636 | - bool preserve_unreferenced_objects{false}; | ||
| 637 | - bool newline_before_endstream{false}; | ||
| 638 | - bool static_id{false}; | ||
| 639 | - bool suppress_original_object_ids{false}; | ||
| 640 | - bool direct_stream_lengths{true}; | ||
| 641 | - bool encrypted{false}; | ||
| 642 | - bool preserve_encryption{true}; | ||
| 643 | - bool linearized{false}; | ||
| 644 | - bool pclm{false}; | ||
| 645 | - qpdf_object_stream_e object_stream_mode{qpdf_o_preserve}; | ||
| 646 | - std::string encryption_key; | ||
| 647 | - bool encrypt_metadata{true}; | ||
| 648 | - bool encrypt_use_aes{false}; | ||
| 649 | - std::map<std::string, std::string> encryption_dictionary; | ||
| 650 | - int encryption_V{0}; | ||
| 651 | - int encryption_R{0}; | ||
| 652 | - | ||
| 653 | - std::string id1; // for /ID key of | ||
| 654 | - std::string id2; // trailer dictionary | ||
| 655 | - std::string final_pdf_version; | ||
| 656 | - int final_extension_level{0}; | ||
| 657 | - std::string min_pdf_version; | ||
| 658 | - int min_extension_level{0}; | ||
| 659 | - std::string forced_pdf_version; | ||
| 660 | - int forced_extension_level{0}; | ||
| 661 | - std::string extra_header_text; | ||
| 662 | - int encryption_dict_objid{0}; | ||
| 663 | - std::string cur_data_key; | ||
| 664 | - std::list<std::shared_ptr<Pipeline>> to_delete; | ||
| 665 | - Pl_Count* pipeline{nullptr}; | ||
| 666 | - std::vector<QPDFObjectHandle> object_queue; | ||
| 667 | - size_t object_queue_front{0}; | ||
| 668 | - std::map<QPDFObjGen, int> obj_renumber; | ||
| 669 | - std::map<int, QPDFXRefEntry> xref; | ||
| 670 | - std::map<int, qpdf_offset_t> lengths; | ||
| 671 | - int next_objid{1}; | ||
| 672 | - int cur_stream_length_id{0}; | ||
| 673 | - size_t cur_stream_length{0}; | ||
| 674 | - bool added_newline{false}; | ||
| 675 | - int max_ostream_index{0}; | ||
| 676 | - std::set<QPDFObjGen> normalized_streams; | ||
| 677 | - std::map<QPDFObjGen, int> page_object_to_seq; | ||
| 678 | - std::map<QPDFObjGen, int> contents_to_page_seq; | ||
| 679 | - std::map<QPDFObjGen, int> object_to_object_stream; | ||
| 680 | - std::map<int, std::set<QPDFObjGen>> object_stream_to_objects; | ||
| 681 | - std::list<Pipeline*> pipeline_stack; | ||
| 682 | - unsigned long long next_stack_id{0}; | ||
| 683 | - bool deterministic_id{false}; | ||
| 684 | - Pl_MD5* md5_pipeline{nullptr}; | ||
| 685 | - std::string deterministic_id_data; | ||
| 686 | - bool did_write_setup{false}; | ||
| 687 | - | ||
| 688 | - // For linearization only | ||
| 689 | - std::string lin_pass1_filename; | ||
| 690 | - std::map<int, int> obj_renumber_no_gen; | ||
| 691 | - std::map<int, int> object_to_object_stream_no_gen; | ||
| 692 | - | ||
| 693 | - // For progress reporting | ||
| 694 | - std::shared_ptr<ProgressReporter> progress_reporter; | ||
| 695 | - int events_expected{0}; | ||
| 696 | - int events_seen{0}; | ||
| 697 | - int next_progress_report{0}; | ||
| 698 | - }; | 614 | + class Members; |
| 699 | 615 | ||
| 700 | // Keep all member variables inside the Members object, which we dynamically allocate. This | 616 | // Keep all member variables inside the Members object, which we dynamically allocate. This |
| 701 | // makes it possible to add new private members without breaking binary compatibility. | 617 | // makes it possible to add new private members without breaking binary compatibility. |
libqpdf/QPDF.cc
| @@ -2370,6 +2370,12 @@ QPDF::getRoot() | @@ -2370,6 +2370,12 @@ QPDF::getRoot() | ||
| 2370 | std::map<QPDFObjGen, QPDFXRefEntry> | 2370 | std::map<QPDFObjGen, QPDFXRefEntry> |
| 2371 | QPDF::getXRefTable() | 2371 | QPDF::getXRefTable() |
| 2372 | { | 2372 | { |
| 2373 | + return getXRefTableInternal(); | ||
| 2374 | +} | ||
| 2375 | + | ||
| 2376 | +std::map<QPDFObjGen, QPDFXRefEntry> const& | ||
| 2377 | +QPDF::getXRefTableInternal() | ||
| 2378 | +{ | ||
| 2373 | if (!m->parsed) { | 2379 | if (!m->parsed) { |
| 2374 | throw std::logic_error("QPDF::getXRefTable called before parsing."); | 2380 | throw std::logic_error("QPDF::getXRefTable called before parsing."); |
| 2375 | } | 2381 | } |
| @@ -2377,19 +2383,33 @@ QPDF::getXRefTable() | @@ -2377,19 +2383,33 @@ QPDF::getXRefTable() | ||
| 2377 | return m->xref_table; | 2383 | return m->xref_table; |
| 2378 | } | 2384 | } |
| 2379 | 2385 | ||
| 2380 | -void | ||
| 2381 | -QPDF::getObjectStreamData(std::map<int, int>& omap) | 2386 | +size_t |
| 2387 | +QPDF::tableSize() | ||
| 2382 | { | 2388 | { |
| 2383 | - for (auto const& iter: m->xref_table) { | ||
| 2384 | - QPDFObjGen const& og = iter.first; | ||
| 2385 | - QPDFXRefEntry const& entry = iter.second; | ||
| 2386 | - if (entry.getType() == 2) { | ||
| 2387 | - omap[og.getObj()] = entry.getObjStreamNumber(); | ||
| 2388 | - } | 2389 | + // If obj_cache is dense, accommodate all object in tables,else accommodate only original |
| 2390 | + // objects. | ||
| 2391 | + auto max_xref = m->xref_table.size() ? m->xref_table.crbegin()->first.getObj() : 0; | ||
| 2392 | + auto max_obj = m->obj_cache.size() ? m->obj_cache.crbegin()->first.getObj() : 0; | ||
| 2393 | + if (max_obj < 1.1 * std::max(toI(m->obj_cache.size()), max_xref)) { | ||
| 2394 | + return toS(++max_obj); | ||
| 2389 | } | 2395 | } |
| 2396 | + return toS(++max_xref); | ||
| 2390 | } | 2397 | } |
| 2391 | 2398 | ||
| 2392 | std::vector<QPDFObjGen> | 2399 | std::vector<QPDFObjGen> |
| 2400 | +QPDF::getCompressibleObjVector() | ||
| 2401 | +{ | ||
| 2402 | + return getCompressibleObjGens<QPDFObjGen>(); | ||
| 2403 | +} | ||
| 2404 | + | ||
| 2405 | +std::vector<bool> | ||
| 2406 | +QPDF::getCompressibleObjSet() | ||
| 2407 | +{ | ||
| 2408 | + return getCompressibleObjGens<bool>(); | ||
| 2409 | +} | ||
| 2410 | + | ||
| 2411 | +template <typename T> | ||
| 2412 | +std::vector<T> | ||
| 2393 | QPDF::getCompressibleObjGens() | 2413 | QPDF::getCompressibleObjGens() |
| 2394 | { | 2414 | { |
| 2395 | // Return a list of objects that are allowed to be in object streams. Walk through the objects | 2415 | // Return a list of objects that are allowed to be in object streams. Walk through the objects |
| @@ -2407,7 +2427,14 @@ QPDF::getCompressibleObjGens() | @@ -2407,7 +2427,14 @@ QPDF::getCompressibleObjGens() | ||
| 2407 | std::vector<QPDFObjectHandle> queue; | 2427 | std::vector<QPDFObjectHandle> queue; |
| 2408 | queue.reserve(512); | 2428 | queue.reserve(512); |
| 2409 | queue.push_back(m->trailer); | 2429 | queue.push_back(m->trailer); |
| 2410 | - std::vector<QPDFObjGen> result; | 2430 | + std::vector<T> result; |
| 2431 | + if constexpr (std::is_same_v<T, QPDFObjGen>) { | ||
| 2432 | + result.reserve(m->obj_cache.size()); | ||
| 2433 | + } else if constexpr (std::is_same_v<T, bool>) { | ||
| 2434 | + result.resize(max_obj + 1U, false); | ||
| 2435 | + } else { | ||
| 2436 | + throw std::logic_error("Unsupported type in QPDF::getCompressibleObjGens"); | ||
| 2437 | + } | ||
| 2411 | while (!queue.empty()) { | 2438 | while (!queue.empty()) { |
| 2412 | auto obj = queue.back(); | 2439 | auto obj = queue.back(); |
| 2413 | queue.pop_back(); | 2440 | queue.pop_back(); |
| @@ -2439,7 +2466,11 @@ QPDF::getCompressibleObjGens() | @@ -2439,7 +2466,11 @@ QPDF::getCompressibleObjGens() | ||
| 2439 | } else if (!(obj.isStream() || | 2466 | } else if (!(obj.isStream() || |
| 2440 | (obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") && | 2467 | (obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") && |
| 2441 | obj.hasKey("/Contents")))) { | 2468 | obj.hasKey("/Contents")))) { |
| 2442 | - result.push_back(og); | 2469 | + if constexpr (std::is_same_v<T, QPDFObjGen>) { |
| 2470 | + result.push_back(og); | ||
| 2471 | + } else if constexpr (std::is_same_v<T, bool>) { | ||
| 2472 | + result[id + 1U] = true; | ||
| 2473 | + } | ||
| 2443 | } | 2474 | } |
| 2444 | } | 2475 | } |
| 2445 | if (obj.isStream()) { | 2476 | if (obj.isStream()) { |
libqpdf/QPDFWriter.cc
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | #include <qpdf/qpdf-config.h> // include early for large file support | 3 | #include <qpdf/qpdf-config.h> // include early for large file support |
| 4 | 4 | ||
| 5 | -#include <qpdf/QPDFWriter.hh> | 5 | +#include <qpdf/QPDFWriter_private.hh> |
| 6 | 6 | ||
| 7 | #include <qpdf/MD5.hh> | 7 | #include <qpdf/MD5.hh> |
| 8 | #include <qpdf/Pl_AES_PDF.hh> | 8 | #include <qpdf/Pl_AES_PDF.hh> |
| @@ -1038,7 +1038,7 @@ QPDFWriter::openObject(int objid) | @@ -1038,7 +1038,7 @@ QPDFWriter::openObject(int objid) | ||
| 1038 | if (objid == 0) { | 1038 | if (objid == 0) { |
| 1039 | objid = m->next_objid++; | 1039 | objid = m->next_objid++; |
| 1040 | } | 1040 | } |
| 1041 | - m->xref[objid] = QPDFXRefEntry(m->pipeline->getCount()); | 1041 | + m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount()); |
| 1042 | writeString(std::to_string(objid)); | 1042 | writeString(std::to_string(objid)); |
| 1043 | writeString(" 0 obj\n"); | 1043 | writeString(" 0 obj\n"); |
| 1044 | return objid; | 1044 | return objid; |
| @@ -1050,7 +1050,8 @@ QPDFWriter::closeObject(int objid) | @@ -1050,7 +1050,8 @@ QPDFWriter::closeObject(int objid) | ||
| 1050 | // Write a newline before endobj as it makes the file easier to repair. | 1050 | // Write a newline before endobj as it makes the file easier to repair. |
| 1051 | writeString("\nendobj\n"); | 1051 | writeString("\nendobj\n"); |
| 1052 | writeStringQDF("\n"); | 1052 | writeStringQDF("\n"); |
| 1053 | - m->lengths[objid] = m->pipeline->getCount() - m->xref[objid].getOffset(); | 1053 | + auto& new_obj = m->new_obj[objid]; |
| 1054 | + new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset(); | ||
| 1054 | } | 1055 | } |
| 1055 | 1056 | ||
| 1056 | void | 1057 | void |
| @@ -1064,7 +1065,7 @@ QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og) | @@ -1064,7 +1065,7 @@ QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og) | ||
| 1064 | 1065 | ||
| 1065 | // Reserve numbers for the objects that belong to this object stream. | 1066 | // Reserve numbers for the objects that belong to this object stream. |
| 1066 | for (auto const& iter: m->object_stream_to_objects[objid]) { | 1067 | for (auto const& iter: m->object_stream_to_objects[objid]) { |
| 1067 | - m->obj_renumber[iter] = m->next_objid++; | 1068 | + m->obj[iter].renumber = m->next_objid++; |
| 1068 | } | 1069 | } |
| 1069 | } | 1070 | } |
| 1070 | 1071 | ||
| @@ -1093,18 +1094,18 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) | @@ -1093,18 +1094,18 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) | ||
| 1093 | } | 1094 | } |
| 1094 | 1095 | ||
| 1095 | QPDFObjGen og = object.getObjGen(); | 1096 | QPDFObjGen og = object.getObjGen(); |
| 1097 | + auto& obj = m->obj[og]; | ||
| 1096 | 1098 | ||
| 1097 | - if (m->obj_renumber.count(og) == 0) { | ||
| 1098 | - if (m->object_to_object_stream.count(og)) { | 1099 | + if (obj.renumber == 0) { |
| 1100 | + if (obj.object_stream > 0) { | ||
| 1099 | // This is in an object stream. Don't process it here. Instead, enqueue the object | 1101 | // This is in an object stream. Don't process it here. Instead, enqueue the object |
| 1100 | // stream. Object streams always have generation 0. | 1102 | // stream. Object streams always have generation 0. |
| 1101 | - int stream_id = m->object_to_object_stream[og]; | ||
| 1102 | - // Detect loops by storing invalid object ID 0, which will get overwritten later. | ||
| 1103 | - m->obj_renumber[og] = 0; | ||
| 1104 | - enqueueObject(m->pdf.getObjectByID(stream_id, 0)); | 1103 | + // Detect loops by storing invalid object ID -1, which will get overwritten later. |
| 1104 | + obj.renumber = -1; | ||
| 1105 | + enqueueObject(m->pdf.getObject(obj.object_stream, 0)); | ||
| 1105 | } else { | 1106 | } else { |
| 1106 | m->object_queue.push_back(object); | 1107 | m->object_queue.push_back(object); |
| 1107 | - m->obj_renumber[og] = m->next_objid++; | 1108 | + obj.renumber = m->next_objid++; |
| 1108 | 1109 | ||
| 1109 | if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) { | 1110 | if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) { |
| 1110 | // For linearized files, uncompressed objects go at end, and we take care of | 1111 | // For linearized files, uncompressed objects go at end, and we take care of |
| @@ -1117,7 +1118,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) | @@ -1117,7 +1118,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) | ||
| 1117 | ++m->next_objid; | 1118 | ++m->next_objid; |
| 1118 | } | 1119 | } |
| 1119 | } | 1120 | } |
| 1120 | - } else if (m->obj_renumber[og] == 0) { | 1121 | + } else if (obj.renumber == -1) { |
| 1121 | // This can happen if a specially constructed file indicates that an object stream is | 1122 | // This can happen if a specially constructed file indicates that an object stream is |
| 1122 | // inside itself. | 1123 | // inside itself. |
| 1123 | QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream"); | 1124 | QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream"); |
| @@ -1147,9 +1148,7 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags) | @@ -1147,9 +1148,7 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags) | ||
| 1147 | enqueueObject(child); | 1148 | enqueueObject(child); |
| 1148 | } | 1149 | } |
| 1149 | if (child.isIndirect()) { | 1150 | if (child.isIndirect()) { |
| 1150 | - QPDFObjGen old_og = child.getObjGen(); | ||
| 1151 | - int new_id = m->obj_renumber[old_og]; | ||
| 1152 | - writeString(std::to_string(new_id)); | 1151 | + writeString(std::to_string(m->obj[child].renumber)); |
| 1153 | writeString(" 0 R"); | 1152 | writeString(" 0 R"); |
| 1154 | } else { | 1153 | } else { |
| 1155 | unparseObject(child, level, flags); | 1154 | unparseObject(child, level, flags); |
| @@ -1527,9 +1526,8 @@ QPDFWriter::unparseObject( | @@ -1527,9 +1526,8 @@ QPDFWriter::unparseObject( | ||
| 1527 | writeString(">>"); | 1526 | writeString(">>"); |
| 1528 | } else if (tc == ::ot_stream) { | 1527 | } else if (tc == ::ot_stream) { |
| 1529 | // Write stream data to a buffer. | 1528 | // Write stream data to a buffer. |
| 1530 | - int new_id = m->obj_renumber[old_og]; | ||
| 1531 | if (!m->direct_stream_lengths) { | 1529 | if (!m->direct_stream_lengths) { |
| 1532 | - m->cur_stream_length_id = new_id + 1; | 1530 | + m->cur_stream_length_id = m->obj[old_og].renumber + 1; |
| 1533 | } | 1531 | } |
| 1534 | 1532 | ||
| 1535 | flags |= f_stream; | 1533 | flags |= f_stream; |
| @@ -1626,7 +1624,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) | @@ -1626,7 +1624,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) | ||
| 1626 | QPDFObjGen old_og = object.getObjGen(); | 1624 | QPDFObjGen old_og = object.getObjGen(); |
| 1627 | qpdf_assert_debug(old_og.getGen() == 0); | 1625 | qpdf_assert_debug(old_og.getGen() == 0); |
| 1628 | int old_id = old_og.getObj(); | 1626 | int old_id = old_og.getObj(); |
| 1629 | - int new_id = m->obj_renumber[old_og]; | 1627 | + int new_stream_id = m->obj[old_og].renumber; |
| 1630 | 1628 | ||
| 1631 | std::vector<qpdf_offset_t> offsets; | 1629 | std::vector<qpdf_offset_t> offsets; |
| 1632 | qpdf_offset_t first = 0; | 1630 | qpdf_offset_t first = 0; |
| @@ -1670,7 +1668,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) | @@ -1670,7 +1668,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) | ||
| 1670 | int count = -1; | 1668 | int count = -1; |
| 1671 | for (auto const& obj: m->object_stream_to_objects[old_id]) { | 1669 | for (auto const& obj: m->object_stream_to_objects[old_id]) { |
| 1672 | ++count; | 1670 | ++count; |
| 1673 | - int new_obj = m->obj_renumber[obj]; | 1671 | + int new_obj = m->obj[obj].renumber; |
| 1674 | if (first_obj == -1) { | 1672 | if (first_obj == -1) { |
| 1675 | first_obj = new_obj; | 1673 | first_obj = new_obj; |
| 1676 | } | 1674 | } |
| @@ -1706,13 +1704,13 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) | @@ -1706,13 +1704,13 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) | ||
| 1706 | } | 1704 | } |
| 1707 | writeObject(obj_to_write, count); | 1705 | writeObject(obj_to_write, count); |
| 1708 | 1706 | ||
| 1709 | - m->xref[new_obj] = QPDFXRefEntry(new_id, count); | 1707 | + m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count); |
| 1710 | } | 1708 | } |
| 1711 | } | 1709 | } |
| 1712 | 1710 | ||
| 1713 | // Write the object | 1711 | // Write the object |
| 1714 | - openObject(new_id); | ||
| 1715 | - setDataKey(new_id); | 1712 | + openObject(new_stream_id); |
| 1713 | + setDataKey(new_stream_id); | ||
| 1716 | writeString("<<"); | 1714 | writeString("<<"); |
| 1717 | writeStringQDF("\n "); | 1715 | writeStringQDF("\n "); |
| 1718 | writeString(" /Type /ObjStm"); | 1716 | writeString(" /Type /ObjStm"); |
| @@ -1754,7 +1752,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) | @@ -1754,7 +1752,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) | ||
| 1754 | } | 1752 | } |
| 1755 | writeString("endstream"); | 1753 | writeString("endstream"); |
| 1756 | m->cur_data_key.clear(); | 1754 | m->cur_data_key.clear(); |
| 1757 | - closeObject(new_id); | 1755 | + closeObject(new_stream_id); |
| 1758 | } | 1756 | } |
| 1759 | 1757 | ||
| 1760 | void | 1758 | void |
| @@ -1769,7 +1767,7 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) | @@ -1769,7 +1767,7 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) | ||
| 1769 | } | 1767 | } |
| 1770 | 1768 | ||
| 1771 | indicateProgress(false, false); | 1769 | indicateProgress(false, false); |
| 1772 | - int new_id = m->obj_renumber[old_og]; | 1770 | + auto new_id = m->obj[old_og].renumber; |
| 1773 | if (m->qdf_mode) { | 1771 | if (m->qdf_mode) { |
| 1774 | if (m->page_object_to_seq.count(old_og)) { | 1772 | if (m->page_object_to_seq.count(old_og)) { |
| 1775 | writeString("%% Page "); | 1773 | writeString("%% Page "); |
| @@ -1938,11 +1936,7 @@ QPDFWriter::initializeSpecialStreams() | @@ -1938,11 +1936,7 @@ QPDFWriter::initializeSpecialStreams() | ||
| 1938 | void | 1936 | void |
| 1939 | QPDFWriter::preserveObjectStreams() | 1937 | QPDFWriter::preserveObjectStreams() |
| 1940 | { | 1938 | { |
| 1941 | - std::map<int, int> omap; | ||
| 1942 | - QPDF::Writer::getObjectStreamData(m->pdf, omap); | ||
| 1943 | - if (omap.empty()) { | ||
| 1944 | - return; | ||
| 1945 | - } | 1939 | + auto const& xref = QPDF::Writer::getXRefTable(m->pdf); |
| 1946 | // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object | 1940 | // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object |
| 1947 | // streams out of old objects that have generation numbers greater than zero. However in an | 1941 | // streams out of old objects that have generation numbers greater than zero. However in an |
| 1948 | // existing PDF, all object stream objects and all objects in them must have generation 0 | 1942 | // existing PDF, all object stream objects and all objects in them must have generation 0 |
| @@ -1950,20 +1944,43 @@ QPDFWriter::preserveObjectStreams() | @@ -1950,20 +1944,43 @@ QPDFWriter::preserveObjectStreams() | ||
| 1950 | // that are not allowed to be in object streams. In addition to removing objects that were | 1944 | // that are not allowed to be in object streams. In addition to removing objects that were |
| 1951 | // erroneously included in object streams in the source PDF, it also prevents unreferenced | 1945 | // erroneously included in object streams in the source PDF, it also prevents unreferenced |
| 1952 | // objects from being included. | 1946 | // objects from being included. |
| 1953 | - std::set<QPDFObjGen> eligible; | ||
| 1954 | - if (!m->preserve_unreferenced_objects) { | ||
| 1955 | - std::vector<QPDFObjGen> eligible_v = QPDF::Writer::getCompressibleObjGens(m->pdf); | ||
| 1956 | - eligible = std::set<QPDFObjGen>(eligible_v.begin(), eligible_v.end()); | ||
| 1957 | - } | ||
| 1958 | - QTC::TC("qpdf", "QPDFWriter preserve object streams", m->preserve_unreferenced_objects ? 0 : 1); | ||
| 1959 | - for (auto iter: omap) { | ||
| 1960 | - QPDFObjGen og(iter.first, 0); | ||
| 1961 | - if (eligible.count(og) || m->preserve_unreferenced_objects) { | ||
| 1962 | - m->object_to_object_stream[og] = iter.second; | ||
| 1963 | - } else { | ||
| 1964 | - QTC::TC("qpdf", "QPDFWriter exclude from object stream"); | 1947 | + auto iter = xref.cbegin(); |
| 1948 | + auto end = xref.cend(); | ||
| 1949 | + | ||
| 1950 | + // Start by scanning for first compressed object in case we don't have any object streams to | ||
| 1951 | + // process. | ||
| 1952 | + for (; iter != end; ++iter) { | ||
| 1953 | + if (iter->second.getType() == 2) { | ||
| 1954 | + // Pdf contains object streams. | ||
| 1955 | + QTC::TC( | ||
| 1956 | + "qpdf", | ||
| 1957 | + "QPDFWriter preserve object streams", | ||
| 1958 | + m->preserve_unreferenced_objects ? 0 : 1); | ||
| 1959 | + | ||
| 1960 | + if (m->preserve_unreferenced_objects) { | ||
| 1961 | + for (; iter != end; ++iter) { | ||
| 1962 | + if (iter->second.getType() == 2) { | ||
| 1963 | + m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); | ||
| 1964 | + } | ||
| 1965 | + } | ||
| 1966 | + } else { | ||
| 1967 | + auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf); | ||
| 1968 | + for (; iter != end; ++iter) { | ||
| 1969 | + if (iter->second.getType() == 2) { | ||
| 1970 | + auto id = static_cast<size_t>(iter->first.getObj()); | ||
| 1971 | + if (id < eligible.size() && eligible[id]) { | ||
| 1972 | + m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); | ||
| 1973 | + } else { | ||
| 1974 | + QTC::TC("qpdf", "QPDFWriter exclude from object stream"); | ||
| 1975 | + } | ||
| 1976 | + } | ||
| 1977 | + } | ||
| 1978 | + } | ||
| 1979 | + return; | ||
| 1965 | } | 1980 | } |
| 1966 | } | 1981 | } |
| 1982 | + // No compressed objects found. | ||
| 1983 | + m->obj.streams_empty = true; | ||
| 1967 | } | 1984 | } |
| 1968 | 1985 | ||
| 1969 | void | 1986 | void |
| @@ -1979,7 +1996,10 @@ QPDFWriter::generateObjectStreams() | @@ -1979,7 +1996,10 @@ QPDFWriter::generateObjectStreams() | ||
| 1979 | 1996 | ||
| 1980 | std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf); | 1997 | std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf); |
| 1981 | size_t n_object_streams = (eligible.size() + 99U) / 100U; | 1998 | size_t n_object_streams = (eligible.size() + 99U) / 100U; |
| 1999 | + | ||
| 2000 | + initializeTables(2U * n_object_streams); | ||
| 1982 | if (n_object_streams == 0) { | 2001 | if (n_object_streams == 0) { |
| 2002 | + m->obj.streams_empty = true; | ||
| 1983 | return; | 2003 | return; |
| 1984 | } | 2004 | } |
| 1985 | size_t n_per = eligible.size() / n_object_streams; | 2005 | size_t n_per = eligible.size() / n_object_streams; |
| @@ -1987,20 +2007,18 @@ QPDFWriter::generateObjectStreams() | @@ -1987,20 +2007,18 @@ QPDFWriter::generateObjectStreams() | ||
| 1987 | ++n_per; | 2007 | ++n_per; |
| 1988 | } | 2008 | } |
| 1989 | unsigned int n = 0; | 2009 | unsigned int n = 0; |
| 1990 | - int cur_ostream = 0; | ||
| 1991 | - for (auto const& iter: eligible) { | ||
| 1992 | - if ((n % n_per) == 0) { | ||
| 1993 | - if (n > 0) { | ||
| 1994 | - QTC::TC("qpdf", "QPDFWriter generate >1 ostream"); | ||
| 1995 | - } | 2010 | + int cur_ostream = m->pdf.newIndirectNull().getObjectID(); |
| 2011 | + for (auto const& item: eligible) { | ||
| 2012 | + if (n == n_per) { | ||
| 2013 | + QTC::TC("qpdf", "QPDFWriter generate >1 ostream"); | ||
| 1996 | n = 0; | 2014 | n = 0; |
| 1997 | - } | ||
| 1998 | - if (n == 0) { | ||
| 1999 | // Construct a new null object as the "original" object stream. The rest of the code | 2015 | // Construct a new null object as the "original" object stream. The rest of the code |
| 2000 | // knows that this means we're creating the object stream from scratch. | 2016 | // knows that this means we're creating the object stream from scratch. |
| 2001 | - cur_ostream = m->pdf.makeIndirectObject(QPDFObjectHandle::newNull()).getObjectID(); | 2017 | + cur_ostream = m->pdf.newIndirectNull().getObjectID(); |
| 2002 | } | 2018 | } |
| 2003 | - m->object_to_object_stream[iter] = cur_ostream; | 2019 | + auto& obj = m->obj[item]; |
| 2020 | + obj.object_stream = cur_ostream; | ||
| 2021 | + obj.gen = item.getGen(); | ||
| 2004 | ++n; | 2022 | ++n; |
| 2005 | } | 2023 | } |
| 2006 | } | 2024 | } |
| @@ -2056,6 +2074,14 @@ QPDFWriter::prepareFileForWrite() | @@ -2056,6 +2074,14 @@ QPDFWriter::prepareFileForWrite() | ||
| 2056 | } | 2074 | } |
| 2057 | 2075 | ||
| 2058 | void | 2076 | void |
| 2077 | +QPDFWriter::initializeTables(size_t extra) | ||
| 2078 | +{ | ||
| 2079 | + auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra; | ||
| 2080 | + m->obj.initialize(size); | ||
| 2081 | + m->new_obj.initialize(size); | ||
| 2082 | +} | ||
| 2083 | + | ||
| 2084 | +void | ||
| 2059 | QPDFWriter::doWriteSetup() | 2085 | QPDFWriter::doWriteSetup() |
| 2060 | { | 2086 | { |
| 2061 | if (m->did_write_setup) { | 2087 | if (m->did_write_setup) { |
| @@ -2124,10 +2150,12 @@ QPDFWriter::doWriteSetup() | @@ -2124,10 +2150,12 @@ QPDFWriter::doWriteSetup() | ||
| 2124 | 2150 | ||
| 2125 | switch (m->object_stream_mode) { | 2151 | switch (m->object_stream_mode) { |
| 2126 | case qpdf_o_disable: | 2152 | case qpdf_o_disable: |
| 2127 | - // no action required | 2153 | + initializeTables(); |
| 2154 | + m->obj.streams_empty = true; | ||
| 2128 | break; | 2155 | break; |
| 2129 | 2156 | ||
| 2130 | case qpdf_o_preserve: | 2157 | case qpdf_o_preserve: |
| 2158 | + initializeTables(); | ||
| 2131 | preserveObjectStreams(); | 2159 | preserveObjectStreams(); |
| 2132 | break; | 2160 | break; |
| 2133 | 2161 | ||
| @@ -2138,39 +2166,45 @@ QPDFWriter::doWriteSetup() | @@ -2138,39 +2166,45 @@ QPDFWriter::doWriteSetup() | ||
| 2138 | // no default so gcc will warn for missing case tag | 2166 | // no default so gcc will warn for missing case tag |
| 2139 | } | 2167 | } |
| 2140 | 2168 | ||
| 2141 | - if (m->linearized) { | ||
| 2142 | - // Page dictionaries are not allowed to be compressed objects. | ||
| 2143 | - for (auto& page: m->pdf.getAllPages()) { | ||
| 2144 | - QPDFObjGen og = page.getObjGen(); | ||
| 2145 | - if (m->object_to_object_stream.count(og)) { | ||
| 2146 | - QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); | ||
| 2147 | - m->object_to_object_stream.erase(og); | 2169 | + if (!m->obj.streams_empty) { |
| 2170 | + if (m->linearized) { | ||
| 2171 | + // Page dictionaries are not allowed to be compressed objects. | ||
| 2172 | + for (auto& page: m->pdf.getAllPages()) { | ||
| 2173 | + if (m->obj[page].object_stream > 0) { | ||
| 2174 | + QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); | ||
| 2175 | + m->obj[page].object_stream = 0; | ||
| 2176 | + } | ||
| 2148 | } | 2177 | } |
| 2149 | } | 2178 | } |
| 2150 | - } | ||
| 2151 | 2179 | ||
| 2152 | - if (m->linearized || m->encrypted) { | ||
| 2153 | - // The document catalog is not allowed to be compressed in linearized files either. It also | ||
| 2154 | - // appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to handle | ||
| 2155 | - // encrypted files with compressed document catalogs, so we disable them in that case as | ||
| 2156 | - // well. | ||
| 2157 | - if (m->object_to_object_stream.count(m->root_og)) { | ||
| 2158 | - QTC::TC("qpdf", "QPDFWriter uncompressing root"); | ||
| 2159 | - m->object_to_object_stream.erase(m->root_og); | 2180 | + if (m->linearized || m->encrypted) { |
| 2181 | + // The document catalog is not allowed to be compressed in linearized files either. It | ||
| 2182 | + // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to | ||
| 2183 | + // handle encrypted files with compressed document catalogs, so we disable them in that | ||
| 2184 | + // case as well. | ||
| 2185 | + if (m->obj[m->root_og].object_stream > 0) { | ||
| 2186 | + QTC::TC("qpdf", "QPDFWriter uncompressing root"); | ||
| 2187 | + m->obj[m->root_og].object_stream = 0; | ||
| 2188 | + } | ||
| 2160 | } | 2189 | } |
| 2161 | - } | ||
| 2162 | 2190 | ||
| 2163 | - // Generate reverse mapping from object stream to objects | ||
| 2164 | - for (auto const& iter: m->object_to_object_stream) { | ||
| 2165 | - QPDFObjGen const& obj = iter.first; | ||
| 2166 | - int stream = iter.second; | ||
| 2167 | - m->object_stream_to_objects[stream].insert(obj); | ||
| 2168 | - m->max_ostream_index = std::max( | ||
| 2169 | - m->max_ostream_index, QIntC::to_int(m->object_stream_to_objects[stream].size()) - 1); | ||
| 2170 | - } | 2191 | + // Generate reverse mapping from object stream to objects |
| 2192 | + m->obj.forEach([this](auto id, auto const& item) -> void { | ||
| 2193 | + if (item.object_stream > 0) { | ||
| 2194 | + auto& vec = m->object_stream_to_objects[item.object_stream]; | ||
| 2195 | + vec.emplace_back(id, item.gen); | ||
| 2196 | + if (m->max_ostream_index < vec.size()) { | ||
| 2197 | + ++m->max_ostream_index; | ||
| 2198 | + } | ||
| 2199 | + } | ||
| 2200 | + }); | ||
| 2201 | + --m->max_ostream_index; | ||
| 2171 | 2202 | ||
| 2172 | - if (!m->object_stream_to_objects.empty()) { | ||
| 2173 | - setMinimumPDFVersion("1.5"); | 2203 | + if (m->object_stream_to_objects.empty()) { |
| 2204 | + m->obj.streams_empty = true; | ||
| 2205 | + } else { | ||
| 2206 | + setMinimumPDFVersion("1.5"); | ||
| 2207 | + } | ||
| 2174 | } | 2208 | } |
| 2175 | 2209 | ||
| 2176 | setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel()); | 2210 | setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel()); |
| @@ -2215,7 +2249,7 @@ QPDFWriter::write() | @@ -2215,7 +2249,7 @@ QPDFWriter::write() | ||
| 2215 | QPDFObjGen | 2249 | QPDFObjGen |
| 2216 | QPDFWriter::getRenumberedObjGen(QPDFObjGen og) | 2250 | QPDFWriter::getRenumberedObjGen(QPDFObjGen og) |
| 2217 | { | 2251 | { |
| 2218 | - return QPDFObjGen(m->obj_renumber[og], 0); | 2252 | + return QPDFObjGen(m->obj[og].renumber, 0); |
| 2219 | } | 2253 | } |
| 2220 | 2254 | ||
| 2221 | std::map<QPDFObjGen, QPDFXRefEntry> | 2255 | std::map<QPDFObjGen, QPDFXRefEntry> |
| @@ -2223,12 +2257,12 @@ QPDFWriter::getWrittenXRefTable() | @@ -2223,12 +2257,12 @@ QPDFWriter::getWrittenXRefTable() | ||
| 2223 | { | 2257 | { |
| 2224 | std::map<QPDFObjGen, QPDFXRefEntry> result; | 2258 | std::map<QPDFObjGen, QPDFXRefEntry> result; |
| 2225 | 2259 | ||
| 2226 | - for (auto const& iter: m->xref) { | ||
| 2227 | - if (iter.first != 0 && iter.second.getType() != 0) { | ||
| 2228 | - result[QPDFObjGen(iter.first, 0)] = iter.second; | 2260 | + auto it = result.begin(); |
| 2261 | + m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void { | ||
| 2262 | + if (item.xref.getType() != 0) { | ||
| 2263 | + it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref); | ||
| 2229 | } | 2264 | } |
| 2230 | - } | ||
| 2231 | - | 2265 | + }); |
| 2232 | return result; | 2266 | return result; |
| 2233 | } | 2267 | } |
| 2234 | 2268 | ||
| @@ -2290,8 +2324,7 @@ QPDFWriter::writeHintStream(int hint_id) | @@ -2290,8 +2324,7 @@ QPDFWriter::writeHintStream(int hint_id) | ||
| 2290 | int S = 0; | 2324 | int S = 0; |
| 2291 | int O = 0; | 2325 | int O = 0; |
| 2292 | bool compressed = (m->compress_streams && !m->qdf_mode); | 2326 | bool compressed = (m->compress_streams && !m->qdf_mode); |
| 2293 | - QPDF::Writer::generateHintStream( | ||
| 2294 | - m->pdf, m->xref, m->lengths, m->obj_renumber_no_gen, hint_buffer, S, O, compressed); | 2327 | + QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed); |
| 2295 | 2328 | ||
| 2296 | openObject(hint_id); | 2329 | openObject(hint_id); |
| 2297 | setDataKey(hint_id); | 2330 | setDataKey(hint_id); |
| @@ -2364,7 +2397,7 @@ QPDFWriter::writeXRefTable( | @@ -2364,7 +2397,7 @@ QPDFWriter::writeXRefTable( | ||
| 2364 | } else { | 2397 | } else { |
| 2365 | qpdf_offset_t offset = 0; | 2398 | qpdf_offset_t offset = 0; |
| 2366 | if (!suppress_offsets) { | 2399 | if (!suppress_offsets) { |
| 2367 | - offset = m->xref[i].getOffset(); | 2400 | + offset = m->new_obj[i].xref.getOffset(); |
| 2368 | if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { | 2401 | if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { |
| 2369 | offset += hint_length; | 2402 | offset += hint_length; |
| 2370 | } | 2403 | } |
| @@ -2411,13 +2444,13 @@ QPDFWriter::writeXRefStream( | @@ -2411,13 +2444,13 @@ QPDFWriter::writeXRefStream( | ||
| 2411 | unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id)); | 2444 | unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id)); |
| 2412 | 2445 | ||
| 2413 | // field 2 contains object stream indices | 2446 | // field 2 contains object stream indices |
| 2414 | - unsigned int f2_size = bytesNeeded(m->max_ostream_index); | 2447 | + unsigned int f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index)); |
| 2415 | 2448 | ||
| 2416 | unsigned int esize = 1 + f1_size + f2_size; | 2449 | unsigned int esize = 1 + f1_size + f2_size; |
| 2417 | 2450 | ||
| 2418 | // Must store in xref table in advance of writing the actual data rather than waiting for | 2451 | // Must store in xref table in advance of writing the actual data rather than waiting for |
| 2419 | // openObject to do it. | 2452 | // openObject to do it. |
| 2420 | - m->xref[xref_id] = QPDFXRefEntry(m->pipeline->getCount()); | 2453 | + m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount()); |
| 2421 | 2454 | ||
| 2422 | Pipeline* p = pushPipeline(new Pl_Buffer("xref stream")); | 2455 | Pipeline* p = pushPipeline(new Pl_Buffer("xref stream")); |
| 2423 | bool compressed = false; | 2456 | bool compressed = false; |
| @@ -2435,7 +2468,7 @@ QPDFWriter::writeXRefStream( | @@ -2435,7 +2468,7 @@ QPDFWriter::writeXRefStream( | ||
| 2435 | PipelinePopper pp_xref(this, &xref_data); | 2468 | PipelinePopper pp_xref(this, &xref_data); |
| 2436 | activatePipelineStack(pp_xref); | 2469 | activatePipelineStack(pp_xref); |
| 2437 | for (int i = first; i <= last; ++i) { | 2470 | for (int i = first; i <= last; ++i) { |
| 2438 | - QPDFXRefEntry& e = m->xref[i]; | 2471 | + QPDFXRefEntry& e = m->new_obj[i].xref; |
| 2439 | switch (e.getType()) { | 2472 | switch (e.getType()) { |
| 2440 | case 0: | 2473 | case 0: |
| 2441 | writeBinary(0, 1); | 2474 | writeBinary(0, 1); |
| @@ -2507,39 +2540,10 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes) | @@ -2507,39 +2540,10 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes) | ||
| 2507 | } | 2540 | } |
| 2508 | 2541 | ||
| 2509 | void | 2542 | void |
| 2510 | -QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out) | ||
| 2511 | -{ | ||
| 2512 | - // There are deep assumptions in the linearization code in QPDF that there is only one object | ||
| 2513 | - // with each object number; i.e., you can't have two objects with the same object number and | ||
| 2514 | - // different generations. This is a pretty safe assumption because Adobe Reader and Acrobat | ||
| 2515 | - // can't actually handle this case. There is not much if any code in QPDF outside linearization | ||
| 2516 | - // that assumes this, but the linearization code as currently implemented would do weird things | ||
| 2517 | - // if we found such a case. In order to avoid breaking ABI changes in QPDF, we will first | ||
| 2518 | - // assert that this condition holds. Then we can create new maps for QPDF that throw away | ||
| 2519 | - // generation numbers. | ||
| 2520 | - | ||
| 2521 | - out.clear(); | ||
| 2522 | - for (auto const& iter: in) { | ||
| 2523 | - if (out.count(iter.first.getObj())) { | ||
| 2524 | - throw std::runtime_error("QPDF cannot currently linearize files that contain" | ||
| 2525 | - " multiple objects with the same object ID and different" | ||
| 2526 | - " generations. If you see this error message, please file" | ||
| 2527 | - " a bug report and attach the file if possible. As a" | ||
| 2528 | - " workaround, first convert the file with qpdf without" | ||
| 2529 | - " linearizing, and then linearize the result of that" | ||
| 2530 | - " conversion."); | ||
| 2531 | - } | ||
| 2532 | - out[iter.first.getObj()] = iter.second; | ||
| 2533 | - } | ||
| 2534 | -} | ||
| 2535 | - | ||
| 2536 | -void | ||
| 2537 | QPDFWriter::writeLinearized() | 2543 | QPDFWriter::writeLinearized() |
| 2538 | { | 2544 | { |
| 2539 | // Optimize file and enqueue objects in order | 2545 | // Optimize file and enqueue objects in order |
| 2540 | 2546 | ||
| 2541 | - discardGeneration(m->object_to_object_stream, m->object_to_object_stream_no_gen); | ||
| 2542 | - | ||
| 2543 | auto skip_stream_parameters = [this](QPDFObjectHandle& stream) { | 2547 | auto skip_stream_parameters = [this](QPDFObjectHandle& stream) { |
| 2544 | bool compress_stream; | 2548 | bool compress_stream; |
| 2545 | bool is_metadata; | 2549 | bool is_metadata; |
| @@ -2550,15 +2554,14 @@ QPDFWriter::writeLinearized() | @@ -2550,15 +2554,14 @@ QPDFWriter::writeLinearized() | ||
| 2550 | } | 2554 | } |
| 2551 | }; | 2555 | }; |
| 2552 | 2556 | ||
| 2553 | - m->pdf.optimize(m->object_to_object_stream_no_gen, true, skip_stream_parameters); | 2557 | + QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters); |
| 2554 | 2558 | ||
| 2555 | std::vector<QPDFObjectHandle> part4; | 2559 | std::vector<QPDFObjectHandle> part4; |
| 2556 | std::vector<QPDFObjectHandle> part6; | 2560 | std::vector<QPDFObjectHandle> part6; |
| 2557 | std::vector<QPDFObjectHandle> part7; | 2561 | std::vector<QPDFObjectHandle> part7; |
| 2558 | std::vector<QPDFObjectHandle> part8; | 2562 | std::vector<QPDFObjectHandle> part8; |
| 2559 | std::vector<QPDFObjectHandle> part9; | 2563 | std::vector<QPDFObjectHandle> part9; |
| 2560 | - QPDF::Writer::getLinearizedParts( | ||
| 2561 | - m->pdf, m->object_to_object_stream_no_gen, part4, part6, part7, part8, part9); | 2564 | + QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9); |
| 2562 | 2565 | ||
| 2563 | // Object number sequence: | 2566 | // Object number sequence: |
| 2564 | // | 2567 | // |
| @@ -2582,7 +2585,7 @@ QPDFWriter::writeLinearized() | @@ -2582,7 +2585,7 @@ QPDFWriter::writeLinearized() | ||
| 2582 | int after_second_half = 1 + second_half_uncompressed; | 2585 | int after_second_half = 1 + second_half_uncompressed; |
| 2583 | m->next_objid = after_second_half; | 2586 | m->next_objid = after_second_half; |
| 2584 | int second_half_xref = 0; | 2587 | int second_half_xref = 0; |
| 2585 | - bool need_xref_stream = (!m->object_to_object_stream.empty()); | 2588 | + bool need_xref_stream = !m->obj.streams_empty; |
| 2586 | if (need_xref_stream) { | 2589 | if (need_xref_stream) { |
| 2587 | second_half_xref = m->next_objid++; | 2590 | second_half_xref = m->next_objid++; |
| 2588 | } | 2591 | } |
| @@ -2690,14 +2693,14 @@ QPDFWriter::writeLinearized() | @@ -2690,14 +2693,14 @@ QPDFWriter::writeLinearized() | ||
| 2690 | writeString("<<"); | 2693 | writeString("<<"); |
| 2691 | if (pass == 2) { | 2694 | if (pass == 2) { |
| 2692 | std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages(); | 2695 | std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages(); |
| 2693 | - int first_page_object = m->obj_renumber[pages.at(0).getObjGen()]; | 2696 | + int first_page_object = m->obj[pages.at(0)].renumber; |
| 2694 | int npages = QIntC::to_int(pages.size()); | 2697 | int npages = QIntC::to_int(pages.size()); |
| 2695 | 2698 | ||
| 2696 | writeString(" /Linearized 1 /L "); | 2699 | writeString(" /Linearized 1 /L "); |
| 2697 | writeString(std::to_string(file_size + hint_length)); | 2700 | writeString(std::to_string(file_size + hint_length)); |
| 2698 | // Implementation note 121 states that a space is mandatory after this open bracket. | 2701 | // Implementation note 121 states that a space is mandatory after this open bracket. |
| 2699 | writeString(" /H [ "); | 2702 | writeString(" /H [ "); |
| 2700 | - writeString(std::to_string(m->xref[hint_id].getOffset())); | 2703 | + writeString(std::to_string(m->new_obj[hint_id].xref.getOffset())); |
| 2701 | writeString(" "); | 2704 | writeString(" "); |
| 2702 | writeString(std::to_string(hint_length)); | 2705 | writeString(std::to_string(hint_length)); |
| 2703 | writeString(" ] /O "); | 2706 | writeString(" ] /O "); |
| @@ -2724,7 +2727,7 @@ QPDFWriter::writeLinearized() | @@ -2724,7 +2727,7 @@ QPDFWriter::writeLinearized() | ||
| 2724 | qpdf_offset_t first_xref_offset = m->pipeline->getCount(); | 2727 | qpdf_offset_t first_xref_offset = m->pipeline->getCount(); |
| 2725 | qpdf_offset_t hint_offset = 0; | 2728 | qpdf_offset_t hint_offset = 0; |
| 2726 | if (pass == 2) { | 2729 | if (pass == 2) { |
| 2727 | - hint_offset = m->xref[hint_id].getOffset(); | 2730 | + hint_offset = m->new_obj[hint_id].xref.getOffset(); |
| 2728 | } | 2731 | } |
| 2729 | if (need_xref_stream) { | 2732 | if (need_xref_stream) { |
| 2730 | // Must pad here too. | 2733 | // Must pad here too. |
| @@ -2795,7 +2798,7 @@ QPDFWriter::writeLinearized() | @@ -2795,7 +2798,7 @@ QPDFWriter::writeLinearized() | ||
| 2795 | writeEncryptionDictionary(); | 2798 | writeEncryptionDictionary(); |
| 2796 | } | 2799 | } |
| 2797 | if (pass == 1) { | 2800 | if (pass == 1) { |
| 2798 | - m->xref[hint_id] = QPDFXRefEntry(m->pipeline->getCount()); | 2801 | + m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount()); |
| 2799 | } else { | 2802 | } else { |
| 2800 | // Part 5: hint stream | 2803 | // Part 5: hint stream |
| 2801 | writeBuffer(hint_buffer); | 2804 | writeBuffer(hint_buffer); |
| @@ -2855,8 +2858,6 @@ QPDFWriter::writeLinearized() | @@ -2855,8 +2858,6 @@ QPDFWriter::writeLinearized() | ||
| 2855 | writeString(std::to_string(first_xref_offset)); | 2858 | writeString(std::to_string(first_xref_offset)); |
| 2856 | writeString("\n%%EOF\n"); | 2859 | writeString("\n%%EOF\n"); |
| 2857 | 2860 | ||
| 2858 | - discardGeneration(m->obj_renumber, m->obj_renumber_no_gen); | ||
| 2859 | - | ||
| 2860 | if (pass == 1) { | 2861 | if (pass == 1) { |
| 2861 | if (m->deterministic_id) { | 2862 | if (m->deterministic_id) { |
| 2862 | QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1); | 2863 | QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1); |
| @@ -2870,7 +2871,7 @@ QPDFWriter::writeLinearized() | @@ -2870,7 +2871,7 @@ QPDFWriter::writeLinearized() | ||
| 2870 | pp_pass1 = nullptr; | 2871 | pp_pass1 = nullptr; |
| 2871 | 2872 | ||
| 2872 | // Save hint offset since it will be set to zero by calling openObject. | 2873 | // Save hint offset since it will be set to zero by calling openObject. |
| 2873 | - qpdf_offset_t hint_offset1 = m->xref[hint_id].getOffset(); | 2874 | + qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset(); |
| 2874 | 2875 | ||
| 2875 | // Write hint stream to a buffer | 2876 | // Write hint stream to a buffer |
| 2876 | { | 2877 | { |
| @@ -2882,7 +2883,7 @@ QPDFWriter::writeLinearized() | @@ -2882,7 +2883,7 @@ QPDFWriter::writeLinearized() | ||
| 2882 | hint_length = QIntC::to_offset(hint_buffer->getSize()); | 2883 | hint_length = QIntC::to_offset(hint_buffer->getSize()); |
| 2883 | 2884 | ||
| 2884 | // Restore hint offset | 2885 | // Restore hint offset |
| 2885 | - m->xref[hint_id] = QPDFXRefEntry(hint_offset1); | 2886 | + m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1); |
| 2886 | if (lin_pass1_file) { | 2887 | if (lin_pass1_file) { |
| 2887 | // Write some debugging information | 2888 | // Write some debugging information |
| 2888 | fprintf( | 2889 | fprintf( |
libqpdf/QPDF_linearization.cc
| @@ -9,6 +9,7 @@ | @@ -9,6 +9,7 @@ | ||
| 9 | #include <qpdf/Pl_Flate.hh> | 9 | #include <qpdf/Pl_Flate.hh> |
| 10 | #include <qpdf/QPDFExc.hh> | 10 | #include <qpdf/QPDFExc.hh> |
| 11 | #include <qpdf/QPDFLogger.hh> | 11 | #include <qpdf/QPDFLogger.hh> |
| 12 | +#include <qpdf/QPDFWriter_private.hh> | ||
| 12 | #include <qpdf/QTC.hh> | 13 | #include <qpdf/QTC.hh> |
| 13 | #include <qpdf/QUtil.hh> | 14 | #include <qpdf/QUtil.hh> |
| 14 | 15 | ||
| @@ -585,6 +586,17 @@ QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map<int, int> const& obj | @@ -585,6 +586,17 @@ QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map<int, int> const& obj | ||
| 585 | } | 586 | } |
| 586 | } | 587 | } |
| 587 | 588 | ||
| 589 | +QPDFObjectHandle | ||
| 590 | +QPDF::getUncompressedObject(QPDFObjectHandle& oh, QPDFWriter::ObjTable const& obj) | ||
| 591 | +{ | ||
| 592 | + if (obj.contains(oh)) { | ||
| 593 | + if (auto id = obj[oh].object_stream; id > 0) { | ||
| 594 | + return oh.isNull() ? oh : getObject(id, 0); | ||
| 595 | + } | ||
| 596 | + } | ||
| 597 | + return oh; | ||
| 598 | +} | ||
| 599 | + | ||
| 588 | int | 600 | int |
| 589 | QPDF::lengthNextN(int first_object, int n) | 601 | QPDF::lengthNextN(int first_object, int n) |
| 590 | { | 602 | { |
| @@ -959,8 +971,9 @@ QPDF::dumpHGeneric(HGeneric& t) | @@ -959,8 +971,9 @@ QPDF::dumpHGeneric(HGeneric& t) | ||
| 959 | << "group_length: " << t.group_length << "\n"; | 971 | << "group_length: " << t.group_length << "\n"; |
| 960 | } | 972 | } |
| 961 | 973 | ||
| 974 | +template <typename T> | ||
| 962 | void | 975 | void |
| 963 | -QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | 976 | +QPDF::calculateLinearizationData(T const& object_stream_data) |
| 964 | { | 977 | { |
| 965 | // This function calculates the ordering of objects, divides them into the appropriate parts, | 978 | // This function calculates the ordering of objects, divides them into the appropriate parts, |
| 966 | // and computes some values for the linearization parameter dictionary and hint tables. The | 979 | // and computes some values for the linearization parameter dictionary and hint tables. The |
| @@ -1402,11 +1415,12 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | @@ -1402,11 +1415,12 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | ||
| 1402 | } | 1415 | } |
| 1403 | } | 1416 | } |
| 1404 | 1417 | ||
| 1418 | +template <typename T> | ||
| 1405 | void | 1419 | void |
| 1406 | QPDF::pushOutlinesToPart( | 1420 | QPDF::pushOutlinesToPart( |
| 1407 | std::vector<QPDFObjectHandle>& part, | 1421 | std::vector<QPDFObjectHandle>& part, |
| 1408 | std::set<QPDFObjGen>& lc_outlines, | 1422 | std::set<QPDFObjGen>& lc_outlines, |
| 1409 | - std::map<int, int> const& object_stream_data) | 1423 | + T const& object_stream_data) |
| 1410 | { | 1424 | { |
| 1411 | QPDFObjectHandle root = getRoot(); | 1425 | QPDFObjectHandle root = getRoot(); |
| 1412 | QPDFObjectHandle outlines = root.getKey("/Outlines"); | 1426 | QPDFObjectHandle outlines = root.getKey("/Outlines"); |
| @@ -1433,14 +1447,14 @@ QPDF::pushOutlinesToPart( | @@ -1433,14 +1447,14 @@ QPDF::pushOutlinesToPart( | ||
| 1433 | 1447 | ||
| 1434 | void | 1448 | void |
| 1435 | QPDF::getLinearizedParts( | 1449 | QPDF::getLinearizedParts( |
| 1436 | - std::map<int, int> const& object_stream_data, | 1450 | + QPDFWriter::ObjTable const& obj, |
| 1437 | std::vector<QPDFObjectHandle>& part4, | 1451 | std::vector<QPDFObjectHandle>& part4, |
| 1438 | std::vector<QPDFObjectHandle>& part6, | 1452 | std::vector<QPDFObjectHandle>& part6, |
| 1439 | std::vector<QPDFObjectHandle>& part7, | 1453 | std::vector<QPDFObjectHandle>& part7, |
| 1440 | std::vector<QPDFObjectHandle>& part8, | 1454 | std::vector<QPDFObjectHandle>& part8, |
| 1441 | std::vector<QPDFObjectHandle>& part9) | 1455 | std::vector<QPDFObjectHandle>& part9) |
| 1442 | { | 1456 | { |
| 1443 | - calculateLinearizationData(object_stream_data); | 1457 | + calculateLinearizationData(obj); |
| 1444 | part4 = m->part4; | 1458 | part4 = m->part4; |
| 1445 | part6 = m->part6; | 1459 | part6 = m->part6; |
| 1446 | part7 = m->part7; | 1460 | part7 = m->part7; |
| @@ -1456,33 +1470,29 @@ nbits(int val) | @@ -1456,33 +1470,29 @@ nbits(int val) | ||
| 1456 | 1470 | ||
| 1457 | int | 1471 | int |
| 1458 | QPDF::outputLengthNextN( | 1472 | QPDF::outputLengthNextN( |
| 1459 | - int in_object, | ||
| 1460 | - int n, | ||
| 1461 | - std::map<int, qpdf_offset_t> const& lengths, | ||
| 1462 | - std::map<int, int> const& obj_renumber) | 1473 | + int in_object, int n, QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) |
| 1463 | { | 1474 | { |
| 1464 | // Figure out the length of a series of n consecutive objects in the output file starting with | 1475 | // Figure out the length of a series of n consecutive objects in the output file starting with |
| 1465 | // whatever object in_object from the input file mapped to. | 1476 | // whatever object in_object from the input file mapped to. |
| 1466 | 1477 | ||
| 1467 | - if (obj_renumber.count(in_object) == 0) { | 1478 | + int first = obj[in_object].renumber; |
| 1479 | + int last = first + n; | ||
| 1480 | + if (first <= 0) { | ||
| 1468 | stopOnError("found object that is not renumbered while writing linearization data"); | 1481 | stopOnError("found object that is not renumbered while writing linearization data"); |
| 1469 | } | 1482 | } |
| 1470 | - int first = (*(obj_renumber.find(in_object))).second; | ||
| 1471 | - int length = 0; | ||
| 1472 | - for (int i = 0; i < n; ++i) { | ||
| 1473 | - if (lengths.count(first + i) == 0) { | 1483 | + qpdf_offset_t length = 0; |
| 1484 | + for (int i = first; i < last; ++i) { | ||
| 1485 | + auto l = new_obj[i].length; | ||
| 1486 | + if (l == 0) { | ||
| 1474 | stopOnError("found item with unknown length while writing linearization data"); | 1487 | stopOnError("found item with unknown length while writing linearization data"); |
| 1475 | } | 1488 | } |
| 1476 | - length += toI((*(lengths.find(first + toI(i)))).second); | 1489 | + length += l; |
| 1477 | } | 1490 | } |
| 1478 | - return length; | 1491 | + return toI(length); |
| 1479 | } | 1492 | } |
| 1480 | 1493 | ||
| 1481 | void | 1494 | void |
| 1482 | -QPDF::calculateHPageOffset( | ||
| 1483 | - std::map<int, QPDFXRefEntry> const& xref, | ||
| 1484 | - std::map<int, qpdf_offset_t> const& lengths, | ||
| 1485 | - std::map<int, int> const& obj_renumber) | 1495 | +QPDF::calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) |
| 1486 | { | 1496 | { |
| 1487 | // Page Offset Hint Table | 1497 | // Page Offset Hint Table |
| 1488 | 1498 | ||
| @@ -1497,8 +1507,7 @@ QPDF::calculateHPageOffset( | @@ -1497,8 +1507,7 @@ QPDF::calculateHPageOffset( | ||
| 1497 | 1507 | ||
| 1498 | int min_nobjects = cphe.at(0).nobjects; | 1508 | int min_nobjects = cphe.at(0).nobjects; |
| 1499 | int max_nobjects = min_nobjects; | 1509 | int max_nobjects = min_nobjects; |
| 1500 | - int min_length = | ||
| 1501 | - outputLengthNextN(pages.at(0).getObjectID(), min_nobjects, lengths, obj_renumber); | 1510 | + int min_length = outputLengthNextN(pages.at(0).getObjectID(), min_nobjects, new_obj, obj); |
| 1502 | int max_length = min_length; | 1511 | int max_length = min_length; |
| 1503 | int max_shared = cphe.at(0).nshared_objects; | 1512 | int max_shared = cphe.at(0).nshared_objects; |
| 1504 | 1513 | ||
| @@ -1515,7 +1524,7 @@ QPDF::calculateHPageOffset( | @@ -1515,7 +1524,7 @@ QPDF::calculateHPageOffset( | ||
| 1515 | // assignments. | 1524 | // assignments. |
| 1516 | 1525 | ||
| 1517 | int nobjects = cphe.at(i).nobjects; | 1526 | int nobjects = cphe.at(i).nobjects; |
| 1518 | - int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, lengths, obj_renumber); | 1527 | + int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, new_obj, obj); |
| 1519 | int nshared = cphe.at(i).nshared_objects; | 1528 | int nshared = cphe.at(i).nshared_objects; |
| 1520 | 1529 | ||
| 1521 | min_nobjects = std::min(min_nobjects, nobjects); | 1530 | min_nobjects = std::min(min_nobjects, nobjects); |
| @@ -1530,9 +1539,7 @@ QPDF::calculateHPageOffset( | @@ -1530,9 +1539,7 @@ QPDF::calculateHPageOffset( | ||
| 1530 | } | 1539 | } |
| 1531 | 1540 | ||
| 1532 | ph.min_nobjects = min_nobjects; | 1541 | ph.min_nobjects = min_nobjects; |
| 1533 | - int in_page0_id = pages.at(0).getObjectID(); | ||
| 1534 | - int out_page0_id = (*(obj_renumber.find(in_page0_id))).second; | ||
| 1535 | - ph.first_page_offset = (*(xref.find(out_page0_id))).second.getOffset(); | 1542 | + ph.first_page_offset = new_obj[obj[pages.at(0)].renumber].xref.getOffset(); |
| 1536 | ph.nbits_delta_nobjects = nbits(max_nobjects - min_nobjects); | 1543 | ph.nbits_delta_nobjects = nbits(max_nobjects - min_nobjects); |
| 1537 | ph.min_page_length = min_length; | 1544 | ph.min_page_length = min_length; |
| 1538 | ph.nbits_delta_page_length = nbits(max_length - min_length); | 1545 | ph.nbits_delta_page_length = nbits(max_length - min_length); |
| @@ -1567,9 +1574,7 @@ QPDF::calculateHPageOffset( | @@ -1567,9 +1574,7 @@ QPDF::calculateHPageOffset( | ||
| 1567 | 1574 | ||
| 1568 | void | 1575 | void |
| 1569 | QPDF::calculateHSharedObject( | 1576 | QPDF::calculateHSharedObject( |
| 1570 | - std::map<int, QPDFXRefEntry> const& xref, | ||
| 1571 | - std::map<int, qpdf_offset_t> const& lengths, | ||
| 1572 | - std::map<int, int> const& obj_renumber) | 1577 | + QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) |
| 1573 | { | 1578 | { |
| 1574 | CHSharedObject& cso = m->c_shared_object_data; | 1579 | CHSharedObject& cso = m->c_shared_object_data; |
| 1575 | std::vector<CHSharedObjectEntry>& csoe = cso.entries; | 1580 | std::vector<CHSharedObjectEntry>& csoe = cso.entries; |
| @@ -1577,12 +1582,12 @@ QPDF::calculateHSharedObject( | @@ -1577,12 +1582,12 @@ QPDF::calculateHSharedObject( | ||
| 1577 | std::vector<HSharedObjectEntry>& soe = so.entries; | 1582 | std::vector<HSharedObjectEntry>& soe = so.entries; |
| 1578 | soe.clear(); | 1583 | soe.clear(); |
| 1579 | 1584 | ||
| 1580 | - int min_length = outputLengthNextN(csoe.at(0).object, 1, lengths, obj_renumber); | 1585 | + int min_length = outputLengthNextN(csoe.at(0).object, 1, new_obj, obj); |
| 1581 | int max_length = min_length; | 1586 | int max_length = min_length; |
| 1582 | 1587 | ||
| 1583 | for (size_t i = 0; i < toS(cso.nshared_total); ++i) { | 1588 | for (size_t i = 0; i < toS(cso.nshared_total); ++i) { |
| 1584 | // Assign absolute numbers to deltas; adjust later | 1589 | // Assign absolute numbers to deltas; adjust later |
| 1585 | - int length = outputLengthNextN(csoe.at(i).object, 1, lengths, obj_renumber); | 1590 | + int length = outputLengthNextN(csoe.at(i).object, 1, new_obj, obj); |
| 1586 | min_length = std::min(min_length, length); | 1591 | min_length = std::min(min_length, length); |
| 1587 | max_length = std::max(max_length, length); | 1592 | max_length = std::max(max_length, length); |
| 1588 | soe.emplace_back(); | 1593 | soe.emplace_back(); |
| @@ -1595,8 +1600,9 @@ QPDF::calculateHSharedObject( | @@ -1595,8 +1600,9 @@ QPDF::calculateHSharedObject( | ||
| 1595 | so.nshared_total = cso.nshared_total; | 1600 | so.nshared_total = cso.nshared_total; |
| 1596 | so.nshared_first_page = cso.nshared_first_page; | 1601 | so.nshared_first_page = cso.nshared_first_page; |
| 1597 | if (so.nshared_total > so.nshared_first_page) { | 1602 | if (so.nshared_total > so.nshared_first_page) { |
| 1598 | - so.first_shared_obj = (*(obj_renumber.find(cso.first_shared_obj))).second; | ||
| 1599 | - so.first_shared_offset = (*(xref.find(so.first_shared_obj))).second.getOffset(); | 1603 | + so.first_shared_obj = obj[cso.first_shared_obj].renumber; |
| 1604 | + so.min_group_length = min_length; | ||
| 1605 | + so.first_shared_offset = new_obj[so.first_shared_obj].xref.getOffset(); | ||
| 1600 | } | 1606 | } |
| 1601 | so.min_group_length = min_length; | 1607 | so.min_group_length = min_length; |
| 1602 | so.nbits_delta_group_length = nbits(max_length - min_length); | 1608 | so.nbits_delta_group_length = nbits(max_length - min_length); |
| @@ -1611,10 +1617,7 @@ QPDF::calculateHSharedObject( | @@ -1611,10 +1617,7 @@ QPDF::calculateHSharedObject( | ||
| 1611 | } | 1617 | } |
| 1612 | 1618 | ||
| 1613 | void | 1619 | void |
| 1614 | -QPDF::calculateHOutline( | ||
| 1615 | - std::map<int, QPDFXRefEntry> const& xref, | ||
| 1616 | - std::map<int, qpdf_offset_t> const& lengths, | ||
| 1617 | - std::map<int, int> const& obj_renumber) | 1620 | +QPDF::calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) |
| 1618 | { | 1621 | { |
| 1619 | HGeneric& cho = m->c_outline_data; | 1622 | HGeneric& cho = m->c_outline_data; |
| 1620 | 1623 | ||
| @@ -1624,10 +1627,10 @@ QPDF::calculateHOutline( | @@ -1624,10 +1627,10 @@ QPDF::calculateHOutline( | ||
| 1624 | 1627 | ||
| 1625 | HGeneric& ho = m->outline_hints; | 1628 | HGeneric& ho = m->outline_hints; |
| 1626 | 1629 | ||
| 1627 | - ho.first_object = (*(obj_renumber.find(cho.first_object))).second; | ||
| 1628 | - ho.first_object_offset = (*(xref.find(ho.first_object))).second.getOffset(); | 1630 | + ho.first_object = obj[cho.first_object].renumber; |
| 1631 | + ho.first_object_offset = new_obj[ho.first_object].xref.getOffset(); | ||
| 1629 | ho.nobjects = cho.nobjects; | 1632 | ho.nobjects = cho.nobjects; |
| 1630 | - ho.group_length = outputLengthNextN(cho.first_object, ho.nobjects, lengths, obj_renumber); | 1633 | + ho.group_length = outputLengthNextN(cho.first_object, ho.nobjects, new_obj, obj); |
| 1631 | } | 1634 | } |
| 1632 | 1635 | ||
| 1633 | template <class T, class int_type> | 1636 | template <class T, class int_type> |
| @@ -1756,18 +1759,17 @@ QPDF::writeHGeneric(BitWriter& w, HGeneric& t) | @@ -1756,18 +1759,17 @@ QPDF::writeHGeneric(BitWriter& w, HGeneric& t) | ||
| 1756 | 1759 | ||
| 1757 | void | 1760 | void |
| 1758 | QPDF::generateHintStream( | 1761 | QPDF::generateHintStream( |
| 1759 | - std::map<int, QPDFXRefEntry> const& xref, | ||
| 1760 | - std::map<int, qpdf_offset_t> const& lengths, | ||
| 1761 | - std::map<int, int> const& obj_renumber, | 1762 | + QPDFWriter::NewObjTable const& new_obj, |
| 1763 | + QPDFWriter::ObjTable const& obj, | ||
| 1762 | std::shared_ptr<Buffer>& hint_buffer, | 1764 | std::shared_ptr<Buffer>& hint_buffer, |
| 1763 | int& S, | 1765 | int& S, |
| 1764 | int& O, | 1766 | int& O, |
| 1765 | bool compressed) | 1767 | bool compressed) |
| 1766 | { | 1768 | { |
| 1767 | // Populate actual hint table values | 1769 | // Populate actual hint table values |
| 1768 | - calculateHPageOffset(xref, lengths, obj_renumber); | ||
| 1769 | - calculateHSharedObject(xref, lengths, obj_renumber); | ||
| 1770 | - calculateHOutline(xref, lengths, obj_renumber); | 1770 | + calculateHPageOffset(new_obj, obj); |
| 1771 | + calculateHSharedObject(new_obj, obj); | ||
| 1772 | + calculateHOutline(new_obj, obj); | ||
| 1771 | 1773 | ||
| 1772 | // Write the hint stream itself into a compressed memory buffer. Write through a counter so we | 1774 | // Write the hint stream itself into a compressed memory buffer. Write through a counter so we |
| 1773 | // can get offsets. | 1775 | // can get offsets. |
libqpdf/QPDF_optimization.cc
| @@ -5,6 +5,7 @@ | @@ -5,6 +5,7 @@ | ||
| 5 | #include <qpdf/QPDF.hh> | 5 | #include <qpdf/QPDF.hh> |
| 6 | 6 | ||
| 7 | #include <qpdf/QPDFExc.hh> | 7 | #include <qpdf/QPDFExc.hh> |
| 8 | +#include <qpdf/QPDFWriter_private.hh> | ||
| 8 | #include <qpdf/QPDF_Array.hh> | 9 | #include <qpdf/QPDF_Array.hh> |
| 9 | #include <qpdf/QPDF_Dictionary.hh> | 10 | #include <qpdf/QPDF_Dictionary.hh> |
| 10 | #include <qpdf/QTC.hh> | 11 | #include <qpdf/QTC.hh> |
| @@ -59,6 +60,23 @@ QPDF::optimize( | @@ -59,6 +60,23 @@ QPDF::optimize( | ||
| 59 | bool allow_changes, | 60 | bool allow_changes, |
| 60 | std::function<int(QPDFObjectHandle&)> skip_stream_parameters) | 61 | std::function<int(QPDFObjectHandle&)> skip_stream_parameters) |
| 61 | { | 62 | { |
| 63 | + optimize_internal(object_stream_data, allow_changes, skip_stream_parameters); | ||
| 64 | +} | ||
| 65 | + | ||
| 66 | +void | ||
| 67 | +QPDF::optimize( | ||
| 68 | + QPDFWriter::ObjTable const& obj, std::function<int(QPDFObjectHandle&)> skip_stream_parameters) | ||
| 69 | +{ | ||
| 70 | + optimize_internal(obj, true, skip_stream_parameters); | ||
| 71 | +} | ||
| 72 | + | ||
| 73 | +template <typename T> | ||
| 74 | +void | ||
| 75 | +QPDF::optimize_internal( | ||
| 76 | + T const& object_stream_data, | ||
| 77 | + bool allow_changes, | ||
| 78 | + std::function<int(QPDFObjectHandle&)> skip_stream_parameters) | ||
| 79 | +{ | ||
| 62 | if (!m->obj_user_to_objects.empty()) { | 80 | if (!m->obj_user_to_objects.empty()) { |
| 63 | // already optimized | 81 | // already optimized |
| 64 | return; | 82 | return; |
| @@ -379,3 +397,45 @@ QPDF::filterCompressedObjects(std::map<int, int> const& object_stream_data) | @@ -379,3 +397,45 @@ QPDF::filterCompressedObjects(std::map<int, int> const& object_stream_data) | ||
| 379 | m->obj_user_to_objects = t_obj_user_to_objects; | 397 | m->obj_user_to_objects = t_obj_user_to_objects; |
| 380 | m->object_to_obj_users = t_object_to_obj_users; | 398 | m->object_to_obj_users = t_object_to_obj_users; |
| 381 | } | 399 | } |
| 400 | + | ||
| 401 | +void | ||
| 402 | +QPDF::filterCompressedObjects(QPDFWriter::ObjTable const& obj) | ||
| 403 | +{ | ||
| 404 | + if (obj.getStreamsEmpty()) { | ||
| 405 | + return; | ||
| 406 | + } | ||
| 407 | + | ||
| 408 | + // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed | ||
| 409 | + // objects. If something is a user of a compressed object, then it is really a user of the | ||
| 410 | + // object stream that contains it. | ||
| 411 | + | ||
| 412 | + std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects; | ||
| 413 | + std::map<QPDFObjGen, std::set<ObjUser>> t_object_to_obj_users; | ||
| 414 | + | ||
| 415 | + for (auto const& i1: m->obj_user_to_objects) { | ||
| 416 | + ObjUser const& ou = i1.first; | ||
| 417 | + // Loop over objects. | ||
| 418 | + for (auto const& og: i1.second) { | ||
| 419 | + if (auto const& i2 = obj[og].object_stream; i2 <= 0) { | ||
| 420 | + t_obj_user_to_objects[ou].insert(og); | ||
| 421 | + } else { | ||
| 422 | + t_obj_user_to_objects[ou].insert(QPDFObjGen(i2, 0)); | ||
| 423 | + } | ||
| 424 | + } | ||
| 425 | + } | ||
| 426 | + | ||
| 427 | + for (auto const& i1: m->object_to_obj_users) { | ||
| 428 | + QPDFObjGen const& og = i1.first; | ||
| 429 | + // Loop over obj_users. | ||
| 430 | + for (auto const& ou: i1.second) { | ||
| 431 | + if (auto i2 = obj[og].object_stream; i2 <= 0) { | ||
| 432 | + t_object_to_obj_users[og].insert(ou); | ||
| 433 | + } else { | ||
| 434 | + t_object_to_obj_users[QPDFObjGen(i2, 0)].insert(ou); | ||
| 435 | + } | ||
| 436 | + } | ||
| 437 | + } | ||
| 438 | + | ||
| 439 | + m->obj_user_to_objects = t_obj_user_to_objects; | ||
| 440 | + m->object_to_obj_users = t_object_to_obj_users; | ||
| 441 | +} |
libqpdf/qpdf/ObjTable.hh
0 → 100644
| 1 | +#ifndef OBJTABLE_HH | ||
| 2 | +#define OBJTABLE_HH | ||
| 3 | + | ||
| 4 | +#include <qpdf/QPDFObjGen.hh> | ||
| 5 | +#include <qpdf/QPDFObjectHandle.hh> | ||
| 6 | + | ||
| 7 | +#include "qpdf/QIntC.hh" | ||
| 8 | +#include <limits> | ||
| 9 | + | ||
| 10 | +// A table of objects indexed by object id. This is intended as a more efficient replacement for | ||
| 11 | +// std::map<QPDFObjGen, T> containers. | ||
| 12 | +// | ||
| 13 | +// The table is implemented as a std::vector, with the object id implicitly represented by the index | ||
| 14 | +// of the object. This has a number of implications, including: | ||
| 15 | +// - operations that change the index of existing elements such as insertion and deletions are not | ||
| 16 | +// permitted. | ||
| 17 | +// - operations that extend the table may invalidate iterators and references to objects. | ||
| 18 | +// | ||
| 19 | +// The provided overloads of the access operator[] are safe. For out of bounds access they will | ||
| 20 | +// either extend the table or throw a runtime error. | ||
| 21 | +// | ||
| 22 | +// ObjTable has a map 'sparse_elements' to deal with very sparse / extremely large object tables | ||
| 23 | +// (usually as the result of invalid dangling references). This map may contain objects not found in | ||
| 24 | +// the xref table of the original pdf if there are dangling references with an id significantly | ||
| 25 | +// larger than the largest valid object id found in original pdf. | ||
| 26 | + | ||
| 27 | +template <class T> | ||
| 28 | +class ObjTable: public std::vector<T> | ||
| 29 | +{ | ||
| 30 | + public: | ||
| 31 | + ObjTable() = default; | ||
| 32 | + ObjTable(const ObjTable&) = delete; | ||
| 33 | + ObjTable(ObjTable&&) = delete; | ||
| 34 | + ObjTable& operator[](const ObjTable&) = delete; | ||
| 35 | + ObjTable& operator[](ObjTable&&) = delete; | ||
| 36 | + | ||
| 37 | + // Remove unchecked access. | ||
| 38 | + T& operator[](unsigned long idx) = delete; | ||
| 39 | + T const& operator[](unsigned long idx) const = delete; | ||
| 40 | + | ||
| 41 | + inline T const& | ||
| 42 | + operator[](int idx) const | ||
| 43 | + { | ||
| 44 | + return element(static_cast<size_t>(idx)); | ||
| 45 | + } | ||
| 46 | + | ||
| 47 | + inline T const& | ||
| 48 | + operator[](QPDFObjGen og) const | ||
| 49 | + { | ||
| 50 | + return element(static_cast<size_t>(og.getObj())); | ||
| 51 | + } | ||
| 52 | + | ||
| 53 | + inline T const& | ||
| 54 | + operator[](QPDFObjectHandle oh) const | ||
| 55 | + { | ||
| 56 | + return element(static_cast<size_t>(oh.getObjectID())); | ||
| 57 | + } | ||
| 58 | + | ||
| 59 | + inline bool | ||
| 60 | + contains(size_t idx) const | ||
| 61 | + { | ||
| 62 | + return idx < std::vector<T>::size() || sparse_elements.count(idx); | ||
| 63 | + } | ||
| 64 | + | ||
| 65 | + inline bool | ||
| 66 | + contains(QPDFObjectHandle oh) const | ||
| 67 | + { | ||
| 68 | + return contains(static_cast<size_t>(oh.getObjectID())); | ||
| 69 | + } | ||
| 70 | + | ||
| 71 | + protected: | ||
| 72 | + inline T& | ||
| 73 | + operator[](int id) | ||
| 74 | + { | ||
| 75 | + return element(static_cast<size_t>(id)); | ||
| 76 | + } | ||
| 77 | + | ||
| 78 | + inline T& | ||
| 79 | + operator[](QPDFObjGen og) | ||
| 80 | + { | ||
| 81 | + return element(static_cast<size_t>(og.getObj())); | ||
| 82 | + } | ||
| 83 | + | ||
| 84 | + inline T& | ||
| 85 | + operator[](QPDFObjectHandle oh) | ||
| 86 | + { | ||
| 87 | + return element(static_cast<size_t>(oh.getObjectID())); | ||
| 88 | + } | ||
| 89 | + | ||
| 90 | + inline T& | ||
| 91 | + operator[](unsigned int id) | ||
| 92 | + { | ||
| 93 | + return element(id); | ||
| 94 | + } | ||
| 95 | + | ||
| 96 | + void | ||
| 97 | + initialize(size_t idx) | ||
| 98 | + { | ||
| 99 | + if (std::vector<T>::size() > 0 || sparse_elements.size() > 0) { | ||
| 100 | + throw ::std::logic_error("ObjTable accessed before initialization"); | ||
| 101 | + } else if ( | ||
| 102 | + idx >= static_cast<size_t>(std::numeric_limits<int>::max()) || | ||
| 103 | + idx >= std::vector<T>::max_size()) { | ||
| 104 | + throw std::runtime_error("Invalid maximum object id initializing ObjTable."); | ||
| 105 | + } else { | ||
| 106 | + std::vector<T>::resize(++idx); | ||
| 107 | + } | ||
| 108 | + } | ||
| 109 | + | ||
| 110 | + inline void | ||
| 111 | + forEach(std::function<void(int, const T&)> fn) | ||
| 112 | + { | ||
| 113 | + int i = 0; | ||
| 114 | + for (auto const& item: *this) { | ||
| 115 | + fn(i++, item); | ||
| 116 | + } | ||
| 117 | + for (auto const& [id, item]: sparse_elements) { | ||
| 118 | + fn(QIntC::to_int(id), item); | ||
| 119 | + } | ||
| 120 | + } | ||
| 121 | + | ||
| 122 | + private: | ||
| 123 | + std::map<size_t, T> sparse_elements; | ||
| 124 | + | ||
| 125 | + inline T& | ||
| 126 | + element(size_t idx) | ||
| 127 | + { | ||
| 128 | + if (idx < std::vector<T>::size()) { | ||
| 129 | + return std::vector<T>::operator[](idx); | ||
| 130 | + } else if (idx < static_cast<size_t>(std::numeric_limits<int>::max())) { | ||
| 131 | + return sparse_elements[idx]; | ||
| 132 | + } | ||
| 133 | + throw std::runtime_error("Invalid object id accessing ObjTable."); | ||
| 134 | + return element(0); // doesn't return | ||
| 135 | + } | ||
| 136 | + | ||
| 137 | + inline T const& | ||
| 138 | + element(size_t idx) const | ||
| 139 | + { | ||
| 140 | + if (idx < std::vector<T>::size()) { | ||
| 141 | + return std::vector<T>::operator[](idx); | ||
| 142 | + } else if (idx < static_cast<size_t>(std::numeric_limits<int>::max())) { | ||
| 143 | + return sparse_elements.at(idx); | ||
| 144 | + } | ||
| 145 | + throw std::runtime_error("Invalid object id accessing ObjTable."); | ||
| 146 | + return element(0); // doesn't return | ||
| 147 | + } | ||
| 148 | +}; | ||
| 149 | + | ||
| 150 | +#endif // OBJTABLE_HH |
libqpdf/qpdf/QPDFWriter_private.hh
0 → 100644
| 1 | +#ifndef QPDFWRITER_PRIVATE_HH | ||
| 2 | +#define QPDFWRITER_PRIVATE_HH | ||
| 3 | + | ||
| 4 | +#include <qpdf/QPDFWriter.hh> | ||
| 5 | + | ||
| 6 | +#include <qpdf/ObjTable.hh> | ||
| 7 | + | ||
| 8 | +// This file is intended for inclusion by QPDFWriter, QPDF, QPDF_optimization and QPDF_linearization | ||
| 9 | +// only. | ||
| 10 | + | ||
| 11 | +struct QPDFWriter::Object | ||
| 12 | +{ | ||
| 13 | + int renumber{0}; | ||
| 14 | + int gen{0}; | ||
| 15 | + int object_stream{0}; | ||
| 16 | +}; | ||
| 17 | + | ||
| 18 | +struct QPDFWriter::NewObject | ||
| 19 | +{ | ||
| 20 | + QPDFXRefEntry xref; | ||
| 21 | + qpdf_offset_t length{0}; | ||
| 22 | +}; | ||
| 23 | + | ||
| 24 | +class QPDFWriter::ObjTable: public ::ObjTable<QPDFWriter::Object> | ||
| 25 | +{ | ||
| 26 | + friend class QPDFWriter; | ||
| 27 | + | ||
| 28 | + public: | ||
| 29 | + bool | ||
| 30 | + getStreamsEmpty() const noexcept | ||
| 31 | + { | ||
| 32 | + return streams_empty; | ||
| 33 | + } | ||
| 34 | + | ||
| 35 | + private: | ||
| 36 | + // For performance, set by QPDFWriter rather than tracked by ObjTable. | ||
| 37 | + bool streams_empty{false}; | ||
| 38 | +}; | ||
| 39 | + | ||
| 40 | +class QPDFWriter::NewObjTable: public ::ObjTable<QPDFWriter::NewObject> | ||
| 41 | +{ | ||
| 42 | + friend class QPDFWriter; | ||
| 43 | +}; | ||
| 44 | + | ||
| 45 | +class QPDFWriter::Members | ||
| 46 | +{ | ||
| 47 | + friend class QPDFWriter; | ||
| 48 | + | ||
| 49 | + public: | ||
| 50 | + QPDF_DLL | ||
| 51 | + ~Members(); | ||
| 52 | + | ||
| 53 | + private: | ||
| 54 | + Members(QPDF& pdf); | ||
| 55 | + Members(Members const&) = delete; | ||
| 56 | + | ||
| 57 | + QPDF& pdf; | ||
| 58 | + QPDFObjGen root_og{-1, 0}; | ||
| 59 | + char const* filename{"unspecified"}; | ||
| 60 | + FILE* file{nullptr}; | ||
| 61 | + bool close_file{false}; | ||
| 62 | + Pl_Buffer* buffer_pipeline{nullptr}; | ||
| 63 | + Buffer* output_buffer{nullptr}; | ||
| 64 | + bool normalize_content_set{false}; | ||
| 65 | + bool normalize_content{false}; | ||
| 66 | + bool compress_streams{true}; | ||
| 67 | + bool compress_streams_set{false}; | ||
| 68 | + qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_none}; | ||
| 69 | + bool stream_decode_level_set{false}; | ||
| 70 | + bool recompress_flate{false}; | ||
| 71 | + bool qdf_mode{false}; | ||
| 72 | + bool preserve_unreferenced_objects{false}; | ||
| 73 | + bool newline_before_endstream{false}; | ||
| 74 | + bool static_id{false}; | ||
| 75 | + bool suppress_original_object_ids{false}; | ||
| 76 | + bool direct_stream_lengths{true}; | ||
| 77 | + bool encrypted{false}; | ||
| 78 | + bool preserve_encryption{true}; | ||
| 79 | + bool linearized{false}; | ||
| 80 | + bool pclm{false}; | ||
| 81 | + qpdf_object_stream_e object_stream_mode{qpdf_o_preserve}; | ||
| 82 | + std::string encryption_key; | ||
| 83 | + bool encrypt_metadata{true}; | ||
| 84 | + bool encrypt_use_aes{false}; | ||
| 85 | + std::map<std::string, std::string> encryption_dictionary; | ||
| 86 | + int encryption_V{0}; | ||
| 87 | + int encryption_R{0}; | ||
| 88 | + | ||
| 89 | + std::string id1; // for /ID key of | ||
| 90 | + std::string id2; // trailer dictionary | ||
| 91 | + std::string final_pdf_version; | ||
| 92 | + int final_extension_level{0}; | ||
| 93 | + std::string min_pdf_version; | ||
| 94 | + int min_extension_level{0}; | ||
| 95 | + std::string forced_pdf_version; | ||
| 96 | + int forced_extension_level{0}; | ||
| 97 | + std::string extra_header_text; | ||
| 98 | + int encryption_dict_objid{0}; | ||
| 99 | + std::string cur_data_key; | ||
| 100 | + std::list<std::shared_ptr<Pipeline>> to_delete; | ||
| 101 | + Pl_Count* pipeline{nullptr}; | ||
| 102 | + std::vector<QPDFObjectHandle> object_queue; | ||
| 103 | + size_t object_queue_front{0}; | ||
| 104 | + QPDFWriter::ObjTable obj; | ||
| 105 | + QPDFWriter::NewObjTable new_obj; | ||
| 106 | + int next_objid{1}; | ||
| 107 | + int cur_stream_length_id{0}; | ||
| 108 | + size_t cur_stream_length{0}; | ||
| 109 | + bool added_newline{false}; | ||
| 110 | + size_t max_ostream_index{0}; | ||
| 111 | + std::set<QPDFObjGen> normalized_streams; | ||
| 112 | + std::map<QPDFObjGen, int> page_object_to_seq; | ||
| 113 | + std::map<QPDFObjGen, int> contents_to_page_seq; | ||
| 114 | + std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects; | ||
| 115 | + std::list<Pipeline*> pipeline_stack; | ||
| 116 | + unsigned long long next_stack_id{0}; | ||
| 117 | + bool deterministic_id{false}; | ||
| 118 | + Pl_MD5* md5_pipeline{nullptr}; | ||
| 119 | + std::string deterministic_id_data; | ||
| 120 | + bool did_write_setup{false}; | ||
| 121 | + | ||
| 122 | + // For linearization only | ||
| 123 | + std::string lin_pass1_filename; | ||
| 124 | + | ||
| 125 | + // For progress reporting | ||
| 126 | + std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter; | ||
| 127 | + int events_expected{0}; | ||
| 128 | + int events_seen{0}; | ||
| 129 | + int next_progress_report{0}; | ||
| 130 | +}; | ||
| 131 | + | ||
| 132 | +#endif // QPDFWRITER_PRIVATE_HH |
libtests/CMakeLists.txt
libtests/obj_table.cc
0 → 100644
| 1 | +#include <qpdf/ObjTable.hh> | ||
| 2 | + | ||
// Element type stored in the table under test.
struct Test
{
    int value = 0;
};
| 7 | + | ||
| 8 | +class Table: public ObjTable<Test> | ||
| 9 | +{ | ||
| 10 | + public: | ||
| 11 | + Table() | ||
| 12 | + { | ||
| 13 | + initialize(5); | ||
| 14 | + } | ||
| 15 | + | ||
| 16 | + void | ||
| 17 | + test() | ||
| 18 | + { | ||
| 19 | + for (int i = 0; i < 10; ++i) { | ||
| 20 | + (*this)[i].value = 2 * i; | ||
| 21 | + (*this)[1000 + i].value = 2 * (1000 + i); | ||
| 22 | + } | ||
| 23 | + | ||
| 24 | + forEach([](auto i, auto const& item) -> void { | ||
| 25 | + std::cout << std::to_string(i) << " : " << std::to_string(item.value) << "\n"; | ||
| 26 | + }); | ||
| 27 | + | ||
| 28 | + std::cout << "2000 : " << std::to_string((*this)[2000].value) << "\n"; | ||
| 29 | + } | ||
| 30 | +}; | ||
| 31 | + | ||
| 32 | +int | ||
| 33 | +main() | ||
| 34 | +{ | ||
| 35 | + Table().test(); | ||
| 36 | + | ||
| 37 | + std::cout << "object table tests done\n"; | ||
| 38 | + return 0; | ||
| 39 | +} |
libtests/qtest/obj_table.test
0 → 100644
#!/usr/bin/env perl
require 5.008;
use warnings;
use strict;

# Run from the directory that holds the expected output for this test.
chdir("obj_table") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('object table');

# Run the obj_table program and compare its output with obj_table.out, expecting exit status 0.
my $input = {$td->COMMAND => "obj_table"};
my $expected = {$td->FILE => "obj_table.out",
                $td->EXIT_STATUS => 0};
$td->runtest("obj_table", $input, $expected, $td->NORMALIZE_NEWLINES);

# One test is expected to have been run.
$td->report(1);
libtests/qtest/obj_table/obj_table.out
0 → 100644