Commit 0bb1458f3899d524b1dc76e0542be08b75e5e9e4
Committed by
GitHub
Merge pull request #1161 from m-holger/writer
Tune QPDFWriter
Showing 12 changed files with 715 additions and 316 deletions
include/qpdf/QPDF.hh
| ... | ... | @@ -41,6 +41,7 @@ |
| 41 | 41 | #include <qpdf/QPDFObjectHandle.hh> |
| 42 | 42 | #include <qpdf/QPDFStreamFilter.hh> |
| 43 | 43 | #include <qpdf/QPDFTokenizer.hh> |
| 44 | +#include <qpdf/QPDFWriter.hh> | |
| 44 | 45 | #include <qpdf/QPDFXRefEntry.hh> |
| 45 | 46 | |
| 46 | 47 | class QPDF_Stream; |
| ... | ... | @@ -727,43 +728,62 @@ class QPDF |
| 727 | 728 | |
| 728 | 729 | private: |
| 729 | 730 | static void |
| 731 | + optimize( | |
| 732 | + QPDF& qpdf, | |
| 733 | + QPDFWriter::ObjTable const& obj, | |
| 734 | + std::function<int(QPDFObjectHandle&)> skip_stream_parameters) | |
| 735 | + { | |
| 736 | + return qpdf.optimize(obj, skip_stream_parameters); | |
| 737 | + } | |
| 738 | + | |
| 739 | + static void | |
| 730 | 740 | getLinearizedParts( |
| 731 | 741 | QPDF& qpdf, |
| 732 | - std::map<int, int> const& object_stream_data, | |
| 742 | + QPDFWriter::ObjTable const& obj, | |
| 733 | 743 | std::vector<QPDFObjectHandle>& part4, |
| 734 | 744 | std::vector<QPDFObjectHandle>& part6, |
| 735 | 745 | std::vector<QPDFObjectHandle>& part7, |
| 736 | 746 | std::vector<QPDFObjectHandle>& part8, |
| 737 | 747 | std::vector<QPDFObjectHandle>& part9) |
| 738 | 748 | { |
| 739 | - qpdf.getLinearizedParts(object_stream_data, part4, part6, part7, part8, part9); | |
| 749 | + qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9); | |
| 740 | 750 | } |
| 741 | 751 | |
| 742 | 752 | static void |
| 743 | 753 | generateHintStream( |
| 744 | 754 | QPDF& qpdf, |
| 745 | - std::map<int, QPDFXRefEntry> const& xref, | |
| 746 | - std::map<int, qpdf_offset_t> const& lengths, | |
| 747 | - std::map<int, int> const& obj_renumber, | |
| 755 | + QPDFWriter::NewObjTable const& new_obj, | |
| 756 | + QPDFWriter::ObjTable const& obj, | |
| 748 | 757 | std::shared_ptr<Buffer>& hint_stream, |
| 749 | 758 | int& S, |
| 750 | 759 | int& O, |
| 751 | 760 | bool compressed) |
| 752 | 761 | { |
| 753 | - return qpdf.generateHintStream( | |
| 754 | - xref, lengths, obj_renumber, hint_stream, S, O, compressed); | |
| 762 | + return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed); | |
| 755 | 763 | } |
| 756 | 764 | |
| 757 | - static void | |
| 758 | - getObjectStreamData(QPDF& qpdf, std::map<int, int>& omap) | |
| 765 | + static std::vector<QPDFObjGen> | |
| 766 | + getCompressibleObjGens(QPDF& qpdf) | |
| 759 | 767 | { |
| 760 | - qpdf.getObjectStreamData(omap); | |
| 768 | + return qpdf.getCompressibleObjVector(); | |
| 761 | 769 | } |
| 762 | 770 | |
| 763 | - static std::vector<QPDFObjGen> | |
| 764 | - getCompressibleObjGens(QPDF& qpdf) | |
| 771 | + static std::vector<bool> | |
| 772 | + getCompressibleObjSet(QPDF& qpdf) | |
| 765 | 773 | { |
| 766 | - return qpdf.getCompressibleObjGens(); | |
| 774 | + return qpdf.getCompressibleObjSet(); | |
| 775 | + } | |
| 776 | + | |
| 777 | + static std::map<QPDFObjGen, QPDFXRefEntry> const& | |
| 778 | + getXRefTable(QPDF& qpdf) | |
| 779 | + { | |
| 780 | + return qpdf.getXRefTableInternal(); | |
| 781 | + } | |
| 782 | + | |
| 783 | + static size_t | |
| 784 | + tableSize(QPDF& qpdf) | |
| 785 | + { | |
| 786 | + return qpdf.tableSize(); | |
| 767 | 787 | } |
| 768 | 788 | }; |
| 769 | 789 | |
| ... | ... | @@ -1083,10 +1103,21 @@ class QPDF |
| 1083 | 1103 | |
| 1084 | 1104 | // For QPDFWriter: |
| 1085 | 1105 | |
| 1106 | + std::map<QPDFObjGen, QPDFXRefEntry> const& getXRefTableInternal(); | |
| 1107 | + template <typename T> | |
| 1108 | + void optimize_internal( | |
| 1109 | + T const& object_stream_data, | |
| 1110 | + bool allow_changes = true, | |
| 1111 | + std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr); | |
| 1112 | + void optimize( | |
| 1113 | + QPDFWriter::ObjTable const& obj, | |
| 1114 | + std::function<int(QPDFObjectHandle&)> skip_stream_parameters); | |
| 1115 | + size_t tableSize(); | |
| 1116 | + | |
| 1086 | 1117 | // Get lists of all objects in order according to the part of a linearized file that they belong |
| 1087 | 1118 | // to. |
| 1088 | 1119 | void getLinearizedParts( |
| 1089 | - std::map<int, int> const& object_stream_data, | |
| 1120 | + QPDFWriter::ObjTable const& obj, | |
| 1090 | 1121 | std::vector<QPDFObjectHandle>& part4, |
| 1091 | 1122 | std::vector<QPDFObjectHandle>& part6, |
| 1092 | 1123 | std::vector<QPDFObjectHandle>& part7, |
| ... | ... | @@ -1094,19 +1125,18 @@ class QPDF |
| 1094 | 1125 | std::vector<QPDFObjectHandle>& part9); |
| 1095 | 1126 | |
| 1096 | 1127 | void generateHintStream( |
| 1097 | - std::map<int, QPDFXRefEntry> const& xref, | |
| 1098 | - std::map<int, qpdf_offset_t> const& lengths, | |
| 1099 | - std::map<int, int> const& obj_renumber, | |
| 1128 | + QPDFWriter::NewObjTable const& new_obj, | |
| 1129 | + QPDFWriter::ObjTable const& obj, | |
| 1100 | 1130 | std::shared_ptr<Buffer>& hint_stream, |
| 1101 | 1131 | int& S, |
| 1102 | 1132 | int& O, |
| 1103 | 1133 | bool compressed); |
| 1104 | 1134 | |
| 1105 | - // Map object to object stream that contains it | |
| 1106 | - void getObjectStreamData(std::map<int, int>&); | |
| 1107 | - | |
| 1108 | 1135 | // Get a list of objects that would be permitted in an object stream. |
| 1109 | - std::vector<QPDFObjGen> getCompressibleObjGens(); | |
| 1136 | + template <typename T> | |
| 1137 | + std::vector<T> getCompressibleObjGens(); | |
| 1138 | + std::vector<QPDFObjGen> getCompressibleObjVector(); | |
| 1139 | + std::vector<bool> getCompressibleObjSet(); | |
| 1110 | 1140 | |
| 1111 | 1141 | // methods to support page handling |
| 1112 | 1142 | |
| ... | ... | @@ -1352,6 +1382,7 @@ class QPDF |
| 1352 | 1382 | qpdf_offset_t getLinearizationOffset(QPDFObjGen const&); |
| 1353 | 1383 | QPDFObjectHandle |
| 1354 | 1384 | getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data); |
| 1385 | + QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj); | |
| 1355 | 1386 | int lengthNextN(int first_object, int n); |
| 1356 | 1387 | void |
| 1357 | 1388 | checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj); |
| ... | ... | @@ -1362,28 +1393,23 @@ class QPDF |
| 1362 | 1393 | void dumpHSharedObject(); |
| 1363 | 1394 | void dumpHGeneric(HGeneric&); |
| 1364 | 1395 | qpdf_offset_t adjusted_offset(qpdf_offset_t offset); |
| 1365 | - void calculateLinearizationData(std::map<int, int> const& object_stream_data); | |
| 1396 | + template <typename T> | |
| 1397 | + void calculateLinearizationData(T const& object_stream_data); | |
| 1398 | + template <typename T> | |
| 1366 | 1399 | void pushOutlinesToPart( |
| 1367 | 1400 | std::vector<QPDFObjectHandle>& part, |
| 1368 | 1401 | std::set<QPDFObjGen>& lc_outlines, |
| 1369 | - std::map<int, int> const& object_stream_data); | |
| 1402 | + T const& object_stream_data); | |
| 1370 | 1403 | int outputLengthNextN( |
| 1371 | 1404 | int in_object, |
| 1372 | 1405 | int n, |
| 1373 | - std::map<int, qpdf_offset_t> const& lengths, | |
| 1374 | - std::map<int, int> const& obj_renumber); | |
| 1375 | - void calculateHPageOffset( | |
| 1376 | - std::map<int, QPDFXRefEntry> const& xref, | |
| 1377 | - std::map<int, qpdf_offset_t> const& lengths, | |
| 1378 | - std::map<int, int> const& obj_renumber); | |
| 1379 | - void calculateHSharedObject( | |
| 1380 | - std::map<int, QPDFXRefEntry> const& xref, | |
| 1381 | - std::map<int, qpdf_offset_t> const& lengths, | |
| 1382 | - std::map<int, int> const& obj_renumber); | |
| 1383 | - void calculateHOutline( | |
| 1384 | - std::map<int, QPDFXRefEntry> const& xref, | |
| 1385 | - std::map<int, qpdf_offset_t> const& lengths, | |
| 1386 | - std::map<int, int> const& obj_renumber); | |
| 1406 | + QPDFWriter::NewObjTable const& new_obj, | |
| 1407 | + QPDFWriter::ObjTable const& obj); | |
| 1408 | + void | |
| 1409 | + calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); | |
| 1410 | + void | |
| 1411 | + calculateHSharedObject(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); | |
| 1412 | + void calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); | |
| 1387 | 1413 | void writeHPageOffset(BitWriter&); |
| 1388 | 1414 | void writeHSharedObject(BitWriter&); |
| 1389 | 1415 | void writeHGeneric(BitWriter&, HGeneric&); |
| ... | ... | @@ -1407,6 +1433,7 @@ class QPDF |
| 1407 | 1433 | QPDFObjGen::set& visited, |
| 1408 | 1434 | bool top); |
| 1409 | 1435 | void filterCompressedObjects(std::map<int, int> const& object_stream_data); |
| 1436 | + void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data); | |
| 1410 | 1437 | |
| 1411 | 1438 | // JSON import |
| 1412 | 1439 | void importJSON(std::shared_ptr<InputSource>, bool must_be_complete); | ... | ... |
include/qpdf/QPDFWriter.hh
| ... | ... | @@ -437,6 +437,12 @@ class QPDFWriter |
| 437 | 437 | QPDF_DLL |
| 438 | 438 | std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable(); |
| 439 | 439 | |
| 440 | + // The following structs / classes are not part of the public API. | |
| 441 | + struct Object; | |
| 442 | + struct NewObject; | |
| 443 | + class ObjTable; | |
| 444 | + class NewObjTable; | |
| 445 | + | |
| 440 | 446 | private: |
| 441 | 447 | // flags used by unparseObject |
| 442 | 448 | static int const f_stream = 1 << 0; |
| ... | ... | @@ -550,6 +556,7 @@ class QPDFWriter |
| 550 | 556 | void writeLinearized(); |
| 551 | 557 | void enqueuePart(std::vector<QPDFObjectHandle>& part); |
| 552 | 558 | void writeEncryptionDictionary(); |
| 559 | + void initializeTables(size_t extra = 0); | |
| 553 | 560 | void doWriteSetup(); |
| 554 | 561 | void writeHeader(); |
| 555 | 562 | void writeHintStream(int hint_id); |
| ... | ... | @@ -604,98 +611,7 @@ class QPDFWriter |
| 604 | 611 | void pushMD5Pipeline(PipelinePopper&); |
| 605 | 612 | void computeDeterministicIDData(); |
| 606 | 613 | |
| 607 | - void discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out); | |
| 608 | - | |
| 609 | - class Members | |
| 610 | - { | |
| 611 | - friend class QPDFWriter; | |
| 612 | - | |
| 613 | - public: | |
| 614 | - QPDF_DLL | |
| 615 | - ~Members(); | |
| 616 | - | |
| 617 | - private: | |
| 618 | - Members(QPDF& pdf); | |
| 619 | - Members(Members const&) = delete; | |
| 620 | - | |
| 621 | - QPDF& pdf; | |
| 622 | - QPDFObjGen root_og{-1, 0}; | |
| 623 | - char const* filename{"unspecified"}; | |
| 624 | - FILE* file{nullptr}; | |
| 625 | - bool close_file{false}; | |
| 626 | - Pl_Buffer* buffer_pipeline{nullptr}; | |
| 627 | - Buffer* output_buffer{nullptr}; | |
| 628 | - bool normalize_content_set{false}; | |
| 629 | - bool normalize_content{false}; | |
| 630 | - bool compress_streams{true}; | |
| 631 | - bool compress_streams_set{false}; | |
| 632 | - qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_none}; | |
| 633 | - bool stream_decode_level_set{false}; | |
| 634 | - bool recompress_flate{false}; | |
| 635 | - bool qdf_mode{false}; | |
| 636 | - bool preserve_unreferenced_objects{false}; | |
| 637 | - bool newline_before_endstream{false}; | |
| 638 | - bool static_id{false}; | |
| 639 | - bool suppress_original_object_ids{false}; | |
| 640 | - bool direct_stream_lengths{true}; | |
| 641 | - bool encrypted{false}; | |
| 642 | - bool preserve_encryption{true}; | |
| 643 | - bool linearized{false}; | |
| 644 | - bool pclm{false}; | |
| 645 | - qpdf_object_stream_e object_stream_mode{qpdf_o_preserve}; | |
| 646 | - std::string encryption_key; | |
| 647 | - bool encrypt_metadata{true}; | |
| 648 | - bool encrypt_use_aes{false}; | |
| 649 | - std::map<std::string, std::string> encryption_dictionary; | |
| 650 | - int encryption_V{0}; | |
| 651 | - int encryption_R{0}; | |
| 652 | - | |
| 653 | - std::string id1; // for /ID key of | |
| 654 | - std::string id2; // trailer dictionary | |
| 655 | - std::string final_pdf_version; | |
| 656 | - int final_extension_level{0}; | |
| 657 | - std::string min_pdf_version; | |
| 658 | - int min_extension_level{0}; | |
| 659 | - std::string forced_pdf_version; | |
| 660 | - int forced_extension_level{0}; | |
| 661 | - std::string extra_header_text; | |
| 662 | - int encryption_dict_objid{0}; | |
| 663 | - std::string cur_data_key; | |
| 664 | - std::list<std::shared_ptr<Pipeline>> to_delete; | |
| 665 | - Pl_Count* pipeline{nullptr}; | |
| 666 | - std::vector<QPDFObjectHandle> object_queue; | |
| 667 | - size_t object_queue_front{0}; | |
| 668 | - std::map<QPDFObjGen, int> obj_renumber; | |
| 669 | - std::map<int, QPDFXRefEntry> xref; | |
| 670 | - std::map<int, qpdf_offset_t> lengths; | |
| 671 | - int next_objid{1}; | |
| 672 | - int cur_stream_length_id{0}; | |
| 673 | - size_t cur_stream_length{0}; | |
| 674 | - bool added_newline{false}; | |
| 675 | - int max_ostream_index{0}; | |
| 676 | - std::set<QPDFObjGen> normalized_streams; | |
| 677 | - std::map<QPDFObjGen, int> page_object_to_seq; | |
| 678 | - std::map<QPDFObjGen, int> contents_to_page_seq; | |
| 679 | - std::map<QPDFObjGen, int> object_to_object_stream; | |
| 680 | - std::map<int, std::set<QPDFObjGen>> object_stream_to_objects; | |
| 681 | - std::list<Pipeline*> pipeline_stack; | |
| 682 | - unsigned long long next_stack_id{0}; | |
| 683 | - bool deterministic_id{false}; | |
| 684 | - Pl_MD5* md5_pipeline{nullptr}; | |
| 685 | - std::string deterministic_id_data; | |
| 686 | - bool did_write_setup{false}; | |
| 687 | - | |
| 688 | - // For linearization only | |
| 689 | - std::string lin_pass1_filename; | |
| 690 | - std::map<int, int> obj_renumber_no_gen; | |
| 691 | - std::map<int, int> object_to_object_stream_no_gen; | |
| 692 | - | |
| 693 | - // For progress reporting | |
| 694 | - std::shared_ptr<ProgressReporter> progress_reporter; | |
| 695 | - int events_expected{0}; | |
| 696 | - int events_seen{0}; | |
| 697 | - int next_progress_report{0}; | |
| 698 | - }; | |
| 614 | + class Members; | |
| 699 | 615 | |
| 700 | 616 | // Keep all member variables inside the Members object, which we dynamically allocate. This |
| 701 | 617 | // makes it possible to add new private members without breaking binary compatibility. | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -2370,6 +2370,12 @@ QPDF::getRoot() |
| 2370 | 2370 | std::map<QPDFObjGen, QPDFXRefEntry> |
| 2371 | 2371 | QPDF::getXRefTable() |
| 2372 | 2372 | { |
| 2373 | + return getXRefTableInternal(); | |
| 2374 | +} | |
| 2375 | + | |
| 2376 | +std::map<QPDFObjGen, QPDFXRefEntry> const& | |
| 2377 | +QPDF::getXRefTableInternal() | |
| 2378 | +{ | |
| 2373 | 2379 | if (!m->parsed) { |
| 2374 | 2380 | throw std::logic_error("QPDF::getXRefTable called before parsing."); |
| 2375 | 2381 | } |
| ... | ... | @@ -2377,19 +2383,33 @@ QPDF::getXRefTable() |
| 2377 | 2383 | return m->xref_table; |
| 2378 | 2384 | } |
| 2379 | 2385 | |
| 2380 | -void | |
| 2381 | -QPDF::getObjectStreamData(std::map<int, int>& omap) | |
| 2386 | +size_t | |
| 2387 | +QPDF::tableSize() | |
| 2382 | 2388 | { |
| 2383 | - for (auto const& iter: m->xref_table) { | |
| 2384 | - QPDFObjGen const& og = iter.first; | |
| 2385 | - QPDFXRefEntry const& entry = iter.second; | |
| 2386 | - if (entry.getType() == 2) { | |
| 2387 | - omap[og.getObj()] = entry.getObjStreamNumber(); | |
| 2388 | - } | |
| 2389 | + // If obj_cache is dense, accommodate all objects in tables, else accommodate only original | |
| 2390 | + // objects. | |
| 2391 | + auto max_xref = m->xref_table.size() ? m->xref_table.crbegin()->first.getObj() : 0; | |
| 2392 | + auto max_obj = m->obj_cache.size() ? m->obj_cache.crbegin()->first.getObj() : 0; | |
| 2393 | + if (max_obj < 1.1 * std::max(toI(m->obj_cache.size()), max_xref)) { | |
| 2394 | + return toS(++max_obj); | |
| 2389 | 2395 | } |
| 2396 | + return toS(++max_xref); | |
| 2390 | 2397 | } |
| 2391 | 2398 | |
| 2392 | 2399 | std::vector<QPDFObjGen> |
| 2400 | +QPDF::getCompressibleObjVector() | |
| 2401 | +{ | |
| 2402 | + return getCompressibleObjGens<QPDFObjGen>(); | |
| 2403 | +} | |
| 2404 | + | |
| 2405 | +std::vector<bool> | |
| 2406 | +QPDF::getCompressibleObjSet() | |
| 2407 | +{ | |
| 2408 | + return getCompressibleObjGens<bool>(); | |
| 2409 | +} | |
| 2410 | + | |
| 2411 | +template <typename T> | |
| 2412 | +std::vector<T> | |
| 2393 | 2413 | QPDF::getCompressibleObjGens() |
| 2394 | 2414 | { |
| 2395 | 2415 | // Return a list of objects that are allowed to be in object streams. Walk through the objects |
| ... | ... | @@ -2407,7 +2427,14 @@ QPDF::getCompressibleObjGens() |
| 2407 | 2427 | std::vector<QPDFObjectHandle> queue; |
| 2408 | 2428 | queue.reserve(512); |
| 2409 | 2429 | queue.push_back(m->trailer); |
| 2410 | - std::vector<QPDFObjGen> result; | |
| 2430 | + std::vector<T> result; | |
| 2431 | + if constexpr (std::is_same_v<T, QPDFObjGen>) { | |
| 2432 | + result.reserve(m->obj_cache.size()); | |
| 2433 | + } else if constexpr (std::is_same_v<T, bool>) { | |
| 2434 | + result.resize(max_obj + 1U, false); | |
| 2435 | + } else { | |
| 2436 | + throw std::logic_error("Unsupported type in QPDF::getCompressibleObjGens"); | |
| 2437 | + } | |
| 2411 | 2438 | while (!queue.empty()) { |
| 2412 | 2439 | auto obj = queue.back(); |
| 2413 | 2440 | queue.pop_back(); |
| ... | ... | @@ -2439,7 +2466,11 @@ QPDF::getCompressibleObjGens() |
| 2439 | 2466 | } else if (!(obj.isStream() || |
| 2440 | 2467 | (obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") && |
| 2441 | 2468 | obj.hasKey("/Contents")))) { |
| 2442 | - result.push_back(og); | |
| 2469 | + if constexpr (std::is_same_v<T, QPDFObjGen>) { | |
| 2470 | + result.push_back(og); | |
| 2471 | + } else if constexpr (std::is_same_v<T, bool>) { | |
| 2472 | + result[id + 1U] = true; | |
| 2473 | + } | |
| 2443 | 2474 | } |
| 2444 | 2475 | } |
| 2445 | 2476 | if (obj.isStream()) { | ... | ... |
libqpdf/QPDFWriter.cc
| ... | ... | @@ -2,7 +2,7 @@ |
| 2 | 2 | |
| 3 | 3 | #include <qpdf/qpdf-config.h> // include early for large file support |
| 4 | 4 | |
| 5 | -#include <qpdf/QPDFWriter.hh> | |
| 5 | +#include <qpdf/QPDFWriter_private.hh> | |
| 6 | 6 | |
| 7 | 7 | #include <qpdf/MD5.hh> |
| 8 | 8 | #include <qpdf/Pl_AES_PDF.hh> |
| ... | ... | @@ -1038,7 +1038,7 @@ QPDFWriter::openObject(int objid) |
| 1038 | 1038 | if (objid == 0) { |
| 1039 | 1039 | objid = m->next_objid++; |
| 1040 | 1040 | } |
| 1041 | - m->xref[objid] = QPDFXRefEntry(m->pipeline->getCount()); | |
| 1041 | + m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount()); | |
| 1042 | 1042 | writeString(std::to_string(objid)); |
| 1043 | 1043 | writeString(" 0 obj\n"); |
| 1044 | 1044 | return objid; |
| ... | ... | @@ -1050,7 +1050,8 @@ QPDFWriter::closeObject(int objid) |
| 1050 | 1050 | // Write a newline before endobj as it makes the file easier to repair. |
| 1051 | 1051 | writeString("\nendobj\n"); |
| 1052 | 1052 | writeStringQDF("\n"); |
| 1053 | - m->lengths[objid] = m->pipeline->getCount() - m->xref[objid].getOffset(); | |
| 1053 | + auto& new_obj = m->new_obj[objid]; | |
| 1054 | + new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset(); | |
| 1054 | 1055 | } |
| 1055 | 1056 | |
| 1056 | 1057 | void |
| ... | ... | @@ -1064,7 +1065,7 @@ QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og) |
| 1064 | 1065 | |
| 1065 | 1066 | // Reserve numbers for the objects that belong to this object stream. |
| 1066 | 1067 | for (auto const& iter: m->object_stream_to_objects[objid]) { |
| 1067 | - m->obj_renumber[iter] = m->next_objid++; | |
| 1068 | + m->obj[iter].renumber = m->next_objid++; | |
| 1068 | 1069 | } |
| 1069 | 1070 | } |
| 1070 | 1071 | |
| ... | ... | @@ -1093,18 +1094,18 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) |
| 1093 | 1094 | } |
| 1094 | 1095 | |
| 1095 | 1096 | QPDFObjGen og = object.getObjGen(); |
| 1097 | + auto& obj = m->obj[og]; | |
| 1096 | 1098 | |
| 1097 | - if (m->obj_renumber.count(og) == 0) { | |
| 1098 | - if (m->object_to_object_stream.count(og)) { | |
| 1099 | + if (obj.renumber == 0) { | |
| 1100 | + if (obj.object_stream > 0) { | |
| 1099 | 1101 | // This is in an object stream. Don't process it here. Instead, enqueue the object |
| 1100 | 1102 | // stream. Object streams always have generation 0. |
| 1101 | - int stream_id = m->object_to_object_stream[og]; | |
| 1102 | - // Detect loops by storing invalid object ID 0, which will get overwritten later. | |
| 1103 | - m->obj_renumber[og] = 0; | |
| 1104 | - enqueueObject(m->pdf.getObjectByID(stream_id, 0)); | |
| 1103 | + // Detect loops by storing invalid object ID -1, which will get overwritten later. | |
| 1104 | + obj.renumber = -1; | |
| 1105 | + enqueueObject(m->pdf.getObject(obj.object_stream, 0)); | |
| 1105 | 1106 | } else { |
| 1106 | 1107 | m->object_queue.push_back(object); |
| 1107 | - m->obj_renumber[og] = m->next_objid++; | |
| 1108 | + obj.renumber = m->next_objid++; | |
| 1108 | 1109 | |
| 1109 | 1110 | if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) { |
| 1110 | 1111 | // For linearized files, uncompressed objects go at end, and we take care of |
| ... | ... | @@ -1117,7 +1118,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) |
| 1117 | 1118 | ++m->next_objid; |
| 1118 | 1119 | } |
| 1119 | 1120 | } |
| 1120 | - } else if (m->obj_renumber[og] == 0) { | |
| 1121 | + } else if (obj.renumber == -1) { | |
| 1121 | 1122 | // This can happen if a specially constructed file indicates that an object stream is |
| 1122 | 1123 | // inside itself. |
| 1123 | 1124 | QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream"); |
| ... | ... | @@ -1147,9 +1148,7 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags) |
| 1147 | 1148 | enqueueObject(child); |
| 1148 | 1149 | } |
| 1149 | 1150 | if (child.isIndirect()) { |
| 1150 | - QPDFObjGen old_og = child.getObjGen(); | |
| 1151 | - int new_id = m->obj_renumber[old_og]; | |
| 1152 | - writeString(std::to_string(new_id)); | |
| 1151 | + writeString(std::to_string(m->obj[child].renumber)); | |
| 1153 | 1152 | writeString(" 0 R"); |
| 1154 | 1153 | } else { |
| 1155 | 1154 | unparseObject(child, level, flags); |
| ... | ... | @@ -1527,9 +1526,8 @@ QPDFWriter::unparseObject( |
| 1527 | 1526 | writeString(">>"); |
| 1528 | 1527 | } else if (tc == ::ot_stream) { |
| 1529 | 1528 | // Write stream data to a buffer. |
| 1530 | - int new_id = m->obj_renumber[old_og]; | |
| 1531 | 1529 | if (!m->direct_stream_lengths) { |
| 1532 | - m->cur_stream_length_id = new_id + 1; | |
| 1530 | + m->cur_stream_length_id = m->obj[old_og].renumber + 1; | |
| 1533 | 1531 | } |
| 1534 | 1532 | |
| 1535 | 1533 | flags |= f_stream; |
| ... | ... | @@ -1626,7 +1624,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) |
| 1626 | 1624 | QPDFObjGen old_og = object.getObjGen(); |
| 1627 | 1625 | qpdf_assert_debug(old_og.getGen() == 0); |
| 1628 | 1626 | int old_id = old_og.getObj(); |
| 1629 | - int new_id = m->obj_renumber[old_og]; | |
| 1627 | + int new_stream_id = m->obj[old_og].renumber; | |
| 1630 | 1628 | |
| 1631 | 1629 | std::vector<qpdf_offset_t> offsets; |
| 1632 | 1630 | qpdf_offset_t first = 0; |
| ... | ... | @@ -1670,7 +1668,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) |
| 1670 | 1668 | int count = -1; |
| 1671 | 1669 | for (auto const& obj: m->object_stream_to_objects[old_id]) { |
| 1672 | 1670 | ++count; |
| 1673 | - int new_obj = m->obj_renumber[obj]; | |
| 1671 | + int new_obj = m->obj[obj].renumber; | |
| 1674 | 1672 | if (first_obj == -1) { |
| 1675 | 1673 | first_obj = new_obj; |
| 1676 | 1674 | } |
| ... | ... | @@ -1706,13 +1704,13 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) |
| 1706 | 1704 | } |
| 1707 | 1705 | writeObject(obj_to_write, count); |
| 1708 | 1706 | |
| 1709 | - m->xref[new_obj] = QPDFXRefEntry(new_id, count); | |
| 1707 | + m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count); | |
| 1710 | 1708 | } |
| 1711 | 1709 | } |
| 1712 | 1710 | |
| 1713 | 1711 | // Write the object |
| 1714 | - openObject(new_id); | |
| 1715 | - setDataKey(new_id); | |
| 1712 | + openObject(new_stream_id); | |
| 1713 | + setDataKey(new_stream_id); | |
| 1716 | 1714 | writeString("<<"); |
| 1717 | 1715 | writeStringQDF("\n "); |
| 1718 | 1716 | writeString(" /Type /ObjStm"); |
| ... | ... | @@ -1754,7 +1752,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) |
| 1754 | 1752 | } |
| 1755 | 1753 | writeString("endstream"); |
| 1756 | 1754 | m->cur_data_key.clear(); |
| 1757 | - closeObject(new_id); | |
| 1755 | + closeObject(new_stream_id); | |
| 1758 | 1756 | } |
| 1759 | 1757 | |
| 1760 | 1758 | void |
| ... | ... | @@ -1769,7 +1767,7 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) |
| 1769 | 1767 | } |
| 1770 | 1768 | |
| 1771 | 1769 | indicateProgress(false, false); |
| 1772 | - int new_id = m->obj_renumber[old_og]; | |
| 1770 | + auto new_id = m->obj[old_og].renumber; | |
| 1773 | 1771 | if (m->qdf_mode) { |
| 1774 | 1772 | if (m->page_object_to_seq.count(old_og)) { |
| 1775 | 1773 | writeString("%% Page "); |
| ... | ... | @@ -1938,11 +1936,7 @@ QPDFWriter::initializeSpecialStreams() |
| 1938 | 1936 | void |
| 1939 | 1937 | QPDFWriter::preserveObjectStreams() |
| 1940 | 1938 | { |
| 1941 | - std::map<int, int> omap; | |
| 1942 | - QPDF::Writer::getObjectStreamData(m->pdf, omap); | |
| 1943 | - if (omap.empty()) { | |
| 1944 | - return; | |
| 1945 | - } | |
| 1939 | + auto const& xref = QPDF::Writer::getXRefTable(m->pdf); | |
| 1946 | 1940 | // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object |
| 1947 | 1941 | // streams out of old objects that have generation numbers greater than zero. However in an |
| 1948 | 1942 | // existing PDF, all object stream objects and all objects in them must have generation 0 |
| ... | ... | @@ -1950,20 +1944,43 @@ QPDFWriter::preserveObjectStreams() |
| 1950 | 1944 | // that are not allowed to be in object streams. In addition to removing objects that were |
| 1951 | 1945 | // erroneously included in object streams in the source PDF, it also prevents unreferenced |
| 1952 | 1946 | // objects from being included. |
| 1953 | - std::set<QPDFObjGen> eligible; | |
| 1954 | - if (!m->preserve_unreferenced_objects) { | |
| 1955 | - std::vector<QPDFObjGen> eligible_v = QPDF::Writer::getCompressibleObjGens(m->pdf); | |
| 1956 | - eligible = std::set<QPDFObjGen>(eligible_v.begin(), eligible_v.end()); | |
| 1957 | - } | |
| 1958 | - QTC::TC("qpdf", "QPDFWriter preserve object streams", m->preserve_unreferenced_objects ? 0 : 1); | |
| 1959 | - for (auto iter: omap) { | |
| 1960 | - QPDFObjGen og(iter.first, 0); | |
| 1961 | - if (eligible.count(og) || m->preserve_unreferenced_objects) { | |
| 1962 | - m->object_to_object_stream[og] = iter.second; | |
| 1963 | - } else { | |
| 1964 | - QTC::TC("qpdf", "QPDFWriter exclude from object stream"); | |
| 1947 | + auto iter = xref.cbegin(); | |
| 1948 | + auto end = xref.cend(); | |
| 1949 | + | |
| 1950 | + // Start by scanning for the first compressed object in case we don't have any object streams | |
| 1951 | + // to process. | |
| 1952 | + for (; iter != end; ++iter) { | |
| 1953 | + if (iter->second.getType() == 2) { | |
| 1954 | + // PDF contains object streams. | |
| 1955 | + QTC::TC( | |
| 1956 | + "qpdf", | |
| 1957 | + "QPDFWriter preserve object streams", | |
| 1958 | + m->preserve_unreferenced_objects ? 0 : 1); | |
| 1959 | + | |
| 1960 | + if (m->preserve_unreferenced_objects) { | |
| 1961 | + for (; iter != end; ++iter) { | |
| 1962 | + if (iter->second.getType() == 2) { | |
| 1963 | + m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); | |
| 1964 | + } | |
| 1965 | + } | |
| 1966 | + } else { | |
| 1967 | + auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf); | |
| 1968 | + for (; iter != end; ++iter) { | |
| 1969 | + if (iter->second.getType() == 2) { | |
| 1970 | + auto id = static_cast<size_t>(iter->first.getObj()); | |
| 1971 | + if (id < eligible.size() && eligible[id]) { | |
| 1972 | + m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); | |
| 1973 | + } else { | |
| 1974 | + QTC::TC("qpdf", "QPDFWriter exclude from object stream"); | |
| 1975 | + } | |
| 1976 | + } | |
| 1977 | + } | |
| 1978 | + } | |
| 1979 | + return; | |
| 1965 | 1980 | } |
| 1966 | 1981 | } |
| 1982 | + // No compressed objects found. | |
| 1983 | + m->obj.streams_empty = true; | |
| 1967 | 1984 | } |
| 1968 | 1985 | |
| 1969 | 1986 | void |
| ... | ... | @@ -1979,7 +1996,10 @@ QPDFWriter::generateObjectStreams() |
| 1979 | 1996 | |
| 1980 | 1997 | std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf); |
| 1981 | 1998 | size_t n_object_streams = (eligible.size() + 99U) / 100U; |
| 1999 | + | |
| 2000 | + initializeTables(2U * n_object_streams); | |
| 1982 | 2001 | if (n_object_streams == 0) { |
| 2002 | + m->obj.streams_empty = true; | |
| 1983 | 2003 | return; |
| 1984 | 2004 | } |
| 1985 | 2005 | size_t n_per = eligible.size() / n_object_streams; |
| ... | ... | @@ -1987,20 +2007,18 @@ QPDFWriter::generateObjectStreams() |
| 1987 | 2007 | ++n_per; |
| 1988 | 2008 | } |
| 1989 | 2009 | unsigned int n = 0; |
| 1990 | - int cur_ostream = 0; | |
| 1991 | - for (auto const& iter: eligible) { | |
| 1992 | - if ((n % n_per) == 0) { | |
| 1993 | - if (n > 0) { | |
| 1994 | - QTC::TC("qpdf", "QPDFWriter generate >1 ostream"); | |
| 1995 | - } | |
| 2010 | + int cur_ostream = m->pdf.newIndirectNull().getObjectID(); | |
| 2011 | + for (auto const& item: eligible) { | |
| 2012 | + if (n == n_per) { | |
| 2013 | + QTC::TC("qpdf", "QPDFWriter generate >1 ostream"); | |
| 1996 | 2014 | n = 0; |
| 1997 | - } | |
| 1998 | - if (n == 0) { | |
| 1999 | 2015 | // Construct a new null object as the "original" object stream. The rest of the code |
| 2000 | 2016 | // knows that this means we're creating the object stream from scratch. |
| 2001 | - cur_ostream = m->pdf.makeIndirectObject(QPDFObjectHandle::newNull()).getObjectID(); | |
| 2017 | + cur_ostream = m->pdf.newIndirectNull().getObjectID(); | |
| 2002 | 2018 | } |
| 2003 | - m->object_to_object_stream[iter] = cur_ostream; | |
| 2019 | + auto& obj = m->obj[item]; | |
| 2020 | + obj.object_stream = cur_ostream; | |
| 2021 | + obj.gen = item.getGen(); | |
| 2004 | 2022 | ++n; |
| 2005 | 2023 | } |
| 2006 | 2024 | } |
| ... | ... | @@ -2056,6 +2074,14 @@ QPDFWriter::prepareFileForWrite() |
| 2056 | 2074 | } |
| 2057 | 2075 | |
| 2058 | 2076 | void |
| 2077 | +QPDFWriter::initializeTables(size_t extra) | |
| 2078 | +{ | |
| 2079 | + auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra; | |
| 2080 | + m->obj.initialize(size); | |
| 2081 | + m->new_obj.initialize(size); | |
| 2082 | +} | |
| 2083 | + | |
| 2084 | +void | |
| 2059 | 2085 | QPDFWriter::doWriteSetup() |
| 2060 | 2086 | { |
| 2061 | 2087 | if (m->did_write_setup) { |
| ... | ... | @@ -2124,10 +2150,12 @@ QPDFWriter::doWriteSetup() |
| 2124 | 2150 | |
| 2125 | 2151 | switch (m->object_stream_mode) { |
| 2126 | 2152 | case qpdf_o_disable: |
| 2127 | - // no action required | |
| 2153 | + initializeTables(); | |
| 2154 | + m->obj.streams_empty = true; | |
| 2128 | 2155 | break; |
| 2129 | 2156 | |
| 2130 | 2157 | case qpdf_o_preserve: |
| 2158 | + initializeTables(); | |
| 2131 | 2159 | preserveObjectStreams(); |
| 2132 | 2160 | break; |
| 2133 | 2161 | |
| ... | ... | @@ -2138,39 +2166,45 @@ QPDFWriter::doWriteSetup() |
| 2138 | 2166 | // no default so gcc will warn for missing case tag |
| 2139 | 2167 | } |
| 2140 | 2168 | |
| 2141 | - if (m->linearized) { | |
| 2142 | - // Page dictionaries are not allowed to be compressed objects. | |
| 2143 | - for (auto& page: m->pdf.getAllPages()) { | |
| 2144 | - QPDFObjGen og = page.getObjGen(); | |
| 2145 | - if (m->object_to_object_stream.count(og)) { | |
| 2146 | - QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); | |
| 2147 | - m->object_to_object_stream.erase(og); | |
| 2169 | + if (!m->obj.streams_empty) { | |
| 2170 | + if (m->linearized) { | |
| 2171 | + // Page dictionaries are not allowed to be compressed objects. | |
| 2172 | + for (auto& page: m->pdf.getAllPages()) { | |
| 2173 | + if (m->obj[page].object_stream > 0) { | |
| 2174 | + QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); | |
| 2175 | + m->obj[page].object_stream = 0; | |
| 2176 | + } | |
| 2148 | 2177 | } |
| 2149 | 2178 | } |
| 2150 | - } | |
| 2151 | 2179 | |
| 2152 | - if (m->linearized || m->encrypted) { | |
| 2153 | - // The document catalog is not allowed to be compressed in linearized files either. It also | |
| 2154 | - // appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to handle | |
| 2155 | - // encrypted files with compressed document catalogs, so we disable them in that case as | |
| 2156 | - // well. | |
| 2157 | - if (m->object_to_object_stream.count(m->root_og)) { | |
| 2158 | - QTC::TC("qpdf", "QPDFWriter uncompressing root"); | |
| 2159 | - m->object_to_object_stream.erase(m->root_og); | |
| 2180 | + if (m->linearized || m->encrypted) { | |
| 2181 | + // The document catalog is not allowed to be compressed in linearized files either. It | |
| 2182 | + // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to | |
| 2183 | + // handle encrypted files with compressed document catalogs, so we disable them in that | |
| 2184 | + // case as well. | |
| 2185 | + if (m->obj[m->root_og].object_stream > 0) { | |
| 2186 | + QTC::TC("qpdf", "QPDFWriter uncompressing root"); | |
| 2187 | + m->obj[m->root_og].object_stream = 0; | |
| 2188 | + } | |
| 2160 | 2189 | } |
| 2161 | - } | |
| 2162 | 2190 | |
| 2163 | - // Generate reverse mapping from object stream to objects | |
| 2164 | - for (auto const& iter: m->object_to_object_stream) { | |
| 2165 | - QPDFObjGen const& obj = iter.first; | |
| 2166 | - int stream = iter.second; | |
| 2167 | - m->object_stream_to_objects[stream].insert(obj); | |
| 2168 | - m->max_ostream_index = std::max( | |
| 2169 | - m->max_ostream_index, QIntC::to_int(m->object_stream_to_objects[stream].size()) - 1); | |
| 2170 | - } | |
| 2191 | + // Generate reverse mapping from object stream to objects | |
| 2192 | + m->obj.forEach([this](auto id, auto const& item) -> void { | |
| 2193 | + if (item.object_stream > 0) { | |
| 2194 | + auto& vec = m->object_stream_to_objects[item.object_stream]; | |
| 2195 | + vec.emplace_back(id, item.gen); | |
| 2196 | + if (m->max_ostream_index < vec.size()) { | |
| 2197 | + ++m->max_ostream_index; | |
| 2198 | + } | |
| 2199 | + } | |
| 2200 | + }); | |
| 2201 | + --m->max_ostream_index; | |
| 2171 | 2202 | |
| 2172 | - if (!m->object_stream_to_objects.empty()) { | |
| 2173 | - setMinimumPDFVersion("1.5"); | |
| 2203 | + if (m->object_stream_to_objects.empty()) { | |
| 2204 | + m->obj.streams_empty = true; | |
| 2205 | + } else { | |
| 2206 | + setMinimumPDFVersion("1.5"); | |
| 2207 | + } | |
| 2174 | 2208 | } |
| 2175 | 2209 | |
| 2176 | 2210 | setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel()); |
| ... | ... | @@ -2215,7 +2249,7 @@ QPDFWriter::write() |
| 2215 | 2249 | QPDFObjGen |
| 2216 | 2250 | QPDFWriter::getRenumberedObjGen(QPDFObjGen og) |
| 2217 | 2251 | { |
| 2218 | - return QPDFObjGen(m->obj_renumber[og], 0); | |
| 2252 | + return QPDFObjGen(m->obj[og].renumber, 0); | |
| 2219 | 2253 | } |
| 2220 | 2254 | |
| 2221 | 2255 | std::map<QPDFObjGen, QPDFXRefEntry> |
| ... | ... | @@ -2223,12 +2257,12 @@ QPDFWriter::getWrittenXRefTable() |
| 2223 | 2257 | { |
| 2224 | 2258 | std::map<QPDFObjGen, QPDFXRefEntry> result; |
| 2225 | 2259 | |
| 2226 | - for (auto const& iter: m->xref) { | |
| 2227 | - if (iter.first != 0 && iter.second.getType() != 0) { | |
| 2228 | - result[QPDFObjGen(iter.first, 0)] = iter.second; | |
| 2260 | + auto it = result.begin(); | |
| 2261 | + m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void { | |
| 2262 | + if (item.xref.getType() != 0) { | |
| 2263 | + it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref); | |
| 2229 | 2264 | } |
| 2230 | - } | |
| 2231 | - | |
| 2265 | + }); | |
| 2232 | 2266 | return result; |
| 2233 | 2267 | } |
| 2234 | 2268 | |
| ... | ... | @@ -2290,8 +2324,7 @@ QPDFWriter::writeHintStream(int hint_id) |
| 2290 | 2324 | int S = 0; |
| 2291 | 2325 | int O = 0; |
| 2292 | 2326 | bool compressed = (m->compress_streams && !m->qdf_mode); |
| 2293 | - QPDF::Writer::generateHintStream( | |
| 2294 | - m->pdf, m->xref, m->lengths, m->obj_renumber_no_gen, hint_buffer, S, O, compressed); | |
| 2327 | + QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed); | |
| 2295 | 2328 | |
| 2296 | 2329 | openObject(hint_id); |
| 2297 | 2330 | setDataKey(hint_id); |
| ... | ... | @@ -2364,7 +2397,7 @@ QPDFWriter::writeXRefTable( |
| 2364 | 2397 | } else { |
| 2365 | 2398 | qpdf_offset_t offset = 0; |
| 2366 | 2399 | if (!suppress_offsets) { |
| 2367 | - offset = m->xref[i].getOffset(); | |
| 2400 | + offset = m->new_obj[i].xref.getOffset(); | |
| 2368 | 2401 | if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { |
| 2369 | 2402 | offset += hint_length; |
| 2370 | 2403 | } |
| ... | ... | @@ -2411,13 +2444,13 @@ QPDFWriter::writeXRefStream( |
| 2411 | 2444 | unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id)); |
| 2412 | 2445 | |
| 2413 | 2446 | // field 2 contains object stream indices |
| 2414 | - unsigned int f2_size = bytesNeeded(m->max_ostream_index); | |
| 2447 | + unsigned int f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index)); | |
| 2415 | 2448 | |
| 2416 | 2449 | unsigned int esize = 1 + f1_size + f2_size; |
| 2417 | 2450 | |
| 2418 | 2451 | // Must store in xref table in advance of writing the actual data rather than waiting for |
| 2419 | 2452 | // openObject to do it. |
| 2420 | - m->xref[xref_id] = QPDFXRefEntry(m->pipeline->getCount()); | |
| 2453 | + m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount()); | |
| 2421 | 2454 | |
| 2422 | 2455 | Pipeline* p = pushPipeline(new Pl_Buffer("xref stream")); |
| 2423 | 2456 | bool compressed = false; |
| ... | ... | @@ -2435,7 +2468,7 @@ QPDFWriter::writeXRefStream( |
| 2435 | 2468 | PipelinePopper pp_xref(this, &xref_data); |
| 2436 | 2469 | activatePipelineStack(pp_xref); |
| 2437 | 2470 | for (int i = first; i <= last; ++i) { |
| 2438 | - QPDFXRefEntry& e = m->xref[i]; | |
| 2471 | + QPDFXRefEntry& e = m->new_obj[i].xref; | |
| 2439 | 2472 | switch (e.getType()) { |
| 2440 | 2473 | case 0: |
| 2441 | 2474 | writeBinary(0, 1); |
| ... | ... | @@ -2507,39 +2540,10 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes) |
| 2507 | 2540 | } |
| 2508 | 2541 | |
| 2509 | 2542 | void |
| 2510 | -QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out) | |
| 2511 | -{ | |
| 2512 | - // There are deep assumptions in the linearization code in QPDF that there is only one object | |
| 2513 | - // with each object number; i.e., you can't have two objects with the same object number and | |
| 2514 | - // different generations. This is a pretty safe assumption because Adobe Reader and Acrobat | |
| 2515 | - // can't actually handle this case. There is not much if any code in QPDF outside linearization | |
| 2516 | - // that assumes this, but the linearization code as currently implemented would do weird things | |
| 2517 | - // if we found such a case. In order to avoid breaking ABI changes in QPDF, we will first | |
| 2518 | - // assert that this condition holds. Then we can create new maps for QPDF that throw away | |
| 2519 | - // generation numbers. | |
| 2520 | - | |
| 2521 | - out.clear(); | |
| 2522 | - for (auto const& iter: in) { | |
| 2523 | - if (out.count(iter.first.getObj())) { | |
| 2524 | - throw std::runtime_error("QPDF cannot currently linearize files that contain" | |
| 2525 | - " multiple objects with the same object ID and different" | |
| 2526 | - " generations. If you see this error message, please file" | |
| 2527 | - " a bug report and attach the file if possible. As a" | |
| 2528 | - " workaround, first convert the file with qpdf without" | |
| 2529 | - " linearizing, and then linearize the result of that" | |
| 2530 | - " conversion."); | |
| 2531 | - } | |
| 2532 | - out[iter.first.getObj()] = iter.second; | |
| 2533 | - } | |
| 2534 | -} | |
| 2535 | - | |
| 2536 | -void | |
| 2537 | 2543 | QPDFWriter::writeLinearized() |
| 2538 | 2544 | { |
| 2539 | 2545 | // Optimize file and enqueue objects in order |
| 2540 | 2546 | |
| 2541 | - discardGeneration(m->object_to_object_stream, m->object_to_object_stream_no_gen); | |
| 2542 | - | |
| 2543 | 2547 | auto skip_stream_parameters = [this](QPDFObjectHandle& stream) { |
| 2544 | 2548 | bool compress_stream; |
| 2545 | 2549 | bool is_metadata; |
| ... | ... | @@ -2550,15 +2554,14 @@ QPDFWriter::writeLinearized() |
| 2550 | 2554 | } |
| 2551 | 2555 | }; |
| 2552 | 2556 | |
| 2553 | - m->pdf.optimize(m->object_to_object_stream_no_gen, true, skip_stream_parameters); | |
| 2557 | + QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters); | |
| 2554 | 2558 | |
| 2555 | 2559 | std::vector<QPDFObjectHandle> part4; |
| 2556 | 2560 | std::vector<QPDFObjectHandle> part6; |
| 2557 | 2561 | std::vector<QPDFObjectHandle> part7; |
| 2558 | 2562 | std::vector<QPDFObjectHandle> part8; |
| 2559 | 2563 | std::vector<QPDFObjectHandle> part9; |
| 2560 | - QPDF::Writer::getLinearizedParts( | |
| 2561 | - m->pdf, m->object_to_object_stream_no_gen, part4, part6, part7, part8, part9); | |
| 2564 | + QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9); | |
| 2562 | 2565 | |
| 2563 | 2566 | // Object number sequence: |
| 2564 | 2567 | // |
| ... | ... | @@ -2582,7 +2585,7 @@ QPDFWriter::writeLinearized() |
| 2582 | 2585 | int after_second_half = 1 + second_half_uncompressed; |
| 2583 | 2586 | m->next_objid = after_second_half; |
| 2584 | 2587 | int second_half_xref = 0; |
| 2585 | - bool need_xref_stream = (!m->object_to_object_stream.empty()); | |
| 2588 | + bool need_xref_stream = !m->obj.streams_empty; | |
| 2586 | 2589 | if (need_xref_stream) { |
| 2587 | 2590 | second_half_xref = m->next_objid++; |
| 2588 | 2591 | } |
| ... | ... | @@ -2690,14 +2693,14 @@ QPDFWriter::writeLinearized() |
| 2690 | 2693 | writeString("<<"); |
| 2691 | 2694 | if (pass == 2) { |
| 2692 | 2695 | std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages(); |
| 2693 | - int first_page_object = m->obj_renumber[pages.at(0).getObjGen()]; | |
| 2696 | + int first_page_object = m->obj[pages.at(0)].renumber; | |
| 2694 | 2697 | int npages = QIntC::to_int(pages.size()); |
| 2695 | 2698 | |
| 2696 | 2699 | writeString(" /Linearized 1 /L "); |
| 2697 | 2700 | writeString(std::to_string(file_size + hint_length)); |
| 2698 | 2701 | // Implementation note 121 states that a space is mandatory after this open bracket. |
| 2699 | 2702 | writeString(" /H [ "); |
| 2700 | - writeString(std::to_string(m->xref[hint_id].getOffset())); | |
| 2703 | + writeString(std::to_string(m->new_obj[hint_id].xref.getOffset())); | |
| 2701 | 2704 | writeString(" "); |
| 2702 | 2705 | writeString(std::to_string(hint_length)); |
| 2703 | 2706 | writeString(" ] /O "); |
| ... | ... | @@ -2724,7 +2727,7 @@ QPDFWriter::writeLinearized() |
| 2724 | 2727 | qpdf_offset_t first_xref_offset = m->pipeline->getCount(); |
| 2725 | 2728 | qpdf_offset_t hint_offset = 0; |
| 2726 | 2729 | if (pass == 2) { |
| 2727 | - hint_offset = m->xref[hint_id].getOffset(); | |
| 2730 | + hint_offset = m->new_obj[hint_id].xref.getOffset(); | |
| 2728 | 2731 | } |
| 2729 | 2732 | if (need_xref_stream) { |
| 2730 | 2733 | // Must pad here too. |
| ... | ... | @@ -2795,7 +2798,7 @@ QPDFWriter::writeLinearized() |
| 2795 | 2798 | writeEncryptionDictionary(); |
| 2796 | 2799 | } |
| 2797 | 2800 | if (pass == 1) { |
| 2798 | - m->xref[hint_id] = QPDFXRefEntry(m->pipeline->getCount()); | |
| 2801 | + m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount()); | |
| 2799 | 2802 | } else { |
| 2800 | 2803 | // Part 5: hint stream |
| 2801 | 2804 | writeBuffer(hint_buffer); |
| ... | ... | @@ -2855,8 +2858,6 @@ QPDFWriter::writeLinearized() |
| 2855 | 2858 | writeString(std::to_string(first_xref_offset)); |
| 2856 | 2859 | writeString("\n%%EOF\n"); |
| 2857 | 2860 | |
| 2858 | - discardGeneration(m->obj_renumber, m->obj_renumber_no_gen); | |
| 2859 | - | |
| 2860 | 2861 | if (pass == 1) { |
| 2861 | 2862 | if (m->deterministic_id) { |
| 2862 | 2863 | QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1); |
| ... | ... | @@ -2870,7 +2871,7 @@ QPDFWriter::writeLinearized() |
| 2870 | 2871 | pp_pass1 = nullptr; |
| 2871 | 2872 | |
| 2872 | 2873 | // Save hint offset since it will be set to zero by calling openObject. |
| 2873 | - qpdf_offset_t hint_offset1 = m->xref[hint_id].getOffset(); | |
| 2874 | + qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset(); | |
| 2874 | 2875 | |
| 2875 | 2876 | // Write hint stream to a buffer |
| 2876 | 2877 | { |
| ... | ... | @@ -2882,7 +2883,7 @@ QPDFWriter::writeLinearized() |
| 2882 | 2883 | hint_length = QIntC::to_offset(hint_buffer->getSize()); |
| 2883 | 2884 | |
| 2884 | 2885 | // Restore hint offset |
| 2885 | - m->xref[hint_id] = QPDFXRefEntry(hint_offset1); | |
| 2886 | + m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1); | |
| 2886 | 2887 | if (lin_pass1_file) { |
| 2887 | 2888 | // Write some debugging information |
| 2888 | 2889 | fprintf( | ... | ... |
libqpdf/QPDF_linearization.cc
| ... | ... | @@ -9,6 +9,7 @@ |
| 9 | 9 | #include <qpdf/Pl_Flate.hh> |
| 10 | 10 | #include <qpdf/QPDFExc.hh> |
| 11 | 11 | #include <qpdf/QPDFLogger.hh> |
| 12 | +#include <qpdf/QPDFWriter_private.hh> | |
| 12 | 13 | #include <qpdf/QTC.hh> |
| 13 | 14 | #include <qpdf/QUtil.hh> |
| 14 | 15 | |
| ... | ... | @@ -585,6 +586,17 @@ QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map<int, int> const& obj |
| 585 | 586 | } |
| 586 | 587 | } |
| 587 | 588 | |
| 589 | +QPDFObjectHandle | |
| 590 | +QPDF::getUncompressedObject(QPDFObjectHandle& oh, QPDFWriter::ObjTable const& obj) | |
| 591 | +{ | |
| 592 | + if (obj.contains(oh)) { | |
| 593 | + if (auto id = obj[oh].object_stream; id > 0) { | |
| 594 | + return oh.isNull() ? oh : getObject(id, 0); | |
| 595 | + } | |
| 596 | + } | |
| 597 | + return oh; | |
| 598 | +} | |
| 599 | + | |
| 588 | 600 | int |
| 589 | 601 | QPDF::lengthNextN(int first_object, int n) |
| 590 | 602 | { |
| ... | ... | @@ -959,8 +971,9 @@ QPDF::dumpHGeneric(HGeneric& t) |
| 959 | 971 | << "group_length: " << t.group_length << "\n"; |
| 960 | 972 | } |
| 961 | 973 | |
| 974 | +template <typename T> | |
| 962 | 975 | void |
| 963 | -QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) | |
| 976 | +QPDF::calculateLinearizationData(T const& object_stream_data) | |
| 964 | 977 | { |
| 965 | 978 | // This function calculates the ordering of objects, divides them into the appropriate parts, |
| 966 | 979 | // and computes some values for the linearization parameter dictionary and hint tables. The |
| ... | ... | @@ -1402,11 +1415,12 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) |
| 1402 | 1415 | } |
| 1403 | 1416 | } |
| 1404 | 1417 | |
| 1418 | +template <typename T> | |
| 1405 | 1419 | void |
| 1406 | 1420 | QPDF::pushOutlinesToPart( |
| 1407 | 1421 | std::vector<QPDFObjectHandle>& part, |
| 1408 | 1422 | std::set<QPDFObjGen>& lc_outlines, |
| 1409 | - std::map<int, int> const& object_stream_data) | |
| 1423 | + T const& object_stream_data) | |
| 1410 | 1424 | { |
| 1411 | 1425 | QPDFObjectHandle root = getRoot(); |
| 1412 | 1426 | QPDFObjectHandle outlines = root.getKey("/Outlines"); |
| ... | ... | @@ -1433,14 +1447,14 @@ QPDF::pushOutlinesToPart( |
| 1433 | 1447 | |
| 1434 | 1448 | void |
| 1435 | 1449 | QPDF::getLinearizedParts( |
| 1436 | - std::map<int, int> const& object_stream_data, | |
| 1450 | + QPDFWriter::ObjTable const& obj, | |
| 1437 | 1451 | std::vector<QPDFObjectHandle>& part4, |
| 1438 | 1452 | std::vector<QPDFObjectHandle>& part6, |
| 1439 | 1453 | std::vector<QPDFObjectHandle>& part7, |
| 1440 | 1454 | std::vector<QPDFObjectHandle>& part8, |
| 1441 | 1455 | std::vector<QPDFObjectHandle>& part9) |
| 1442 | 1456 | { |
| 1443 | - calculateLinearizationData(object_stream_data); | |
| 1457 | + calculateLinearizationData(obj); | |
| 1444 | 1458 | part4 = m->part4; |
| 1445 | 1459 | part6 = m->part6; |
| 1446 | 1460 | part7 = m->part7; |
| ... | ... | @@ -1456,33 +1470,29 @@ nbits(int val) |
| 1456 | 1470 | |
| 1457 | 1471 | int |
| 1458 | 1472 | QPDF::outputLengthNextN( |
| 1459 | - int in_object, | |
| 1460 | - int n, | |
| 1461 | - std::map<int, qpdf_offset_t> const& lengths, | |
| 1462 | - std::map<int, int> const& obj_renumber) | |
| 1473 | + int in_object, int n, QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) | |
| 1463 | 1474 | { |
| 1464 | 1475 | // Figure out the length of a series of n consecutive objects in the output file starting with |
| 1465 | 1476 | // whatever object in_object from the input file mapped to. |
| 1466 | 1477 | |
| 1467 | - if (obj_renumber.count(in_object) == 0) { | |
| 1478 | + int first = obj[in_object].renumber; | |
| 1479 | + int last = first + n; | |
| 1480 | + if (first <= 0) { | |
| 1468 | 1481 | stopOnError("found object that is not renumbered while writing linearization data"); |
| 1469 | 1482 | } |
| 1470 | - int first = (*(obj_renumber.find(in_object))).second; | |
| 1471 | - int length = 0; | |
| 1472 | - for (int i = 0; i < n; ++i) { | |
| 1473 | - if (lengths.count(first + i) == 0) { | |
| 1483 | + qpdf_offset_t length = 0; | |
| 1484 | + for (int i = first; i < last; ++i) { | |
| 1485 | + auto l = new_obj[i].length; | |
| 1486 | + if (l == 0) { | |
| 1474 | 1487 | stopOnError("found item with unknown length while writing linearization data"); |
| 1475 | 1488 | } |
| 1476 | - length += toI((*(lengths.find(first + toI(i)))).second); | |
| 1489 | + length += l; | |
| 1477 | 1490 | } |
| 1478 | - return length; | |
| 1491 | + return toI(length); | |
| 1479 | 1492 | } |
| 1480 | 1493 | |
| 1481 | 1494 | void |
| 1482 | -QPDF::calculateHPageOffset( | |
| 1483 | - std::map<int, QPDFXRefEntry> const& xref, | |
| 1484 | - std::map<int, qpdf_offset_t> const& lengths, | |
| 1485 | - std::map<int, int> const& obj_renumber) | |
| 1495 | +QPDF::calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) | |
| 1486 | 1496 | { |
| 1487 | 1497 | // Page Offset Hint Table |
| 1488 | 1498 | |
| ... | ... | @@ -1497,8 +1507,7 @@ QPDF::calculateHPageOffset( |
| 1497 | 1507 | |
| 1498 | 1508 | int min_nobjects = cphe.at(0).nobjects; |
| 1499 | 1509 | int max_nobjects = min_nobjects; |
| 1500 | - int min_length = | |
| 1501 | - outputLengthNextN(pages.at(0).getObjectID(), min_nobjects, lengths, obj_renumber); | |
| 1510 | + int min_length = outputLengthNextN(pages.at(0).getObjectID(), min_nobjects, new_obj, obj); | |
| 1502 | 1511 | int max_length = min_length; |
| 1503 | 1512 | int max_shared = cphe.at(0).nshared_objects; |
| 1504 | 1513 | |
| ... | ... | @@ -1515,7 +1524,7 @@ QPDF::calculateHPageOffset( |
| 1515 | 1524 | // assignments. |
| 1516 | 1525 | |
| 1517 | 1526 | int nobjects = cphe.at(i).nobjects; |
| 1518 | - int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, lengths, obj_renumber); | |
| 1527 | + int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, new_obj, obj); | |
| 1519 | 1528 | int nshared = cphe.at(i).nshared_objects; |
| 1520 | 1529 | |
| 1521 | 1530 | min_nobjects = std::min(min_nobjects, nobjects); |
| ... | ... | @@ -1530,9 +1539,7 @@ QPDF::calculateHPageOffset( |
| 1530 | 1539 | } |
| 1531 | 1540 | |
| 1532 | 1541 | ph.min_nobjects = min_nobjects; |
| 1533 | - int in_page0_id = pages.at(0).getObjectID(); | |
| 1534 | - int out_page0_id = (*(obj_renumber.find(in_page0_id))).second; | |
| 1535 | - ph.first_page_offset = (*(xref.find(out_page0_id))).second.getOffset(); | |
| 1542 | + ph.first_page_offset = new_obj[obj[pages.at(0)].renumber].xref.getOffset(); | |
| 1536 | 1543 | ph.nbits_delta_nobjects = nbits(max_nobjects - min_nobjects); |
| 1537 | 1544 | ph.min_page_length = min_length; |
| 1538 | 1545 | ph.nbits_delta_page_length = nbits(max_length - min_length); |
| ... | ... | @@ -1567,9 +1574,7 @@ QPDF::calculateHPageOffset( |
| 1567 | 1574 | |
| 1568 | 1575 | void |
| 1569 | 1576 | QPDF::calculateHSharedObject( |
| 1570 | - std::map<int, QPDFXRefEntry> const& xref, | |
| 1571 | - std::map<int, qpdf_offset_t> const& lengths, | |
| 1572 | - std::map<int, int> const& obj_renumber) | |
| 1577 | + QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) | |
| 1573 | 1578 | { |
| 1574 | 1579 | CHSharedObject& cso = m->c_shared_object_data; |
| 1575 | 1580 | std::vector<CHSharedObjectEntry>& csoe = cso.entries; |
| ... | ... | @@ -1577,12 +1582,12 @@ QPDF::calculateHSharedObject( |
| 1577 | 1582 | std::vector<HSharedObjectEntry>& soe = so.entries; |
| 1578 | 1583 | soe.clear(); |
| 1579 | 1584 | |
| 1580 | - int min_length = outputLengthNextN(csoe.at(0).object, 1, lengths, obj_renumber); | |
| 1585 | + int min_length = outputLengthNextN(csoe.at(0).object, 1, new_obj, obj); | |
| 1581 | 1586 | int max_length = min_length; |
| 1582 | 1587 | |
| 1583 | 1588 | for (size_t i = 0; i < toS(cso.nshared_total); ++i) { |
| 1584 | 1589 | // Assign absolute numbers to deltas; adjust later |
| 1585 | - int length = outputLengthNextN(csoe.at(i).object, 1, lengths, obj_renumber); | |
| 1590 | + int length = outputLengthNextN(csoe.at(i).object, 1, new_obj, obj); | |
| 1586 | 1591 | min_length = std::min(min_length, length); |
| 1587 | 1592 | max_length = std::max(max_length, length); |
| 1588 | 1593 | soe.emplace_back(); |
| ... | ... | @@ -1595,8 +1600,9 @@ QPDF::calculateHSharedObject( |
| 1595 | 1600 | so.nshared_total = cso.nshared_total; |
| 1596 | 1601 | so.nshared_first_page = cso.nshared_first_page; |
| 1597 | 1602 | if (so.nshared_total > so.nshared_first_page) { |
| 1598 | - so.first_shared_obj = (*(obj_renumber.find(cso.first_shared_obj))).second; | |
| 1599 | - so.first_shared_offset = (*(xref.find(so.first_shared_obj))).second.getOffset(); | |
| 1603 | + so.first_shared_obj = obj[cso.first_shared_obj].renumber; | |
| 1604 | + so.min_group_length = min_length; | |
| 1605 | + so.first_shared_offset = new_obj[so.first_shared_obj].xref.getOffset(); | |
| 1600 | 1606 | } |
| 1601 | 1607 | so.min_group_length = min_length; |
| 1602 | 1608 | so.nbits_delta_group_length = nbits(max_length - min_length); |
| ... | ... | @@ -1611,10 +1617,7 @@ QPDF::calculateHSharedObject( |
| 1611 | 1617 | } |
| 1612 | 1618 | |
| 1613 | 1619 | void |
| 1614 | -QPDF::calculateHOutline( | |
| 1615 | - std::map<int, QPDFXRefEntry> const& xref, | |
| 1616 | - std::map<int, qpdf_offset_t> const& lengths, | |
| 1617 | - std::map<int, int> const& obj_renumber) | |
| 1620 | +QPDF::calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) | |
| 1618 | 1621 | { |
| 1619 | 1622 | HGeneric& cho = m->c_outline_data; |
| 1620 | 1623 | |
| ... | ... | @@ -1624,10 +1627,10 @@ QPDF::calculateHOutline( |
| 1624 | 1627 | |
| 1625 | 1628 | HGeneric& ho = m->outline_hints; |
| 1626 | 1629 | |
| 1627 | - ho.first_object = (*(obj_renumber.find(cho.first_object))).second; | |
| 1628 | - ho.first_object_offset = (*(xref.find(ho.first_object))).second.getOffset(); | |
| 1630 | + ho.first_object = obj[cho.first_object].renumber; | |
| 1631 | + ho.first_object_offset = new_obj[ho.first_object].xref.getOffset(); | |
| 1629 | 1632 | ho.nobjects = cho.nobjects; |
| 1630 | - ho.group_length = outputLengthNextN(cho.first_object, ho.nobjects, lengths, obj_renumber); | |
| 1633 | + ho.group_length = outputLengthNextN(cho.first_object, ho.nobjects, new_obj, obj); | |
| 1631 | 1634 | } |
| 1632 | 1635 | |
| 1633 | 1636 | template <class T, class int_type> |
| ... | ... | @@ -1756,18 +1759,17 @@ QPDF::writeHGeneric(BitWriter& w, HGeneric& t) |
| 1756 | 1759 | |
| 1757 | 1760 | void |
| 1758 | 1761 | QPDF::generateHintStream( |
| 1759 | - std::map<int, QPDFXRefEntry> const& xref, | |
| 1760 | - std::map<int, qpdf_offset_t> const& lengths, | |
| 1761 | - std::map<int, int> const& obj_renumber, | |
| 1762 | + QPDFWriter::NewObjTable const& new_obj, | |
| 1763 | + QPDFWriter::ObjTable const& obj, | |
| 1762 | 1764 | std::shared_ptr<Buffer>& hint_buffer, |
| 1763 | 1765 | int& S, |
| 1764 | 1766 | int& O, |
| 1765 | 1767 | bool compressed) |
| 1766 | 1768 | { |
| 1767 | 1769 | // Populate actual hint table values |
| 1768 | - calculateHPageOffset(xref, lengths, obj_renumber); | |
| 1769 | - calculateHSharedObject(xref, lengths, obj_renumber); | |
| 1770 | - calculateHOutline(xref, lengths, obj_renumber); | |
| 1770 | + calculateHPageOffset(new_obj, obj); | |
| 1771 | + calculateHSharedObject(new_obj, obj); | |
| 1772 | + calculateHOutline(new_obj, obj); | |
| 1771 | 1773 | |
| 1772 | 1774 | // Write the hint stream itself into a compressed memory buffer. Write through a counter so we |
| 1773 | 1775 | // can get offsets. | ... | ... |
libqpdf/QPDF_optimization.cc
| ... | ... | @@ -5,6 +5,7 @@ |
| 5 | 5 | #include <qpdf/QPDF.hh> |
| 6 | 6 | |
| 7 | 7 | #include <qpdf/QPDFExc.hh> |
| 8 | +#include <qpdf/QPDFWriter_private.hh> | |
| 8 | 9 | #include <qpdf/QPDF_Array.hh> |
| 9 | 10 | #include <qpdf/QPDF_Dictionary.hh> |
| 10 | 11 | #include <qpdf/QTC.hh> |
| ... | ... | @@ -59,6 +60,23 @@ QPDF::optimize( |
| 59 | 60 | bool allow_changes, |
| 60 | 61 | std::function<int(QPDFObjectHandle&)> skip_stream_parameters) |
| 61 | 62 | { |
| 63 | + optimize_internal(object_stream_data, allow_changes, skip_stream_parameters); | |
| 64 | +} | |
| 65 | + | |
| 66 | +void | |
| 67 | +QPDF::optimize( | |
| 68 | + QPDFWriter::ObjTable const& obj, std::function<int(QPDFObjectHandle&)> skip_stream_parameters) | |
| 69 | +{ | |
| 70 | + optimize_internal(obj, true, skip_stream_parameters); | |
| 71 | +} | |
| 72 | + | |
| 73 | +template <typename T> | |
| 74 | +void | |
| 75 | +QPDF::optimize_internal( | |
| 76 | + T const& object_stream_data, | |
| 77 | + bool allow_changes, | |
| 78 | + std::function<int(QPDFObjectHandle&)> skip_stream_parameters) | |
| 79 | +{ | |
| 62 | 80 | if (!m->obj_user_to_objects.empty()) { |
| 63 | 81 | // already optimized |
| 64 | 82 | return; |
| ... | ... | @@ -379,3 +397,45 @@ QPDF::filterCompressedObjects(std::map<int, int> const& object_stream_data) |
| 379 | 397 | m->obj_user_to_objects = t_obj_user_to_objects; |
| 380 | 398 | m->object_to_obj_users = t_object_to_obj_users; |
| 381 | 399 | } |
| 400 | + | |
| 401 | +void | |
| 402 | +QPDF::filterCompressedObjects(QPDFWriter::ObjTable const& obj) | |
| 403 | +{ | |
| 404 | + if (obj.getStreamsEmpty()) { | |
| 405 | + return; | |
| 406 | + } | |
| 407 | + | |
| 408 | + // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed | |
| 409 | + // objects. If something is a user of a compressed object, then it is really a user of the | |
| 410 | + // object stream that contains it. | |
| 411 | + | |
| 412 | + std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects; | |
| 413 | + std::map<QPDFObjGen, std::set<ObjUser>> t_object_to_obj_users; | |
| 414 | + | |
| 415 | + for (auto const& i1: m->obj_user_to_objects) { | |
| 416 | + ObjUser const& ou = i1.first; | |
| 417 | + // Loop over objects. | |
| 418 | + for (auto const& og: i1.second) { | |
| 419 | + if (auto const& i2 = obj[og].object_stream; i2 <= 0) { | |
| 420 | + t_obj_user_to_objects[ou].insert(og); | |
| 421 | + } else { | |
| 422 | + t_obj_user_to_objects[ou].insert(QPDFObjGen(i2, 0)); | |
| 423 | + } | |
| 424 | + } | |
| 425 | + } | |
| 426 | + | |
| 427 | + for (auto const& i1: m->object_to_obj_users) { | |
| 428 | + QPDFObjGen const& og = i1.first; | |
| 429 | + // Loop over obj_users. | |
| 430 | + for (auto const& ou: i1.second) { | |
| 431 | + if (auto i2 = obj[og].object_stream; i2 <= 0) { | |
| 432 | + t_object_to_obj_users[og].insert(ou); | |
| 433 | + } else { | |
| 434 | + t_object_to_obj_users[QPDFObjGen(i2, 0)].insert(ou); | |
| 435 | + } | |
| 436 | + } | |
| 437 | + } | |
| 438 | + | |
| 439 | + m->obj_user_to_objects = t_obj_user_to_objects; | |
| 440 | + m->object_to_obj_users = t_object_to_obj_users; | |
| 441 | +} | ... | ... |
libqpdf/qpdf/ObjTable.hh
0 → 100644
| 1 | +#ifndef OBJTABLE_HH | |
| 2 | +#define OBJTABLE_HH | |
| 3 | + | |
| 4 | +#include <qpdf/QPDFObjGen.hh> | |
| 5 | +#include <qpdf/QPDFObjectHandle.hh> | |
| 6 | + | |
| 7 | +#include "qpdf/QIntC.hh" | |
| 8 | +#include <limits> | |
| 9 | + | |
| 10 | +// A table of objects indexed by object id. This is intended as a more efficient replacement for | |
| 11 | +// std::map<QPDFObjGen, T> containers. | |
| 12 | +// | |
| 13 | +// The table is implemented as a std::vector, with the object id implicitly represented by the index | |
| 14 | +// of the object. This has a number of implications, including: | |
| 15 | +// - operations that change the index of existing elements such as insertion and deletions are not | |
| 16 | +// permitted. | |
| 17 | +// - operations that extend the table may invalidate iterators and references to objects. | |
| 18 | +// | |
| 19 | +// The provided overloads of the access operator[] are safe. For out of bounds access they will | |
| 20 | +// either extend the table or throw a runtime error. | |
| 21 | +// | |
| 22 | +// ObjTable has a map 'sparse_elements' to deal with very sparse / extremely large object tables | |
| 23 | +// (usually as the result of invalid dangling references). This map may contain objects not found in | |
| 24 | +// the xref table of the original pdf if there are dangling references with an id significantly | |
| 25 | +// larger than the largest valid object id found in original pdf. | |
| 26 | + | |
| 27 | +template <class T> | |
| 28 | +class ObjTable: public std::vector<T> | |
| 29 | +{ | |
| 30 | + public: | |
| 31 | + ObjTable() = default; | |
| 32 | + ObjTable(const ObjTable&) = delete; | |
| 33 | + ObjTable(ObjTable&&) = delete; | |
| 34 | + ObjTable& operator[](const ObjTable&) = delete; | |
| 35 | + ObjTable& operator[](ObjTable&&) = delete; | |
| 36 | + | |
| 37 | + // Remove unchecked access. | |
| 38 | + T& operator[](unsigned long idx) = delete; | |
| 39 | + T const& operator[](unsigned long idx) const = delete; | |
| 40 | + | |
| 41 | + inline T const& | |
| 42 | + operator[](int idx) const | |
| 43 | + { | |
| 44 | + return element(static_cast<size_t>(idx)); | |
| 45 | + } | |
| 46 | + | |
| 47 | + inline T const& | |
| 48 | + operator[](QPDFObjGen og) const | |
| 49 | + { | |
| 50 | + return element(static_cast<size_t>(og.getObj())); | |
| 51 | + } | |
| 52 | + | |
| 53 | + inline T const& | |
| 54 | + operator[](QPDFObjectHandle oh) const | |
| 55 | + { | |
| 56 | + return element(static_cast<size_t>(oh.getObjectID())); | |
| 57 | + } | |
| 58 | + | |
| 59 | + inline bool | |
| 60 | + contains(size_t idx) const | |
| 61 | + { | |
| 62 | + return idx < std::vector<T>::size() || sparse_elements.count(idx); | |
| 63 | + } | |
| 64 | + | |
| 65 | + inline bool | |
| 66 | + contains(QPDFObjectHandle oh) const | |
| 67 | + { | |
| 68 | + return contains(static_cast<size_t>(oh.getObjectID())); | |
| 69 | + } | |
| 70 | + | |
| 71 | + protected: | |
| 72 | + inline T& | |
| 73 | + operator[](int id) | |
| 74 | + { | |
| 75 | + return element(static_cast<size_t>(id)); | |
| 76 | + } | |
| 77 | + | |
| 78 | + inline T& | |
| 79 | + operator[](QPDFObjGen og) | |
| 80 | + { | |
| 81 | + return element(static_cast<size_t>(og.getObj())); | |
| 82 | + } | |
| 83 | + | |
| 84 | + inline T& | |
| 85 | + operator[](QPDFObjectHandle oh) | |
| 86 | + { | |
| 87 | + return element(static_cast<size_t>(oh.getObjectID())); | |
| 88 | + } | |
| 89 | + | |
| 90 | + inline T& | |
| 91 | + operator[](unsigned int id) | |
| 92 | + { | |
| 93 | + return element(id); | |
| 94 | + } | |
| 95 | + | |
| 96 | + void | |
| 97 | + initialize(size_t idx) | |
| 98 | + { | |
| 99 | + if (std::vector<T>::size() > 0 || sparse_elements.size() > 0) { | |
| 100 | + throw ::std::logic_error("ObjTable accessed before initialization"); | |
| 101 | + } else if ( | |
| 102 | + idx >= static_cast<size_t>(std::numeric_limits<int>::max()) || | |
| 103 | + idx >= std::vector<T>::max_size()) { | |
| 104 | + throw std::runtime_error("Invalid maximum object id initializing ObjTable."); | |
| 105 | + } else { | |
| 106 | + std::vector<T>::resize(++idx); | |
| 107 | + } | |
| 108 | + } | |
| 109 | + | |
| 110 | + inline void | |
| 111 | + forEach(std::function<void(int, const T&)> fn) | |
| 112 | + { | |
| 113 | + int i = 0; | |
| 114 | + for (auto const& item: *this) { | |
| 115 | + fn(i++, item); | |
| 116 | + } | |
| 117 | + for (auto const& [id, item]: sparse_elements) { | |
| 118 | + fn(QIntC::to_int(id), item); | |
| 119 | + } | |
| 120 | + } | |
| 121 | + | |
| 122 | + private: | |
| 123 | + std::map<size_t, T> sparse_elements; | |
| 124 | + | |
| 125 | + inline T& | |
| 126 | + element(size_t idx) | |
| 127 | + { | |
| 128 | + if (idx < std::vector<T>::size()) { | |
| 129 | + return std::vector<T>::operator[](idx); | |
| 130 | + } else if (idx < static_cast<size_t>(std::numeric_limits<int>::max())) { | |
| 131 | + return sparse_elements[idx]; | |
| 132 | + } | |
| 133 | + throw std::runtime_error("Invalid object id accessing ObjTable."); | |
| 134 | + return element(0); // doesn't return | |
| 135 | + } | |
| 136 | + | |
| 137 | + inline T const& | |
| 138 | + element(size_t idx) const | |
| 139 | + { | |
| 140 | + if (idx < std::vector<T>::size()) { | |
| 141 | + return std::vector<T>::operator[](idx); | |
| 142 | + } else if (idx < static_cast<size_t>(std::numeric_limits<int>::max())) { | |
| 143 | + return sparse_elements.at(idx); | |
| 144 | + } | |
| 145 | + throw std::runtime_error("Invalid object id accessing ObjTable."); | |
| 146 | + return element(0); // doesn't return | |
| 147 | + } | |
| 148 | +}; | |
| 149 | + | |
| 150 | +#endif // OBJTABLE_HH | ... | ... |
libqpdf/qpdf/QPDFWriter_private.hh
0 → 100644
| 1 | +#ifndef QPDFWRITER_PRIVATE_HH | |
| 2 | +#define QPDFWRITER_PRIVATE_HH | |
| 3 | + | |
| 4 | +#include <qpdf/QPDFWriter.hh> | |
| 5 | + | |
| 6 | +#include <qpdf/ObjTable.hh> | |
| 7 | + | |
| 8 | +// This file is intended for inclusion by QPDFWriter, QPDF, QPDF_optimization and QPDF_linearization | |
| 9 | +// only. | |
| 10 | + | |
| 11 | +struct QPDFWriter::Object | |
| 12 | +{ | |
| 13 | + int renumber{0}; | |
| 14 | + int gen{0}; | |
| 15 | + int object_stream{0}; | |
| 16 | +}; | |
| 17 | + | |
| 18 | +struct QPDFWriter::NewObject | |
| 19 | +{ | |
| 20 | + QPDFXRefEntry xref; | |
| 21 | + qpdf_offset_t length{0}; | |
| 22 | +}; | |
| 23 | + | |
| 24 | +class QPDFWriter::ObjTable: public ::ObjTable<QPDFWriter::Object> | |
| 25 | +{ | |
| 26 | + friend class QPDFWriter; | |
| 27 | + | |
| 28 | + public: | |
| 29 | + bool | |
| 30 | + getStreamsEmpty() const noexcept | |
| 31 | + { | |
| 32 | + return streams_empty; | |
| 33 | + } | |
| 34 | + | |
| 35 | + private: | |
| 36 | + // For performance, set by QPDFWriter rather than tracked by ObjTable. | |
| 37 | + bool streams_empty{false}; | |
| 38 | +}; | |
| 39 | + | |
| 40 | +class QPDFWriter::NewObjTable: public ::ObjTable<QPDFWriter::NewObject> | |
| 41 | +{ | |
| 42 | + friend class QPDFWriter; | |
| 43 | +}; | |
| 44 | + | |
| 45 | +class QPDFWriter::Members | |
| 46 | +{ | |
| 47 | + friend class QPDFWriter; | |
| 48 | + | |
| 49 | + public: | |
| 50 | + QPDF_DLL | |
| 51 | + ~Members(); | |
| 52 | + | |
| 53 | + private: | |
| 54 | + Members(QPDF& pdf); | |
| 55 | + Members(Members const&) = delete; | |
| 56 | + | |
| 57 | + QPDF& pdf; | |
| 58 | + QPDFObjGen root_og{-1, 0}; | |
| 59 | + char const* filename{"unspecified"}; | |
| 60 | + FILE* file{nullptr}; | |
| 61 | + bool close_file{false}; | |
| 62 | + Pl_Buffer* buffer_pipeline{nullptr}; | |
| 63 | + Buffer* output_buffer{nullptr}; | |
| 64 | + bool normalize_content_set{false}; | |
| 65 | + bool normalize_content{false}; | |
| 66 | + bool compress_streams{true}; | |
| 67 | + bool compress_streams_set{false}; | |
| 68 | + qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_none}; | |
| 69 | + bool stream_decode_level_set{false}; | |
| 70 | + bool recompress_flate{false}; | |
| 71 | + bool qdf_mode{false}; | |
| 72 | + bool preserve_unreferenced_objects{false}; | |
| 73 | + bool newline_before_endstream{false}; | |
| 74 | + bool static_id{false}; | |
| 75 | + bool suppress_original_object_ids{false}; | |
| 76 | + bool direct_stream_lengths{true}; | |
| 77 | + bool encrypted{false}; | |
| 78 | + bool preserve_encryption{true}; | |
| 79 | + bool linearized{false}; | |
| 80 | + bool pclm{false}; | |
| 81 | + qpdf_object_stream_e object_stream_mode{qpdf_o_preserve}; | |
| 82 | + std::string encryption_key; | |
| 83 | + bool encrypt_metadata{true}; | |
| 84 | + bool encrypt_use_aes{false}; | |
| 85 | + std::map<std::string, std::string> encryption_dictionary; | |
| 86 | + int encryption_V{0}; | |
| 87 | + int encryption_R{0}; | |
| 88 | + | |
| 89 | + std::string id1; // for /ID key of | |
| 90 | + std::string id2; // trailer dictionary | |
| 91 | + std::string final_pdf_version; | |
| 92 | + int final_extension_level{0}; | |
| 93 | + std::string min_pdf_version; | |
| 94 | + int min_extension_level{0}; | |
| 95 | + std::string forced_pdf_version; | |
| 96 | + int forced_extension_level{0}; | |
| 97 | + std::string extra_header_text; | |
| 98 | + int encryption_dict_objid{0}; | |
| 99 | + std::string cur_data_key; | |
| 100 | + std::list<std::shared_ptr<Pipeline>> to_delete; | |
| 101 | + Pl_Count* pipeline{nullptr}; | |
| 102 | + std::vector<QPDFObjectHandle> object_queue; | |
| 103 | + size_t object_queue_front{0}; | |
| 104 | + QPDFWriter::ObjTable obj; | |
| 105 | + QPDFWriter::NewObjTable new_obj; | |
| 106 | + int next_objid{1}; | |
| 107 | + int cur_stream_length_id{0}; | |
| 108 | + size_t cur_stream_length{0}; | |
| 109 | + bool added_newline{false}; | |
| 110 | + size_t max_ostream_index{0}; | |
| 111 | + std::set<QPDFObjGen> normalized_streams; | |
| 112 | + std::map<QPDFObjGen, int> page_object_to_seq; | |
| 113 | + std::map<QPDFObjGen, int> contents_to_page_seq; | |
| 114 | + std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects; | |
| 115 | + std::list<Pipeline*> pipeline_stack; | |
| 116 | + unsigned long long next_stack_id{0}; | |
| 117 | + bool deterministic_id{false}; | |
| 118 | + Pl_MD5* md5_pipeline{nullptr}; | |
| 119 | + std::string deterministic_id_data; | |
| 120 | + bool did_write_setup{false}; | |
| 121 | + | |
| 122 | + // For linearization only | |
| 123 | + std::string lin_pass1_filename; | |
| 124 | + | |
| 125 | + // For progress reporting | |
| 126 | + std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter; | |
| 127 | + int events_expected{0}; | |
| 128 | + int events_seen{0}; | |
| 129 | + int next_progress_report{0}; | |
| 130 | +}; | |
| 131 | + | |
| 132 | +#endif // QPDFWRITER_PRIVATE_HH | ... | ... |
libtests/CMakeLists.txt
libtests/obj_table.cc
0 → 100644
| 1 | +#include <qpdf/ObjTable.hh> | |
| 2 | + | |
// Element type stored in the table under test; value starts at 0.
struct Test
{
    int value = 0;
};
| 7 | + | |
| 8 | +class Table: public ObjTable<Test> | |
| 9 | +{ | |
| 10 | + public: | |
| 11 | + Table() | |
| 12 | + { | |
| 13 | + initialize(5); | |
| 14 | + } | |
| 15 | + | |
| 16 | + void | |
| 17 | + test() | |
| 18 | + { | |
| 19 | + for (int i = 0; i < 10; ++i) { | |
| 20 | + (*this)[i].value = 2 * i; | |
| 21 | + (*this)[1000 + i].value = 2 * (1000 + i); | |
| 22 | + } | |
| 23 | + | |
| 24 | + forEach([](auto i, auto const& item) -> void { | |
| 25 | + std::cout << std::to_string(i) << " : " << std::to_string(item.value) << "\n"; | |
| 26 | + }); | |
| 27 | + | |
| 28 | + std::cout << "2000 : " << std::to_string((*this)[2000].value) << "\n"; | |
| 29 | + } | |
| 30 | +}; | |
| 31 | + | |
| 32 | +int | |
| 33 | +main() | |
| 34 | +{ | |
| 35 | + Table().test(); | |
| 36 | + | |
| 37 | + std::cout << "object table tests done\n"; | |
| 38 | + return 0; | |
| 39 | +} | ... | ... |
libtests/qtest/obj_table.test
0 → 100644
#!/usr/bin/env perl
require 5.008;
use warnings;
use strict;

# Work inside the obj_table directory; abort if it cannot be entered.
chdir("obj_table") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('object table');

# Run the obj_table command and compare its output with obj_table.out (newlines normalized);
# the exit status must be 0.
$td->runtest(
    "obj_table",
    { $td->COMMAND => "obj_table" },
    {
        $td->FILE        => "obj_table.out",
        $td->EXIT_STATUS => 0,
    },
    $td->NORMALIZE_NEWLINES);

# NOTE(review): the argument is presumably the expected number of tests - confirm against
# TestDriver.
$td->report(1);
libtests/qtest/obj_table/obj_table.out
0 → 100644
| 1 | +0 : 0 | |
| 2 | +1 : 2 | |
| 3 | +2 : 4 | |
| 4 | +3 : 6 | |
| 5 | +4 : 8 | |
| 6 | +5 : 10 | |
| 7 | +6 : 12 | |
| 8 | +7 : 14 | |
| 9 | +8 : 16 | |
| 10 | +9 : 18 | |
| 11 | +1000 : 2000 | |
| 12 | +1001 : 2002 | |
| 13 | +1002 : 2004 | |
| 14 | +1003 : 2006 | |
| 15 | +1004 : 2008 | |
| 16 | +1005 : 2010 | |
| 17 | +1006 : 2012 | |
| 18 | +1007 : 2014 | |
| 19 | +1008 : 2016 | |
| 20 | +1009 : 2018 | |
| 21 | +2000 : 0 | |
| 22 | +object table tests done | ... | ... |