Commit 9c7c7413dd21ab703d6cf5e5e93711293cc1e6a8
1 parent
80631cb0
Enhance pl::Count to support buffered writing to next
Showing
6 changed files
with
167 additions
and
65 deletions
include/qpdf/QPDFWriter.hh
| ... | ... | @@ -43,6 +43,11 @@ |
| 43 | 43 | #include <qpdf/QPDFObjectHandle.hh> |
| 44 | 44 | #include <qpdf/QPDFXRefEntry.hh> |
| 45 | 45 | |
| 46 | +namespace qpdf::pl | |
| 47 | +{ | |
| 48 | + struct Link; | |
| 49 | +} | |
| 50 | + | |
| 46 | 51 | class QPDF; |
| 47 | 52 | class Pl_Count; |
| 48 | 53 | class Pl_MD5; |
| ... | ... | @@ -597,14 +602,17 @@ class QPDFWriter |
| 597 | 602 | // activate the pipeline stack. When the passed in PipelinePopper goes out of scope, the stack |
| 598 | 603 | // is popped. |
| 599 | 604 | Pipeline* pushPipeline(Pipeline*); |
| 600 | - void | |
| 601 | - activatePipelineStack(PipelinePopper& pp, bool discard = false, std::string* str = nullptr); | |
| 605 | + void activatePipelineStack(PipelinePopper& pp, std::string& str); | |
| 606 | + void activatePipelineStack(PipelinePopper& pp, std::unique_ptr<qpdf::pl::Link> link); | |
| 607 | + void activatePipelineStack( | |
| 608 | + PipelinePopper& pp, | |
| 609 | + bool discard = false, | |
| 610 | + std::string* str = nullptr, | |
| 611 | + std::unique_ptr<qpdf::pl::Link> link = nullptr); | |
| 602 | 612 | void initializePipelineStack(Pipeline*); |
| 603 | 613 | |
| 604 | 614 | void adjustAESStreamLength(size_t& length); |
| 605 | 615 | void pushEncryptionFilter(PipelinePopper&); |
| 606 | - void pushDiscardFilter(PipelinePopper&); | |
| 607 | - void pushStringPipeline(PipelinePopper&, std::string& str); | |
| 608 | 616 | void pushMD5Pipeline(PipelinePopper&); |
| 609 | 617 | void computeDeterministicIDData(); |
| 610 | 618 | ... | ... |
libqpdf/QPDFWriter.cc
| ... | ... | @@ -917,11 +917,33 @@ QPDFWriter::initializePipelineStack(Pipeline* p) |
| 917 | 917 | } |
| 918 | 918 | |
| 919 | 919 | void |
| 920 | -QPDFWriter::activatePipelineStack(PipelinePopper& pp, bool discard, std::string* str) | |
| 920 | +QPDFWriter::activatePipelineStack(PipelinePopper& pp, std::string& str) | |
| 921 | +{ | |
| 922 | + activatePipelineStack(pp, false, &str, nullptr); | |
| 923 | +} | |
| 924 | + | |
| 925 | +void | |
| 926 | +QPDFWriter::activatePipelineStack(PipelinePopper& pp, std::unique_ptr<pl::Link> link) | |
| 927 | +{ | |
| 928 | + m->count_buffer.clear(); | |
| 929 | + activatePipelineStack(pp, false, &m->count_buffer, std::move(link)); | |
| 930 | +} | |
| 931 | + | |
| 932 | +void | |
| 933 | +QPDFWriter::activatePipelineStack( | |
| 934 | + PipelinePopper& pp, bool discard, std::string* str, std::unique_ptr<pl::Link> link) | |
| 921 | 935 | { |
| 922 | 936 | std::string stack_id("stack " + std::to_string(m->next_stack_id)); |
| 923 | - auto* c = str ? new pl::Count(stack_id.c_str(), str) | |
| 924 | - : new pl::Count(stack_id.c_str(), discard ? nullptr : m->pipeline_stack.back()); | |
| 937 | + pl::Count* c; | |
| 938 | + if (link) { | |
| 939 | + c = new pl::Count(stack_id.c_str(), m->count_buffer, std::move(link)); | |
| 940 | + } else if (discard) { | |
| 941 | + c = new pl::Count(stack_id.c_str(), nullptr); | |
| 942 | + } else if (!str) { | |
| 943 | + c = new pl::Count(stack_id.c_str(), m->pipeline_stack.back()); | |
| 944 | + } else { | |
| 945 | + c = new pl::Count(stack_id.c_str(), *str); | |
| 946 | + } | |
| 925 | 947 | ++m->next_stack_id; |
| 926 | 948 | m->pipeline_stack.emplace_back(c); |
| 927 | 949 | m->pipeline = c; |
| ... | ... | @@ -991,18 +1013,6 @@ QPDFWriter::pushEncryptionFilter(PipelinePopper& pp) |
| 991 | 1013 | } |
| 992 | 1014 | |
| 993 | 1015 | void |
| 994 | -QPDFWriter::pushDiscardFilter(PipelinePopper& pp) | |
| 995 | -{ | |
| 996 | - activatePipelineStack(pp, true); | |
| 997 | -} | |
| 998 | - | |
| 999 | -void | |
| 1000 | -QPDFWriter::pushStringPipeline(PipelinePopper& pp, std::string& str) | |
| 1001 | -{ | |
| 1002 | - activatePipelineStack(pp, true, &str); | |
| 1003 | -} | |
| 1004 | - | |
| 1005 | -void | |
| 1006 | 1016 | QPDFWriter::pushMD5Pipeline(PipelinePopper& pp) |
| 1007 | 1017 | { |
| 1008 | 1018 | if (!m->id2.empty()) { |
| ... | ... | @@ -1287,9 +1297,9 @@ QPDFWriter::willFilterStream( |
| 1287 | 1297 | for (bool first_attempt: {true, false}) { |
| 1288 | 1298 | PipelinePopper pp_stream_data(this); |
| 1289 | 1299 | if (stream_data != nullptr) { |
| 1290 | - pushStringPipeline(pp_stream_data, *stream_data); | |
| 1300 | + activatePipelineStack(pp_stream_data, *stream_data); | |
| 1291 | 1301 | } else { |
| 1292 | - pushDiscardFilter(pp_stream_data); | |
| 1302 | + activatePipelineStack(pp_stream_data, true); | |
| 1293 | 1303 | } |
| 1294 | 1304 | try { |
| 1295 | 1305 | filtered = stream.pipeStreamData( |
| ... | ... | @@ -1646,11 +1656,11 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) |
| 1646 | 1656 | std::string stream_buffer_pass1; |
| 1647 | 1657 | std::string stream_buffer_pass2; |
| 1648 | 1658 | int first_obj = -1; |
| 1649 | - bool compressed = false; | |
| 1659 | + const bool compressed = m->compress_streams && !m->qdf_mode; | |
| 1650 | 1660 | { |
| 1651 | 1661 | // Pass 1 |
| 1652 | 1662 | PipelinePopper pp_ostream_pass1(this); |
| 1653 | - pushStringPipeline(pp_ostream_pass1, stream_buffer_pass1); | |
| 1663 | + activatePipelineStack(pp_ostream_pass1, stream_buffer_pass1); | |
| 1654 | 1664 | |
| 1655 | 1665 | int count = -1; |
| 1656 | 1666 | for (auto const& obj: m->object_stream_to_objects[old_id]) { |
| ... | ... | @@ -1704,18 +1714,20 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) |
| 1704 | 1714 | // Take one pass at writing pairs of numbers so we can get their size information |
| 1705 | 1715 | { |
| 1706 | 1716 | PipelinePopper pp_discard(this); |
| 1707 | - pushDiscardFilter(pp_discard); | |
| 1717 | + activatePipelineStack(pp_discard, true); | |
| 1708 | 1718 | writeObjectStreamOffsets(offsets, first_obj); |
| 1709 | 1719 | first += m->pipeline->getCount(); |
| 1710 | 1720 | } |
| 1711 | 1721 | |
| 1712 | 1722 | // Set up a stream to write the stream data into a buffer. |
| 1713 | - Pipeline* next = pushPipeline(new Pl_String("object stream", nullptr, stream_buffer_pass2)); | |
| 1714 | - if (m->compress_streams && !m->qdf_mode) { | |
| 1715 | - compressed = true; | |
| 1716 | - next = pushPipeline(new Pl_Flate("compress object stream", next, Pl_Flate::a_deflate)); | |
| 1723 | + if (compressed) { | |
| 1724 | + activatePipelineStack( | |
| 1725 | + pp_ostream, | |
| 1726 | + pl::create<Pl_Flate>( | |
| 1727 | + pl::create<pl::String>(stream_buffer_pass2), Pl_Flate::a_deflate)); | |
| 1728 | + } else { | |
| 1729 | + activatePipelineStack(pp_ostream, stream_buffer_pass2); | |
| 1717 | 1730 | } |
| 1718 | - activatePipelineStack(pp_ostream); | |
| 1719 | 1731 | writeObjectStreamOffsets(offsets, first_obj); |
| 1720 | 1732 | writeString(stream_buffer_pass1); |
| 1721 | 1733 | stream_buffer_pass1.clear(); |
| ... | ... | @@ -2464,20 +2476,23 @@ QPDFWriter::writeXRefStream( |
| 2464 | 2476 | m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount()); |
| 2465 | 2477 | |
| 2466 | 2478 | std::string xref_data; |
| 2467 | - Pipeline* p = pushPipeline(new Pl_String("xref stream", nullptr, xref_data)); | |
| 2468 | - bool compressed = false; | |
| 2469 | - if (m->compress_streams && !m->qdf_mode) { | |
| 2470 | - compressed = true; | |
| 2471 | - if (!skip_compression) { | |
| 2472 | - // Write the stream dictionary for compression but don't actually compress. This helps | |
| 2473 | - // us with computation of padding for pass 1 of linearization. | |
| 2474 | - p = pushPipeline(new Pl_Flate("compress xref", p, Pl_Flate::a_deflate)); | |
| 2475 | - } | |
| 2476 | - p = pushPipeline(new Pl_PNGFilter("pngify xref", p, Pl_PNGFilter::a_encode, esize)); | |
| 2477 | - } | |
| 2479 | + const bool compressed = m->compress_streams && !m->qdf_mode; | |
| 2478 | 2480 | { |
| 2479 | 2481 | PipelinePopper pp_xref(this); |
| 2480 | - activatePipelineStack(pp_xref); | |
| 2482 | + if (compressed) { | |
| 2483 | + m->count_buffer.clear(); | |
| 2484 | + auto link = pl::create<pl::String>(xref_data); | |
| 2485 | + if (!skip_compression) { | |
| 2486 | + // Write the stream dictionary for compression but don't actually compress. This | |
| 2487 | + // helps us with computation of padding for pass 1 of linearization. | |
| 2488 | + link = pl::create<Pl_Flate>(std::move(link), Pl_Flate::a_deflate); | |
| 2489 | + } | |
| 2490 | + activatePipelineStack( | |
| 2491 | + pp_xref, pl::create<Pl_PNGFilter>(std::move(link), Pl_PNGFilter::a_encode, esize)); | |
| 2492 | + } else { | |
| 2493 | + activatePipelineStack(pp_xref, xref_data); | |
| 2494 | + } | |
| 2495 | + | |
| 2481 | 2496 | for (int i = first; i <= last; ++i) { |
| 2482 | 2497 | QPDFXRefEntry& e = m->new_obj[i].xref; |
| 2483 | 2498 | switch (e.getType()) { |
| ... | ... | @@ -2688,7 +2703,7 @@ QPDFWriter::writeLinearized() |
| 2688 | 2703 | pushPipeline(new Pl_StdioFile("linearization pass1", lin_pass1_file)); |
| 2689 | 2704 | activatePipelineStack(*pp_pass1); |
| 2690 | 2705 | } else { |
| 2691 | - pushDiscardFilter(*pp_pass1); | |
| 2706 | + activatePipelineStack(*pp_pass1, true); | |
| 2692 | 2707 | } |
| 2693 | 2708 | if (m->deterministic_id) { |
| 2694 | 2709 | pushMD5Pipeline(*pp_md5); |
| ... | ... | @@ -2893,7 +2908,7 @@ QPDFWriter::writeLinearized() |
| 2893 | 2908 | // Write hint stream to a buffer |
| 2894 | 2909 | { |
| 2895 | 2910 | PipelinePopper pp_hint(this); |
| 2896 | - pushStringPipeline(pp_hint, hint_buffer); | |
| 2911 | + activatePipelineStack(pp_hint, hint_buffer); | |
| 2897 | 2912 | writeHintStream(hint_id); |
| 2898 | 2913 | } |
| 2899 | 2914 | hint_length = QIntC::to_offset(hint_buffer.size()); | ... | ... |
libqpdf/QPDF_linearization.cc
| ... | ... | @@ -1755,24 +1755,23 @@ QPDF::generateHintStream( |
| 1755 | 1755 | |
| 1756 | 1756 | // Write the hint stream itself into a compressed memory buffer. Write through a counter so we |
| 1757 | 1757 | // can get offsets. |
| 1758 | - Pl_String hint_stream("hint stream", nullptr, hint_buffer); | |
| 1759 | - Pipeline* next = &hint_stream; | |
| 1760 | - std::unique_ptr<Pipeline> flate; | |
| 1761 | - if (compressed) { | |
| 1762 | - flate = | |
| 1763 | - std::make_unique<Pl_Flate>("compress hint stream", &hint_stream, Pl_Flate::a_deflate); | |
| 1764 | - next = flate.get(); | |
| 1765 | - } | |
| 1766 | - pl::Count c("count", next); | |
| 1767 | - BitWriter w(&c); | |
| 1758 | + std::string b; | |
| 1759 | + auto c = compressed | |
| 1760 | + ? std::make_unique<pl::Count>( | |
| 1761 | + "count", | |
| 1762 | + b, | |
| 1763 | + pl::create<Pl_Flate>(pl::create<pl::String>(hint_buffer), Pl_Flate::a_deflate)) | |
| 1764 | + : std::make_unique<pl::Count>("count", hint_buffer); | |
| 1765 | + | |
| 1766 | + BitWriter w(c.get()); | |
| 1768 | 1767 | |
| 1769 | 1768 | writeHPageOffset(w); |
| 1770 | - S = toI(c.getCount()); | |
| 1769 | + S = toI(c->getCount()); | |
| 1771 | 1770 | writeHSharedObject(w); |
| 1772 | 1771 | O = 0; |
| 1773 | 1772 | if (m->outline_hints.nobjects > 0) { |
| 1774 | - O = toI(c.getCount()); | |
| 1773 | + O = toI(c->getCount()); | |
| 1775 | 1774 | writeHGeneric(w, m->outline_hints); |
| 1776 | 1775 | } |
| 1777 | - c.finish(); | |
| 1776 | + c->finish(); | |
| 1778 | 1777 | } | ... | ... |
libqpdf/qpdf/Pipeline_private.hh
| ... | ... | @@ -3,22 +3,96 @@ |
| 3 | 3 | |
| 4 | 4 | #include <qpdf/Pipeline.hh> |
| 5 | 5 | |
| 6 | +#include <qpdf/Pl_Flate.hh> | |
| 7 | + | |
| 6 | 8 | namespace qpdf::pl |
| 7 | 9 | { |
| 8 | - class Count final: public Pipeline | |
| 10 | + struct Link | |
| 9 | 11 | { |
| 10 | - public: | |
| 11 | - Count(char const* identifier, Pipeline* next) : | |
| 12 | - Pipeline(identifier, next) | |
| 12 | + Link(std::unique_ptr<Link> next_link, std::unique_ptr<Pipeline> next_pl) : | |
| 13 | + next_link(std::move(next_link)), | |
| 14 | + next_pl(std::move(next_pl)) | |
| 13 | 15 | { |
| 14 | 16 | } |
| 15 | 17 | |
| 16 | - Count(char const* identifier, std::string* str) : | |
| 18 | + std::unique_ptr<Link> next_link{nullptr}; | |
| 19 | + std::unique_ptr<Pipeline> next_pl{nullptr}; | |
| 20 | + }; | |
| 21 | + | |
| 22 | + template <typename P, typename... Args> | |
| 23 | + std::unique_ptr<Link> | |
| 24 | + create(Args&&... args) | |
| 25 | + { | |
| 26 | + return std::make_unique<Link>( | |
| 27 | + nullptr, std::make_unique<P>("", nullptr, std::forward<Args>(args)...)); | |
| 28 | + } | |
| 29 | + | |
| 30 | + template <typename P, typename... Args> | |
| 31 | + std::unique_ptr<Link> | |
| 32 | + create(std::unique_ptr<Link> link, Args&&... args) | |
| 33 | + { | |
| 34 | + auto* next = link->next_pl.get(); | |
| 35 | + return std::make_unique<Link>( | |
| 36 | + std::move(link), std::make_unique<P>("", next, std::forward<Args>(args)...)); | |
| 37 | + } | |
| 38 | + | |
| 39 | + class String final: public Pipeline | |
| 40 | + { | |
| 41 | + public: | |
| 42 | + String(char const* identifier, Pipeline*, std::string& str) : | |
| 17 | 43 | Pipeline(identifier, nullptr), |
| 18 | 44 | str(str) |
| 19 | 45 | { |
| 20 | 46 | } |
| 21 | 47 | |
| 48 | + ~String() final = default; | |
| 49 | + | |
| 50 | + void | |
| 51 | + write(unsigned char const* buf, size_t len) final | |
| 52 | + { | |
| 53 | + if (len) { | |
| 54 | + str.append(reinterpret_cast<char const*>(buf), len); | |
| 55 | + } | |
| 56 | + } | |
| 57 | + | |
| 58 | + void | |
| 59 | + finish() final | |
| 60 | + { | |
| 61 | + } | |
| 62 | + | |
| 63 | + private: | |
| 64 | + std::string& str; | |
| 65 | + }; | |
| 66 | + | |
| 67 | + class Count final: public Pipeline | |
| 68 | + { | |
| 69 | + public: | |
| 70 | + // Count the number of characters written. If 'next' is not set, the content written will be | |
| 71 | + // discarded. | |
| 72 | + Count(char const* identifier, Pipeline* next = nullptr) : | |
| 73 | + Pipeline(identifier, next), | |
| 74 | + pass_immediately_to_next(next) | |
| 75 | + { | |
| 76 | + } | |
| 77 | + | |
| 78 | + // Count the number of characters written and pass them straight on to the pipeline owned | |
| 79 | + // by 'link'. If 'link' is not set, the content written will be discarded. | |
| 80 | + Count(char const* identifier, std::unique_ptr<Link> link = nullptr) : | |
| 81 | + Pipeline(identifier, link ? link->next_pl.get() : nullptr), | |
| 82 | + link(std::move(link)), | |
| 83 | + pass_immediately_to_next(this->link != nullptr) // parameter 'link' is moved-from here | |
| 84 | + { | |
| 85 | + } | |
| 86 | + | |
| 87 | + // Write to 'str'. If 'next' is set, 'str' will be written to 'next' when 'finish' is | |
| 88 | + // called. | |
| 89 | + Count(char const* identifier, std::string& str, std::unique_ptr<Link> link = nullptr) : | |
| 90 | + Pipeline(identifier, link ? link->next_pl.get() : nullptr), | |
| 91 | + str(&str), | |
| 92 | + link(std::move(link)) | |
| 93 | + { | |
| 94 | + } | |
| 95 | + | |
| 22 | 96 | ~Count() final = default; |
| 23 | 97 | |
| 24 | 98 | void |
| ... | ... | @@ -30,7 +104,7 @@ namespace qpdf::pl |
| 30 | 104 | return; |
| 31 | 105 | } |
| 32 | 106 | count += static_cast<qpdf_offset_t>(len); |
| 33 | - if (next()) { | |
| 107 | + if (pass_immediately_to_next) { | |
| 34 | 108 | next()->write(buf, len); |
| 35 | 109 | } |
| 36 | 110 | } |
| ... | ... | @@ -40,6 +114,9 @@ namespace qpdf::pl |
| 40 | 114 | finish() final |
| 41 | 115 | { |
| 42 | 116 | if (next()) { |
| 117 | + if (!pass_immediately_to_next) { | |
| 118 | + next()->write(reinterpret_cast<unsigned char const*>(str->data()), str->size()); | |
| 119 | + } | |
| 43 | 120 | next()->finish(); |
| 44 | 121 | } |
| 45 | 122 | } |
| ... | ... | @@ -53,6 +130,8 @@ namespace qpdf::pl |
| 53 | 130 | private: |
| 54 | 131 | qpdf_offset_t count{0}; |
| 55 | 132 | std::string* str{nullptr}; |
| 133 | + std::unique_ptr<Link> link{nullptr}; | |
| 134 | + bool pass_immediately_to_next{false}; | |
| 56 | 135 | }; |
| 57 | 136 | } // namespace qpdf::pl |
| 58 | 137 | ... | ... |
libqpdf/qpdf/QPDFWriter_private.hh
| ... | ... | @@ -115,6 +115,7 @@ class QPDFWriter::Members |
| 115 | 115 | std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects; |
| 116 | 116 | std::vector<Pipeline*> pipeline_stack; |
| 117 | 117 | unsigned long long next_stack_id{0}; |
| 118 | + std::string count_buffer; | |
| 118 | 119 | bool deterministic_id{false}; |
| 119 | 120 | Pl_MD5* md5_pipeline{nullptr}; |
| 120 | 121 | std::string deterministic_id_data; | ... | ... |
manual/release-notes.rst
| ... | ... | @@ -21,7 +21,7 @@ more detail. |
| 21 | 21 | integer object. Previously the method returned false if the first |
| 22 | 22 | dictionary object was not a linearization parameter dictionary. |
| 23 | 23 | |
| 24 | - = Fix parsing of object streams containing objects not seperated by | |
| 24 | + - Fix parsing of object streams containing objects not separated by | |
| 25 | 25 | white-space. Pre-2020 editions of the PDF specification incorrectly |
| 26 | 26 | stated that white-space was required between objects. qpdf relied on this |
| 27 | 27 | when parsing object streams. |
| ... | ... | @@ -40,8 +40,8 @@ more detail. |
| 40 | 40 | messages and object descriptions has been refactored with some |
| 41 | 41 | improvement both in runtime and memory usage. |
| 42 | 42 | |
| 43 | - - There has been some refactoring of how object streams are written with | |
| 44 | - some performance improvement. | |
| 43 | + - There has been some refactoring of QPDFWriter including how object | |
| 44 | + streams are written with some performance improvement. | |
| 45 | 45 | |
| 46 | 46 | .. cSpell:ignore substract |
| 47 | 47 | ... | ... |