Commit 9c7c7413dd21ab703d6cf5e5e93711293cc1e6a8

Authored by m-holger
1 parent 80631cb0

Enhance pl::Count to support buffered writing to next

include/qpdf/QPDFWriter.hh
... ... @@ -43,6 +43,11 @@
43 43 #include <qpdf/QPDFObjectHandle.hh>
44 44 #include <qpdf/QPDFXRefEntry.hh>
45 45  
  46 +namespace qpdf::pl
  47 +{
  48 + struct Link;
  49 +}
  50 +
46 51 class QPDF;
47 52 class Pl_Count;
48 53 class Pl_MD5;
... ... @@ -597,14 +602,17 @@ class QPDFWriter
597 602 // activate the pipeline stack. When the passed in PipelinePopper goes out of scope, the stack
598 603 // is popped.
599 604 Pipeline* pushPipeline(Pipeline*);
600   - void
601   - activatePipelineStack(PipelinePopper& pp, bool discard = false, std::string* str = nullptr);
  605 + void activatePipelineStack(PipelinePopper& pp, std::string& str);
  606 + void activatePipelineStack(PipelinePopper& pp, std::unique_ptr<qpdf::pl::Link> link);
  607 + void activatePipelineStack(
  608 + PipelinePopper& pp,
  609 + bool discard = false,
  610 + std::string* str = nullptr,
  611 + std::unique_ptr<qpdf::pl::Link> link = nullptr);
602 612 void initializePipelineStack(Pipeline*);
603 613  
604 614 void adjustAESStreamLength(size_t& length);
605 615 void pushEncryptionFilter(PipelinePopper&);
606   - void pushDiscardFilter(PipelinePopper&);
607   - void pushStringPipeline(PipelinePopper&, std::string& str);
608 616 void pushMD5Pipeline(PipelinePopper&);
609 617 void computeDeterministicIDData();
610 618  
... ...
libqpdf/QPDFWriter.cc
... ... @@ -917,11 +917,33 @@ QPDFWriter::initializePipelineStack(Pipeline* p)
917 917 }
918 918  
919 919 void
920   -QPDFWriter::activatePipelineStack(PipelinePopper& pp, bool discard, std::string* str)
  920 +QPDFWriter::activatePipelineStack(PipelinePopper& pp, std::string& str)
  921 +{
  922 + activatePipelineStack(pp, false, &str, nullptr);
  923 +}
  924 +
  925 +void
  926 +QPDFWriter::activatePipelineStack(PipelinePopper& pp, std::unique_ptr<pl::Link> link)
  927 +{
  928 + m->count_buffer.clear();
  929 + activatePipelineStack(pp, false, &m->count_buffer, std::move(link));
  930 +}
  931 +
  932 +void
  933 +QPDFWriter::activatePipelineStack(
  934 + PipelinePopper& pp, bool discard, std::string* str, std::unique_ptr<pl::Link> link)
921 935 {
922 936 std::string stack_id("stack " + std::to_string(m->next_stack_id));
923   - auto* c = str ? new pl::Count(stack_id.c_str(), str)
924   - : new pl::Count(stack_id.c_str(), discard ? nullptr : m->pipeline_stack.back());
  937 + pl::Count* c;
  938 + if (link) {
  939 + c = new pl::Count(stack_id.c_str(), m->count_buffer, std::move(link));
  940 + } else if (discard) {
  941 + c = new pl::Count(stack_id.c_str(), nullptr);
  942 + } else if (!str) {
  943 + c = new pl::Count(stack_id.c_str(), m->pipeline_stack.back());
  944 + } else {
  945 + c = new pl::Count(stack_id.c_str(), *str);
  946 + }
925 947 ++m->next_stack_id;
926 948 m->pipeline_stack.emplace_back(c);
927 949 m->pipeline = c;
... ... @@ -991,18 +1013,6 @@ QPDFWriter::pushEncryptionFilter(PipelinePopper& pp)
991 1013 }
992 1014  
993 1015 void
994   -QPDFWriter::pushDiscardFilter(PipelinePopper& pp)
995   -{
996   - activatePipelineStack(pp, true);
997   -}
998   -
999   -void
1000   -QPDFWriter::pushStringPipeline(PipelinePopper& pp, std::string& str)
1001   -{
1002   - activatePipelineStack(pp, true, &str);
1003   -}
1004   -
1005   -void
1006 1016 QPDFWriter::pushMD5Pipeline(PipelinePopper& pp)
1007 1017 {
1008 1018 if (!m->id2.empty()) {
... ... @@ -1287,9 +1297,9 @@ QPDFWriter::willFilterStream(
1287 1297 for (bool first_attempt: {true, false}) {
1288 1298 PipelinePopper pp_stream_data(this);
1289 1299 if (stream_data != nullptr) {
1290   - pushStringPipeline(pp_stream_data, *stream_data);
  1300 + activatePipelineStack(pp_stream_data, *stream_data);
1291 1301 } else {
1292   - pushDiscardFilter(pp_stream_data);
  1302 + activatePipelineStack(pp_stream_data, true);
1293 1303 }
1294 1304 try {
1295 1305 filtered = stream.pipeStreamData(
... ... @@ -1646,11 +1656,11 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1646 1656 std::string stream_buffer_pass1;
1647 1657 std::string stream_buffer_pass2;
1648 1658 int first_obj = -1;
1649   - bool compressed = false;
  1659 + const bool compressed = m->compress_streams && !m->qdf_mode;
1650 1660 {
1651 1661 // Pass 1
1652 1662 PipelinePopper pp_ostream_pass1(this);
1653   - pushStringPipeline(pp_ostream_pass1, stream_buffer_pass1);
  1663 + activatePipelineStack(pp_ostream_pass1, stream_buffer_pass1);
1654 1664  
1655 1665 int count = -1;
1656 1666 for (auto const& obj: m->object_stream_to_objects[old_id]) {
... ... @@ -1704,18 +1714,20 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1704 1714 // Take one pass at writing pairs of numbers so we can get their size information
1705 1715 {
1706 1716 PipelinePopper pp_discard(this);
1707   - pushDiscardFilter(pp_discard);
  1717 + activatePipelineStack(pp_discard, true);
1708 1718 writeObjectStreamOffsets(offsets, first_obj);
1709 1719 first += m->pipeline->getCount();
1710 1720 }
1711 1721  
1712 1722 // Set up a stream to write the stream data into a buffer.
1713   - Pipeline* next = pushPipeline(new Pl_String("object stream", nullptr, stream_buffer_pass2));
1714   - if (m->compress_streams && !m->qdf_mode) {
1715   - compressed = true;
1716   - next = pushPipeline(new Pl_Flate("compress object stream", next, Pl_Flate::a_deflate));
  1723 + if (compressed) {
  1724 + activatePipelineStack(
  1725 + pp_ostream,
  1726 + pl::create<Pl_Flate>(
  1727 + pl::create<pl::String>(stream_buffer_pass2), Pl_Flate::a_deflate));
  1728 + } else {
  1729 + activatePipelineStack(pp_ostream, stream_buffer_pass2);
1717 1730 }
1718   - activatePipelineStack(pp_ostream);
1719 1731 writeObjectStreamOffsets(offsets, first_obj);
1720 1732 writeString(stream_buffer_pass1);
1721 1733 stream_buffer_pass1.clear();
... ... @@ -2464,20 +2476,23 @@ QPDFWriter::writeXRefStream(
2464 2476 m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount());
2465 2477  
2466 2478 std::string xref_data;
2467   - Pipeline* p = pushPipeline(new Pl_String("xref stream", nullptr, xref_data));
2468   - bool compressed = false;
2469   - if (m->compress_streams && !m->qdf_mode) {
2470   - compressed = true;
2471   - if (!skip_compression) {
2472   - // Write the stream dictionary for compression but don't actually compress. This helps
2473   - // us with computation of padding for pass 1 of linearization.
2474   - p = pushPipeline(new Pl_Flate("compress xref", p, Pl_Flate::a_deflate));
2475   - }
2476   - p = pushPipeline(new Pl_PNGFilter("pngify xref", p, Pl_PNGFilter::a_encode, esize));
2477   - }
  2479 + const bool compressed = m->compress_streams && !m->qdf_mode;
2478 2480 {
2479 2481 PipelinePopper pp_xref(this);
2480   - activatePipelineStack(pp_xref);
  2482 + if (compressed) {
  2483 + m->count_buffer.clear();
  2484 + auto link = pl::create<pl::String>(xref_data);
  2485 + if (!skip_compression) {
  2486 + // Write the stream dictionary for compression but don't actually compress. This
  2487 + // helps us with computation of padding for pass 1 of linearization.
  2488 + link = pl::create<Pl_Flate>(std::move(link), Pl_Flate::a_deflate);
  2489 + }
  2490 + activatePipelineStack(
  2491 + pp_xref, pl::create<Pl_PNGFilter>(std::move(link), Pl_PNGFilter::a_encode, esize));
  2492 + } else {
  2493 + activatePipelineStack(pp_xref, xref_data);
  2494 + }
  2495 +
2481 2496 for (int i = first; i <= last; ++i) {
2482 2497 QPDFXRefEntry& e = m->new_obj[i].xref;
2483 2498 switch (e.getType()) {
... ... @@ -2688,7 +2703,7 @@ QPDFWriter::writeLinearized()
2688 2703 pushPipeline(new Pl_StdioFile("linearization pass1", lin_pass1_file));
2689 2704 activatePipelineStack(*pp_pass1);
2690 2705 } else {
2691   - pushDiscardFilter(*pp_pass1);
  2706 + activatePipelineStack(*pp_pass1, true);
2692 2707 }
2693 2708 if (m->deterministic_id) {
2694 2709 pushMD5Pipeline(*pp_md5);
... ... @@ -2893,7 +2908,7 @@ QPDFWriter::writeLinearized()
2893 2908 // Write hint stream to a buffer
2894 2909 {
2895 2910 PipelinePopper pp_hint(this);
2896   - pushStringPipeline(pp_hint, hint_buffer);
  2911 + activatePipelineStack(pp_hint, hint_buffer);
2897 2912 writeHintStream(hint_id);
2898 2913 }
2899 2914 hint_length = QIntC::to_offset(hint_buffer.size());
... ...
libqpdf/QPDF_linearization.cc
... ... @@ -1755,24 +1755,23 @@ QPDF::generateHintStream(
1755 1755  
1756 1756 // Write the hint stream itself into a compressed memory buffer. Write through a counter so we
1757 1757 // can get offsets.
1758   - Pl_String hint_stream("hint stream", nullptr, hint_buffer);
1759   - Pipeline* next = &hint_stream;
1760   - std::unique_ptr<Pipeline> flate;
1761   - if (compressed) {
1762   - flate =
1763   - std::make_unique<Pl_Flate>("compress hint stream", &hint_stream, Pl_Flate::a_deflate);
1764   - next = flate.get();
1765   - }
1766   - pl::Count c("count", next);
1767   - BitWriter w(&c);
  1758 + std::string b;
  1759 + auto c = compressed
  1760 + ? std::make_unique<pl::Count>(
  1761 + "count",
  1762 + b,
  1763 + pl::create<Pl_Flate>(pl::create<pl::String>(hint_buffer), Pl_Flate::a_deflate))
  1764 + : std::make_unique<pl::Count>("count", hint_buffer);
  1765 +
  1766 + BitWriter w(c.get());
1768 1767  
1769 1768 writeHPageOffset(w);
1770   - S = toI(c.getCount());
  1769 + S = toI(c->getCount());
1771 1770 writeHSharedObject(w);
1772 1771 O = 0;
1773 1772 if (m->outline_hints.nobjects > 0) {
1774   - O = toI(c.getCount());
  1773 + O = toI(c->getCount());
1775 1774 writeHGeneric(w, m->outline_hints);
1776 1775 }
1777   - c.finish();
  1776 + c->finish();
1778 1777 }
... ...
libqpdf/qpdf/Pipeline_private.hh
... ... @@ -3,22 +3,96 @@
3 3  
4 4 #include <qpdf/Pipeline.hh>
5 5  
  6 +#include <qpdf/Pl_Flate.hh>
  7 +
6 8 namespace qpdf::pl
7 9 {
8   - class Count final: public Pipeline
  10 + struct Link
9 11 {
10   - public:
11   - Count(char const* identifier, Pipeline* next) :
12   - Pipeline(identifier, next)
  12 + Link(std::unique_ptr<Link> next_link, std::unique_ptr<Pipeline> next_pl) :
  13 + next_link(std::move(next_link)),
  14 + next_pl(std::move(next_pl))
13 15 {
14 16 }
15 17  
16   - Count(char const* identifier, std::string* str) :
  18 + std::unique_ptr<Link> next_link{nullptr};
  19 + std::unique_ptr<Pipeline> next_pl{nullptr};
  20 + };
  21 +
  22 + template <typename P, typename... Args>
  23 + std::unique_ptr<Link>
  24 + create(Args&&... args)
  25 + {
  26 + return std::make_unique<Link>(
  27 + nullptr, std::make_unique<P>("", nullptr, std::forward<Args>(args)...));
  28 + }
  29 +
  30 + template <typename P, typename... Args>
  31 + std::unique_ptr<Link>
  32 + create(std::unique_ptr<Link> link, Args&&... args)
  33 + {
  34 + auto* next = link->next_pl.get();
  35 + return std::make_unique<Link>(
  36 + std::move(link), std::make_unique<P>("", next, std::forward<Args>(args)...));
  37 + }
  38 +
  39 + class String final: public Pipeline
  40 + {
  41 + public:
  42 + String(char const* identifier, Pipeline*, std::string& str) :
17 43 Pipeline(identifier, nullptr),
18 44 str(str)
19 45 {
20 46 }
21 47  
  48 + ~String() final = default;
  49 +
  50 + void
  51 + write(unsigned char const* buf, size_t len) final
  52 + {
  53 + if (len) {
  54 + str.append(reinterpret_cast<char const*>(buf), len);
  55 + }
  56 + }
  57 +
  58 + void
  59 + finish() final
  60 + {
  61 + }
  62 +
  63 + private:
  64 + std::string& str;
  65 + };
  66 +
  67 + class Count final: public Pipeline
  68 + {
  69 + public:
  70 + // Count the number of characters written. If 'next' is not set, the content written will be
  71 + // discarded.
  72 + Count(char const* identifier, Pipeline* next = nullptr) :
  73 + Pipeline(identifier, next),
  74 + pass_immediately_to_next(next)
  75 + {
  76 + }
  77 +
  78 + // Count the number of characters written. If 'next' is not set, the content written will be
  79 + // discarded.
  80 + Count(char const* identifier, std::unique_ptr<Link> link = nullptr) :
  81 + Pipeline(identifier, link ? link->next_pl.get() : nullptr),
  82 + link(std::move(link)),
  83 + pass_immediately_to_next(link)
  84 + {
  85 + }
  86 +
  87 + // Write to 'str'. If 'link' is set, 'str' will be written to the pipeline owned by 'link'
  88 + // when 'finish' is called.
  89 + Count(char const* identifier, std::string& str, std::unique_ptr<Link> link = nullptr) :
  90 + Pipeline(identifier, link ? link->next_pl.get() : nullptr),
  91 + str(&str),
  92 + link(std::move(link))
  93 + {
  94 + }
  95 +
22 96 ~Count() final = default;
23 97  
24 98 void
... ... @@ -30,7 +104,7 @@ namespace qpdf::pl
30 104 return;
31 105 }
32 106 count += static_cast<qpdf_offset_t>(len);
33   - if (next()) {
  107 + if (pass_immediately_to_next) {
34 108 next()->write(buf, len);
35 109 }
36 110 }
... ... @@ -40,6 +114,9 @@ namespace qpdf::pl
40 114 finish() final
41 115 {
42 116 if (next()) {
  117 + if (!pass_immediately_to_next) {
  118 + next()->write(reinterpret_cast<unsigned char const*>(str->data()), str->size());
  119 + }
43 120 next()->finish();
44 121 }
45 122 }
... ... @@ -53,6 +130,8 @@ namespace qpdf::pl
53 130 private:
54 131 qpdf_offset_t count{0};
55 132 std::string* str{nullptr};
  133 + std::unique_ptr<Link> link{nullptr};
  134 + bool pass_immediately_to_next{false};
56 135 };
57 136 } // namespace qpdf::pl
58 137  
... ...
libqpdf/qpdf/QPDFWriter_private.hh
... ... @@ -115,6 +115,7 @@ class QPDFWriter::Members
115 115 std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
116 116 std::vector<Pipeline*> pipeline_stack;
117 117 unsigned long long next_stack_id{0};
  118 + std::string count_buffer;
118 119 bool deterministic_id{false};
119 120 Pl_MD5* md5_pipeline{nullptr};
120 121 std::string deterministic_id_data;
... ...
manual/release-notes.rst
... ... @@ -21,7 +21,7 @@ more detail.
21 21 integer object. Previously the method returned false if the first
22 22 dictionary object was not a linearization parameter dictionary.
23 23  
24   - = Fix parsing of object streams containing objects not seperated by
  24 + - Fix parsing of object streams containing objects not separated by
25 25 white-space. Pre-2020 editions of the PDF specification incorrectly
26 26 stated that white-space was required between objects. qpdf relied on this
27 27 when parsing object streams.
... ... @@ -40,8 +40,8 @@ more detail.
40 40 messages and object descriptions has been refactored with some
41 41 improvement both in runtime and memory usage.
42 42  
43   - - There has been some refactoring of how object streams are written with
44   - some performance improvement.
  43 + - There has been some refactoring of QPDFWriter including how object
  44 + streams are written with some performance improvement.
45 45  
46 46 .. cSpell:ignore substract
47 47  
... ...