Commit 9c7c7413dd21ab703d6cf5e5e93711293cc1e6a8

Authored by m-holger
1 parent 80631cb0

Enhance pl::Count to support buffered writing to next

include/qpdf/QPDFWriter.hh
@@ -43,6 +43,11 @@ @@ -43,6 +43,11 @@
43 #include <qpdf/QPDFObjectHandle.hh> 43 #include <qpdf/QPDFObjectHandle.hh>
44 #include <qpdf/QPDFXRefEntry.hh> 44 #include <qpdf/QPDFXRefEntry.hh>
45 45
  46 +namespace qpdf::pl
  47 +{
  48 + struct Link;
  49 +}
  50 +
46 class QPDF; 51 class QPDF;
47 class Pl_Count; 52 class Pl_Count;
48 class Pl_MD5; 53 class Pl_MD5;
@@ -597,14 +602,17 @@ class QPDFWriter @@ -597,14 +602,17 @@ class QPDFWriter
597 // activate the pipeline stack. When the passed in PipelinePopper goes out of scope, the stack 602 // activate the pipeline stack. When the passed in PipelinePopper goes out of scope, the stack
598 // is popped. 603 // is popped.
599 Pipeline* pushPipeline(Pipeline*); 604 Pipeline* pushPipeline(Pipeline*);
600 - void  
601 - activatePipelineStack(PipelinePopper& pp, bool discard = false, std::string* str = nullptr); 605 + void activatePipelineStack(PipelinePopper& pp, std::string& str);
  606 + void activatePipelineStack(PipelinePopper& pp, std::unique_ptr<qpdf::pl::Link> link);
  607 + void activatePipelineStack(
  608 + PipelinePopper& pp,
  609 + bool discard = false,
  610 + std::string* str = nullptr,
  611 + std::unique_ptr<qpdf::pl::Link> link = nullptr);
602 void initializePipelineStack(Pipeline*); 612 void initializePipelineStack(Pipeline*);
603 613
604 void adjustAESStreamLength(size_t& length); 614 void adjustAESStreamLength(size_t& length);
605 void pushEncryptionFilter(PipelinePopper&); 615 void pushEncryptionFilter(PipelinePopper&);
606 - void pushDiscardFilter(PipelinePopper&);  
607 - void pushStringPipeline(PipelinePopper&, std::string& str);  
608 void pushMD5Pipeline(PipelinePopper&); 616 void pushMD5Pipeline(PipelinePopper&);
609 void computeDeterministicIDData(); 617 void computeDeterministicIDData();
610 618
libqpdf/QPDFWriter.cc
@@ -917,11 +917,33 @@ QPDFWriter::initializePipelineStack(Pipeline* p) @@ -917,11 +917,33 @@ QPDFWriter::initializePipelineStack(Pipeline* p)
917 } 917 }
918 918
919 void 919 void
920 -QPDFWriter::activatePipelineStack(PipelinePopper& pp, bool discard, std::string* str) 920 +QPDFWriter::activatePipelineStack(PipelinePopper& pp, std::string& str)
  921 +{
  922 + activatePipelineStack(pp, false, &str, nullptr);
  923 +}
  924 +
  925 +void
  926 +QPDFWriter::activatePipelineStack(PipelinePopper& pp, std::unique_ptr<pl::Link> link)
  927 +{
  928 + m->count_buffer.clear();
  929 + activatePipelineStack(pp, false, &m->count_buffer, std::move(link));
  930 +}
  931 +
  932 +void
  933 +QPDFWriter::activatePipelineStack(
  934 + PipelinePopper& pp, bool discard, std::string* str, std::unique_ptr<pl::Link> link)
921 { 935 {
922 std::string stack_id("stack " + std::to_string(m->next_stack_id)); 936 std::string stack_id("stack " + std::to_string(m->next_stack_id));
923 - auto* c = str ? new pl::Count(stack_id.c_str(), str)  
924 - : new pl::Count(stack_id.c_str(), discard ? nullptr : m->pipeline_stack.back()); 937 + pl::Count* c;
  938 + if (link) {
  939 + c = new pl::Count(stack_id.c_str(), m->count_buffer, std::move(link));
  940 + } else if (discard) {
  941 + c = new pl::Count(stack_id.c_str(), nullptr);
  942 + } else if (!str) {
  943 + c = new pl::Count(stack_id.c_str(), m->pipeline_stack.back());
  944 + } else {
  945 + c = new pl::Count(stack_id.c_str(), *str);
  946 + }
925 ++m->next_stack_id; 947 ++m->next_stack_id;
926 m->pipeline_stack.emplace_back(c); 948 m->pipeline_stack.emplace_back(c);
927 m->pipeline = c; 949 m->pipeline = c;
@@ -991,18 +1013,6 @@ QPDFWriter::pushEncryptionFilter(PipelinePopper& pp) @@ -991,18 +1013,6 @@ QPDFWriter::pushEncryptionFilter(PipelinePopper& pp)
991 } 1013 }
992 1014
993 void 1015 void
994 -QPDFWriter::pushDiscardFilter(PipelinePopper& pp)  
995 -{  
996 - activatePipelineStack(pp, true);  
997 -}  
998 -  
999 -void  
1000 -QPDFWriter::pushStringPipeline(PipelinePopper& pp, std::string& str)  
1001 -{  
1002 - activatePipelineStack(pp, true, &str);  
1003 -}  
1004 -  
1005 -void  
1006 QPDFWriter::pushMD5Pipeline(PipelinePopper& pp) 1016 QPDFWriter::pushMD5Pipeline(PipelinePopper& pp)
1007 { 1017 {
1008 if (!m->id2.empty()) { 1018 if (!m->id2.empty()) {
@@ -1287,9 +1297,9 @@ QPDFWriter::willFilterStream( @@ -1287,9 +1297,9 @@ QPDFWriter::willFilterStream(
1287 for (bool first_attempt: {true, false}) { 1297 for (bool first_attempt: {true, false}) {
1288 PipelinePopper pp_stream_data(this); 1298 PipelinePopper pp_stream_data(this);
1289 if (stream_data != nullptr) { 1299 if (stream_data != nullptr) {
1290 - pushStringPipeline(pp_stream_data, *stream_data); 1300 + activatePipelineStack(pp_stream_data, *stream_data);
1291 } else { 1301 } else {
1292 - pushDiscardFilter(pp_stream_data); 1302 + activatePipelineStack(pp_stream_data, true);
1293 } 1303 }
1294 try { 1304 try {
1295 filtered = stream.pipeStreamData( 1305 filtered = stream.pipeStreamData(
@@ -1646,11 +1656,11 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1646,11 +1656,11 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1646 std::string stream_buffer_pass1; 1656 std::string stream_buffer_pass1;
1647 std::string stream_buffer_pass2; 1657 std::string stream_buffer_pass2;
1648 int first_obj = -1; 1658 int first_obj = -1;
1649 - bool compressed = false; 1659 + const bool compressed = m->compress_streams && !m->qdf_mode;
1650 { 1660 {
1651 // Pass 1 1661 // Pass 1
1652 PipelinePopper pp_ostream_pass1(this); 1662 PipelinePopper pp_ostream_pass1(this);
1653 - pushStringPipeline(pp_ostream_pass1, stream_buffer_pass1); 1663 + activatePipelineStack(pp_ostream_pass1, stream_buffer_pass1);
1654 1664
1655 int count = -1; 1665 int count = -1;
1656 for (auto const& obj: m->object_stream_to_objects[old_id]) { 1666 for (auto const& obj: m->object_stream_to_objects[old_id]) {
@@ -1704,18 +1714,20 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) @@ -1704,18 +1714,20 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1704 // Take one pass at writing pairs of numbers so we can get their size information 1714 // Take one pass at writing pairs of numbers so we can get their size information
1705 { 1715 {
1706 PipelinePopper pp_discard(this); 1716 PipelinePopper pp_discard(this);
1707 - pushDiscardFilter(pp_discard); 1717 + activatePipelineStack(pp_discard, true);
1708 writeObjectStreamOffsets(offsets, first_obj); 1718 writeObjectStreamOffsets(offsets, first_obj);
1709 first += m->pipeline->getCount(); 1719 first += m->pipeline->getCount();
1710 } 1720 }
1711 1721
1712 // Set up a stream to write the stream data into a buffer. 1722 // Set up a stream to write the stream data into a buffer.
1713 - Pipeline* next = pushPipeline(new Pl_String("object stream", nullptr, stream_buffer_pass2));  
1714 - if (m->compress_streams && !m->qdf_mode) {  
1715 - compressed = true;  
1716 - next = pushPipeline(new Pl_Flate("compress object stream", next, Pl_Flate::a_deflate)); 1723 + if (compressed) {
  1724 + activatePipelineStack(
  1725 + pp_ostream,
  1726 + pl::create<Pl_Flate>(
  1727 + pl::create<pl::String>(stream_buffer_pass2), Pl_Flate::a_deflate));
  1728 + } else {
  1729 + activatePipelineStack(pp_ostream, stream_buffer_pass2);
1717 } 1730 }
1718 - activatePipelineStack(pp_ostream);  
1719 writeObjectStreamOffsets(offsets, first_obj); 1731 writeObjectStreamOffsets(offsets, first_obj);
1720 writeString(stream_buffer_pass1); 1732 writeString(stream_buffer_pass1);
1721 stream_buffer_pass1.clear(); 1733 stream_buffer_pass1.clear();
@@ -2464,20 +2476,23 @@ QPDFWriter::writeXRefStream( @@ -2464,20 +2476,23 @@ QPDFWriter::writeXRefStream(
2464 m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount()); 2476 m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount());
2465 2477
2466 std::string xref_data; 2478 std::string xref_data;
2467 - Pipeline* p = pushPipeline(new Pl_String("xref stream", nullptr, xref_data));  
2468 - bool compressed = false;  
2469 - if (m->compress_streams && !m->qdf_mode) {  
2470 - compressed = true;  
2471 - if (!skip_compression) {  
2472 - // Write the stream dictionary for compression but don't actually compress. This helps  
2473 - // us with computation of padding for pass 1 of linearization.  
2474 - p = pushPipeline(new Pl_Flate("compress xref", p, Pl_Flate::a_deflate));  
2475 - }  
2476 - p = pushPipeline(new Pl_PNGFilter("pngify xref", p, Pl_PNGFilter::a_encode, esize));  
2477 - } 2479 + const bool compressed = m->compress_streams && !m->qdf_mode;
2478 { 2480 {
2479 PipelinePopper pp_xref(this); 2481 PipelinePopper pp_xref(this);
2480 - activatePipelineStack(pp_xref); 2482 + if (compressed) {
  2483 + m->count_buffer.clear();
  2484 + auto link = pl::create<pl::String>(xref_data);
  2485 + if (!skip_compression) {
  2486 + // Write the stream dictionary for compression but don't actually compress. This
  2487 + // helps us with computation of padding for pass 1 of linearization.
  2488 + link = pl::create<Pl_Flate>(std::move(link), Pl_Flate::a_deflate);
  2489 + }
  2490 + activatePipelineStack(
  2491 + pp_xref, pl::create<Pl_PNGFilter>(std::move(link), Pl_PNGFilter::a_encode, esize));
  2492 + } else {
  2493 + activatePipelineStack(pp_xref, xref_data);
  2494 + }
  2495 +
2481 for (int i = first; i <= last; ++i) { 2496 for (int i = first; i <= last; ++i) {
2482 QPDFXRefEntry& e = m->new_obj[i].xref; 2497 QPDFXRefEntry& e = m->new_obj[i].xref;
2483 switch (e.getType()) { 2498 switch (e.getType()) {
@@ -2688,7 +2703,7 @@ QPDFWriter::writeLinearized() @@ -2688,7 +2703,7 @@ QPDFWriter::writeLinearized()
2688 pushPipeline(new Pl_StdioFile("linearization pass1", lin_pass1_file)); 2703 pushPipeline(new Pl_StdioFile("linearization pass1", lin_pass1_file));
2689 activatePipelineStack(*pp_pass1); 2704 activatePipelineStack(*pp_pass1);
2690 } else { 2705 } else {
2691 - pushDiscardFilter(*pp_pass1); 2706 + activatePipelineStack(*pp_pass1, true);
2692 } 2707 }
2693 if (m->deterministic_id) { 2708 if (m->deterministic_id) {
2694 pushMD5Pipeline(*pp_md5); 2709 pushMD5Pipeline(*pp_md5);
@@ -2893,7 +2908,7 @@ QPDFWriter::writeLinearized() @@ -2893,7 +2908,7 @@ QPDFWriter::writeLinearized()
2893 // Write hint stream to a buffer 2908 // Write hint stream to a buffer
2894 { 2909 {
2895 PipelinePopper pp_hint(this); 2910 PipelinePopper pp_hint(this);
2896 - pushStringPipeline(pp_hint, hint_buffer); 2911 + activatePipelineStack(pp_hint, hint_buffer);
2897 writeHintStream(hint_id); 2912 writeHintStream(hint_id);
2898 } 2913 }
2899 hint_length = QIntC::to_offset(hint_buffer.size()); 2914 hint_length = QIntC::to_offset(hint_buffer.size());
libqpdf/QPDF_linearization.cc
@@ -1755,24 +1755,23 @@ QPDF::generateHintStream( @@ -1755,24 +1755,23 @@ QPDF::generateHintStream(
1755 1755
1756 // Write the hint stream itself into a compressed memory buffer. Write through a counter so we 1756 // Write the hint stream itself into a compressed memory buffer. Write through a counter so we
1757 // can get offsets. 1757 // can get offsets.
1758 - Pl_String hint_stream("hint stream", nullptr, hint_buffer);  
1759 - Pipeline* next = &hint_stream;  
1760 - std::unique_ptr<Pipeline> flate;  
1761 - if (compressed) {  
1762 - flate =  
1763 - std::make_unique<Pl_Flate>("compress hint stream", &hint_stream, Pl_Flate::a_deflate);  
1764 - next = flate.get();  
1765 - }  
1766 - pl::Count c("count", next);  
1767 - BitWriter w(&c); 1758 + std::string b;
  1759 + auto c = compressed
  1760 + ? std::make_unique<pl::Count>(
  1761 + "count",
  1762 + b,
  1763 + pl::create<Pl_Flate>(pl::create<pl::String>(hint_buffer), Pl_Flate::a_deflate))
  1764 + : std::make_unique<pl::Count>("count", hint_buffer);
  1765 +
  1766 + BitWriter w(c.get());
1768 1767
1769 writeHPageOffset(w); 1768 writeHPageOffset(w);
1770 - S = toI(c.getCount()); 1769 + S = toI(c->getCount());
1771 writeHSharedObject(w); 1770 writeHSharedObject(w);
1772 O = 0; 1771 O = 0;
1773 if (m->outline_hints.nobjects > 0) { 1772 if (m->outline_hints.nobjects > 0) {
1774 - O = toI(c.getCount()); 1773 + O = toI(c->getCount());
1775 writeHGeneric(w, m->outline_hints); 1774 writeHGeneric(w, m->outline_hints);
1776 } 1775 }
1777 - c.finish(); 1776 + c->finish();
1778 } 1777 }
libqpdf/qpdf/Pipeline_private.hh
@@ -3,22 +3,96 @@ @@ -3,22 +3,96 @@
3 3
4 #include <qpdf/Pipeline.hh> 4 #include <qpdf/Pipeline.hh>
5 5
  6 +#include <qpdf/Pl_Flate.hh>
  7 +
6 namespace qpdf::pl 8 namespace qpdf::pl
7 { 9 {
8 - class Count final: public Pipeline 10 + struct Link
9 { 11 {
10 - public:  
11 - Count(char const* identifier, Pipeline* next) :  
12 - Pipeline(identifier, next) 12 + Link(std::unique_ptr<Link> next_link, std::unique_ptr<Pipeline> next_pl) :
  13 + next_link(std::move(next_link)),
  14 + next_pl(std::move(next_pl))
13 { 15 {
14 } 16 }
15 17
16 - Count(char const* identifier, std::string* str) : 18 + std::unique_ptr<Link> next_link{nullptr};
  19 + std::unique_ptr<Pipeline> next_pl{nullptr};
  20 + };
  21 +
  22 + template <typename P, typename... Args>
  23 + std::unique_ptr<Link>
  24 + create(Args&&... args)
  25 + {
  26 + return std::make_unique<Link>(
  27 + nullptr, std::make_unique<P>("", nullptr, std::forward<Args>(args)...));
  28 + }
  29 +
  30 + template <typename P, typename... Args>
  31 + std::unique_ptr<Link>
  32 + create(std::unique_ptr<Link> link, Args&&... args)
  33 + {
  34 + auto* next = link->next_pl.get();
  35 + return std::make_unique<Link>(
  36 + std::move(link), std::make_unique<P>("", next, std::forward<Args>(args)...));
  37 + }
  38 +
  39 + class String final: public Pipeline
  40 + {
  41 + public:
  42 + String(char const* identifier, Pipeline*, std::string& str) :
17 Pipeline(identifier, nullptr), 43 Pipeline(identifier, nullptr),
18 str(str) 44 str(str)
19 { 45 {
20 } 46 }
21 47
  48 + ~String() final = default;
  49 +
  50 + void
  51 + write(unsigned char const* buf, size_t len) final
  52 + {
  53 + if (len) {
  54 + str.append(reinterpret_cast<char const*>(buf), len);
  55 + }
  56 + }
  57 +
  58 + void
  59 + finish() final
  60 + {
  61 + }
  62 +
  63 + private:
  64 + std::string& str;
  65 + };
  66 +
  67 + class Count final: public Pipeline
  68 + {
  69 + public:
  70 + // Count the number of characters written. If 'next' is not set, the content written will be
  71 + // discarded.
  72 + Count(char const* identifier, Pipeline* next = nullptr) :
  73 + Pipeline(identifier, next),
  74 + pass_immediately_to_next(next)
  75 + {
  76 + }
  77 +
  78 + // Count the number of characters written. If 'next' is not set, the content written will be
  79 + // discarded.
  80 + Count(char const* identifier, std::unique_ptr<Link> link = nullptr) :
  81 + Pipeline(identifier, link ? link->next_pl.get() : nullptr),
  82 + link(std::move(link)),
  83 + pass_immediately_to_next(link)
  84 + {
  85 + }
  86 +
  87 + // Write to 'str'. If 'next' is set, 'str' will be written to 'next' when 'finish' is
  88 + // called.
  89 + Count(char const* identifier, std::string& str, std::unique_ptr<Link> link = nullptr) :
  90 + Pipeline(identifier, link ? link->next_pl.get() : nullptr),
  91 + str(&str),
  92 + link(std::move(link))
  93 + {
  94 + }
  95 +
22 ~Count() final = default; 96 ~Count() final = default;
23 97
24 void 98 void
@@ -30,7 +104,7 @@ namespace qpdf::pl @@ -30,7 +104,7 @@ namespace qpdf::pl
30 return; 104 return;
31 } 105 }
32 count += static_cast<qpdf_offset_t>(len); 106 count += static_cast<qpdf_offset_t>(len);
33 - if (next()) { 107 + if (pass_immediately_to_next) {
34 next()->write(buf, len); 108 next()->write(buf, len);
35 } 109 }
36 } 110 }
@@ -40,6 +114,9 @@ namespace qpdf::pl @@ -40,6 +114,9 @@ namespace qpdf::pl
40 finish() final 114 finish() final
41 { 115 {
42 if (next()) { 116 if (next()) {
  117 + if (!pass_immediately_to_next) {
  118 + next()->write(reinterpret_cast<unsigned char const*>(str->data()), str->size());
  119 + }
43 next()->finish(); 120 next()->finish();
44 } 121 }
45 } 122 }
@@ -53,6 +130,8 @@ namespace qpdf::pl @@ -53,6 +130,8 @@ namespace qpdf::pl
53 private: 130 private:
54 qpdf_offset_t count{0}; 131 qpdf_offset_t count{0};
55 std::string* str{nullptr}; 132 std::string* str{nullptr};
  133 + std::unique_ptr<Link> link{nullptr};
  134 + bool pass_immediately_to_next{false};
56 }; 135 };
57 } // namespace qpdf::pl 136 } // namespace qpdf::pl
58 137
libqpdf/qpdf/QPDFWriter_private.hh
@@ -115,6 +115,7 @@ class QPDFWriter::Members @@ -115,6 +115,7 @@ class QPDFWriter::Members
115 std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects; 115 std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
116 std::vector<Pipeline*> pipeline_stack; 116 std::vector<Pipeline*> pipeline_stack;
117 unsigned long long next_stack_id{0}; 117 unsigned long long next_stack_id{0};
  118 + std::string count_buffer;
118 bool deterministic_id{false}; 119 bool deterministic_id{false};
119 Pl_MD5* md5_pipeline{nullptr}; 120 Pl_MD5* md5_pipeline{nullptr};
120 std::string deterministic_id_data; 121 std::string deterministic_id_data;
manual/release-notes.rst
@@ -21,7 +21,7 @@ more detail. @@ -21,7 +21,7 @@ more detail.
21 integer object. Previously the method returned false if the first 21 integer object. Previously the method returned false if the first
22 dictionary object was not a linearization parameter dictionary. 22 dictionary object was not a linearization parameter dictionary.
23 23
24 - = Fix parsing of object streams containing objects not seperated by 24 + - Fix parsing of object streams containing objects not seperated by
25 white-space. Pre-2020 editions of the PDF specification incorrectly 25 white-space. Pre-2020 editions of the PDF specification incorrectly
26 stated that white-space was required between objects. qpdf relied on this 26 stated that white-space was required between objects. qpdf relied on this
27 when parsing object streams. 27 when parsing object streams.
@@ -40,8 +40,8 @@ more detail. @@ -40,8 +40,8 @@ more detail.
40 messages and object descriptions has been refactored with some 40 messages and object descriptions has been refactored with some
41 improvement both in runtime and memory usage. 41 improvement both in runtime and memory usage.
42 42
43 - - There has been some refactoring of how object streams are written with  
44 - some performance improvement. 43 + - There has been some refactoring of QPDFWriter including how object
  44 + streams are written with some performance improvement.
45 45
46 .. cSpell:ignore substract 46 .. cSpell:ignore substract
47 47