diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh index 5628a36..a33f916 100644 --- a/include/qpdf/QPDFWriter.hh +++ b/include/qpdf/QPDFWriter.hh @@ -43,6 +43,11 @@ #include #include +namespace qpdf::pl +{ + struct Link; +} + class QPDF; class Pl_Count; class Pl_MD5; @@ -597,14 +602,17 @@ class QPDFWriter // activate the pipeline stack. When the passed in PipelinePopper goes out of scope, the stack // is popped. Pipeline* pushPipeline(Pipeline*); - void - activatePipelineStack(PipelinePopper& pp, bool discard = false, std::string* str = nullptr); + void activatePipelineStack(PipelinePopper& pp, std::string& str); + void activatePipelineStack(PipelinePopper& pp, std::unique_ptr link); + void activatePipelineStack( + PipelinePopper& pp, + bool discard = false, + std::string* str = nullptr, + std::unique_ptr link = nullptr); void initializePipelineStack(Pipeline*); void adjustAESStreamLength(size_t& length); void pushEncryptionFilter(PipelinePopper&); - void pushDiscardFilter(PipelinePopper&); - void pushStringPipeline(PipelinePopper&, std::string& str); void pushMD5Pipeline(PipelinePopper&); void computeDeterministicIDData(); diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 43e1ef4..79d4498 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -917,11 +917,33 @@ QPDFWriter::initializePipelineStack(Pipeline* p) } void -QPDFWriter::activatePipelineStack(PipelinePopper& pp, bool discard, std::string* str) +QPDFWriter::activatePipelineStack(PipelinePopper& pp, std::string& str) +{ + activatePipelineStack(pp, false, &str, nullptr); +} + +void +QPDFWriter::activatePipelineStack(PipelinePopper& pp, std::unique_ptr link) +{ + m->count_buffer.clear(); + activatePipelineStack(pp, false, &m->count_buffer, std::move(link)); +} + +void +QPDFWriter::activatePipelineStack( + PipelinePopper& pp, bool discard, std::string* str, std::unique_ptr link) { std::string stack_id("stack " + std::to_string(m->next_stack_id)); - auto* c = str ? 
new pl::Count(stack_id.c_str(), str) - : new pl::Count(stack_id.c_str(), discard ? nullptr : m->pipeline_stack.back()); + pl::Count* c; + if (link) { + c = new pl::Count(stack_id.c_str(), m->count_buffer, std::move(link)); + } else if (discard) { + c = new pl::Count(stack_id.c_str(), nullptr); + } else if (!str) { + c = new pl::Count(stack_id.c_str(), m->pipeline_stack.back()); + } else { + c = new pl::Count(stack_id.c_str(), *str); + } ++m->next_stack_id; m->pipeline_stack.emplace_back(c); m->pipeline = c; @@ -991,18 +1013,6 @@ QPDFWriter::pushEncryptionFilter(PipelinePopper& pp) } void -QPDFWriter::pushDiscardFilter(PipelinePopper& pp) -{ - activatePipelineStack(pp, true); -} - -void -QPDFWriter::pushStringPipeline(PipelinePopper& pp, std::string& str) -{ - activatePipelineStack(pp, true, &str); -} - -void QPDFWriter::pushMD5Pipeline(PipelinePopper& pp) { if (!m->id2.empty()) { @@ -1287,9 +1297,9 @@ QPDFWriter::willFilterStream( for (bool first_attempt: {true, false}) { PipelinePopper pp_stream_data(this); if (stream_data != nullptr) { - pushStringPipeline(pp_stream_data, *stream_data); + activatePipelineStack(pp_stream_data, *stream_data); } else { - pushDiscardFilter(pp_stream_data); + activatePipelineStack(pp_stream_data, true); } try { filtered = stream.pipeStreamData( @@ -1646,11 +1656,11 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) std::string stream_buffer_pass1; std::string stream_buffer_pass2; int first_obj = -1; - bool compressed = false; + const bool compressed = m->compress_streams && !m->qdf_mode; { // Pass 1 PipelinePopper pp_ostream_pass1(this); - pushStringPipeline(pp_ostream_pass1, stream_buffer_pass1); + activatePipelineStack(pp_ostream_pass1, stream_buffer_pass1); int count = -1; for (auto const& obj: m->object_stream_to_objects[old_id]) { @@ -1704,18 +1714,20 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) // Take one pass at writing pairs of numbers so we can get their size information { PipelinePopper pp_discard(this); 
- pushDiscardFilter(pp_discard); + activatePipelineStack(pp_discard, true); writeObjectStreamOffsets(offsets, first_obj); first += m->pipeline->getCount(); } // Set up a stream to write the stream data into a buffer. - Pipeline* next = pushPipeline(new Pl_String("object stream", nullptr, stream_buffer_pass2)); - if (m->compress_streams && !m->qdf_mode) { - compressed = true; - next = pushPipeline(new Pl_Flate("compress object stream", next, Pl_Flate::a_deflate)); + if (compressed) { + activatePipelineStack( + pp_ostream, + pl::create( + pl::create(stream_buffer_pass2), Pl_Flate::a_deflate)); + } else { + activatePipelineStack(pp_ostream, stream_buffer_pass2); } - activatePipelineStack(pp_ostream); writeObjectStreamOffsets(offsets, first_obj); writeString(stream_buffer_pass1); stream_buffer_pass1.clear(); @@ -2464,20 +2476,23 @@ QPDFWriter::writeXRefStream( m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount()); std::string xref_data; - Pipeline* p = pushPipeline(new Pl_String("xref stream", nullptr, xref_data)); - bool compressed = false; - if (m->compress_streams && !m->qdf_mode) { - compressed = true; - if (!skip_compression) { - // Write the stream dictionary for compression but don't actually compress. This helps - // us with computation of padding for pass 1 of linearization. - p = pushPipeline(new Pl_Flate("compress xref", p, Pl_Flate::a_deflate)); - } - p = pushPipeline(new Pl_PNGFilter("pngify xref", p, Pl_PNGFilter::a_encode, esize)); - } + const bool compressed = m->compress_streams && !m->qdf_mode; { PipelinePopper pp_xref(this); - activatePipelineStack(pp_xref); + if (compressed) { + m->count_buffer.clear(); + auto link = pl::create(xref_data); + if (!skip_compression) { + // Write the stream dictionary for compression but don't actually compress. This + // helps us with computation of padding for pass 1 of linearization. 
+ link = pl::create(std::move(link), Pl_Flate::a_deflate); + } + activatePipelineStack( + pp_xref, pl::create(std::move(link), Pl_PNGFilter::a_encode, esize)); + } else { + activatePipelineStack(pp_xref, xref_data); + } + for (int i = first; i <= last; ++i) { QPDFXRefEntry& e = m->new_obj[i].xref; switch (e.getType()) { @@ -2688,7 +2703,7 @@ QPDFWriter::writeLinearized() pushPipeline(new Pl_StdioFile("linearization pass1", lin_pass1_file)); activatePipelineStack(*pp_pass1); } else { - pushDiscardFilter(*pp_pass1); + activatePipelineStack(*pp_pass1, true); } if (m->deterministic_id) { pushMD5Pipeline(*pp_md5); @@ -2893,7 +2908,7 @@ QPDFWriter::writeLinearized() // Write hint stream to a buffer { PipelinePopper pp_hint(this); - pushStringPipeline(pp_hint, hint_buffer); + activatePipelineStack(pp_hint, hint_buffer); writeHintStream(hint_id); } hint_length = QIntC::to_offset(hint_buffer.size()); diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index c307648..50fe8b6 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -1755,24 +1755,23 @@ QPDF::generateHintStream( // Write the hint stream itself into a compressed memory buffer. Write through a counter so we // can get offsets. - Pl_String hint_stream("hint stream", nullptr, hint_buffer); - Pipeline* next = &hint_stream; - std::unique_ptr flate; - if (compressed) { - flate = - std::make_unique("compress hint stream", &hint_stream, Pl_Flate::a_deflate); - next = flate.get(); - } - pl::Count c("count", next); - BitWriter w(&c); + std::string b; + auto c = compressed + ? 
std::make_unique( + "count", + b, + pl::create(pl::create(hint_buffer), Pl_Flate::a_deflate)) + : std::make_unique("count", hint_buffer); + + BitWriter w(c.get()); writeHPageOffset(w); - S = toI(c.getCount()); + S = toI(c->getCount()); writeHSharedObject(w); O = 0; if (m->outline_hints.nobjects > 0) { - O = toI(c.getCount()); + O = toI(c->getCount()); writeHGeneric(w, m->outline_hints); } - c.finish(); + c->finish(); } diff --git a/libqpdf/qpdf/Pipeline_private.hh b/libqpdf/qpdf/Pipeline_private.hh index 4cc5823..b632e29 100644 --- a/libqpdf/qpdf/Pipeline_private.hh +++ b/libqpdf/qpdf/Pipeline_private.hh @@ -3,22 +3,96 @@ #include +#include + namespace qpdf::pl { - class Count final: public Pipeline + struct Link { - public: - Count(char const* identifier, Pipeline* next) : - Pipeline(identifier, next) + Link(std::unique_ptr next_link, std::unique_ptr next_pl) : + next_link(std::move(next_link)), + next_pl(std::move(next_pl)) { } - Count(char const* identifier, std::string* str) : + std::unique_ptr next_link{nullptr}; + std::unique_ptr next_pl{nullptr}; + }; + + template + std::unique_ptr + create(Args&&... args) + { + return std::make_unique( + nullptr, std::make_unique

("", nullptr, std::forward(args)...)); + } + + template + std::unique_ptr + create(std::unique_ptr link, Args&&... args) + { + auto* next = link->next_pl.get(); + return std::make_unique( + std::move(link), std::make_unique

("", next, std::forward(args)...)); + } + + class String final: public Pipeline + { + public: + String(char const* identifier, Pipeline*, std::string& str) : Pipeline(identifier, nullptr), str(str) { } + ~String() final = default; + + void + write(unsigned char const* buf, size_t len) final + { + if (len) { + str.append(reinterpret_cast(buf), len); + } + } + + void + finish() final + { + } + + private: + std::string& str; + }; + + class Count final: public Pipeline + { + public: + // Count the number of characters written. If 'next' is not set, the content written will be + // discarded. + Count(char const* identifier, Pipeline* next = nullptr) : + Pipeline(identifier, next), + pass_immediately_to_next(next) + { + } + + // Count the number of characters written and pass them on to 'link'. If 'link' is not set, + // the content written will be discarded. + Count(char const* identifier, std::unique_ptr link = nullptr) : + Pipeline(identifier, link ? link->next_pl.get() : nullptr), + link(std::move(link)), + pass_immediately_to_next(this->link != nullptr) // parameter 'link' is moved-from here + { + } + + // Write to 'str'. If 'next' is set, 'str' will be written to 'next' when 'finish' is + // called. + Count(char const* identifier, std::string& str, std::unique_ptr link = nullptr) : + Pipeline(identifier, link ? 
link->next_pl.get() : nullptr), + str(&str), + link(std::move(link)) + { + } + ~Count() final = default; void @@ -30,7 +104,7 @@ namespace qpdf::pl return; } count += static_cast(len); - if (next()) { + if (pass_immediately_to_next) { next()->write(buf, len); } } @@ -40,6 +114,9 @@ namespace qpdf::pl finish() final { if (next()) { + if (!pass_immediately_to_next) { + next()->write(reinterpret_cast(str->data()), str->size()); + } next()->finish(); } } @@ -53,6 +130,8 @@ namespace qpdf::pl private: qpdf_offset_t count{0}; std::string* str{nullptr}; + std::unique_ptr link{nullptr}; + bool pass_immediately_to_next{false}; }; } // namespace qpdf::pl diff --git a/libqpdf/qpdf/QPDFWriter_private.hh b/libqpdf/qpdf/QPDFWriter_private.hh index c59ecf8..767e263 100644 --- a/libqpdf/qpdf/QPDFWriter_private.hh +++ b/libqpdf/qpdf/QPDFWriter_private.hh @@ -115,6 +115,7 @@ class QPDFWriter::Members std::map> object_stream_to_objects; std::vector pipeline_stack; unsigned long long next_stack_id{0}; + std::string count_buffer; bool deterministic_id{false}; Pl_MD5* md5_pipeline{nullptr}; std::string deterministic_id_data; diff --git a/manual/release-notes.rst b/manual/release-notes.rst index bfb502d..d55e555 100644 --- a/manual/release-notes.rst +++ b/manual/release-notes.rst @@ -21,7 +21,7 @@ more detail. integer object. Previously the method returned false if the first dictionary object was not a linearization parameter dictionary. - = Fix parsing of object streams containing objects not seperated by + - Fix parsing of object streams containing objects not separated by white-space. Pre-2020 editions of the PDF specification incorrectly stated that white-space was required between objects. qpdf relied on this when parsing object streams. @@ -40,8 +40,8 @@ more detail. messages and object descriptions has been refactored with some improvement both in runtime and memory usage. - - There has been some refactoring of how object streams are written with - some performance improvement. 
+ - There has been some refactoring of QPDFWriter, including how object + streams are written, with some performance improvement. .. cSpell:ignore substract