diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 4dd52d2..7dfcbeb 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -743,8 +743,6 @@ class QPDF class ObjCache; class EncryptionParameters; - class ForeignStreamData; - class CopiedStreamDataProvider; class StringDecrypter; class ResolveRecorder; class JSONReactor; @@ -774,8 +772,6 @@ class QPDF Pipeline* pipeline, bool suppress_warnings, bool will_retry); - bool - pipeForeignStreamData(ForeignStreamData&, Pipeline*, bool suppress_warnings, bool will_retry); static bool pipeStreamData( std::shared_ptr encp, std::shared_ptr file, @@ -804,9 +800,6 @@ class QPDF bool is_root_metadata, std::unique_ptr& heap); - // Methods to support object copying - void copyStreamData(QPDFObjectHandle dest_stream, QPDFObjectHandle src_stream); - struct HPageOffsetEntry; struct HPageOffset; struct HSharedObjectEntry; diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 5a7b912..010e101 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -28,6 +28,8 @@ using namespace qpdf; using namespace std::literals; using Objects = QPDF::Doc::Objects; +using Foreign = Objects::Foreign; +using Streams = Objects::Streams; // This must be a fixed value. This API returns a const reference to it, and the C API relies on its // being static as well. @@ -110,43 +112,6 @@ namespace }; } // namespace -QPDF::ForeignStreamData::ForeignStreamData( - Stream& foreign, qpdf_offset_t offset, QPDFObjectHandle local_dict) : - encp(foreign.qpdf()->m->encp), - file(foreign.qpdf()->m->file), - foreign_og(foreign.id_gen()), - offset(offset), - length(foreign.getLength()), - local_dict(local_dict), - is_root_metadata(foreign.isRootMetadata()) -{ -} - -QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) : - QPDFObjectHandle::StreamDataProvider(true), - destination_qpdf(destination_qpdf) -{ -} - -bool -QPDF::CopiedStreamDataProvider::provideStreamData( - QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) -{ - auto foreign_data = foreign_stream_data.find(og); - bool result = false; - if (foreign_data != foreign_stream_data.end()) { - result = destination_qpdf.pipeForeignStreamData( - foreign_data->second, pipeline, suppress_warnings, will_retry); - QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1); - } else { - auto foreign_stream = foreign_streams[og]; - result = foreign_stream.pipeStreamData( - pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry); - QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1); - } - return result; -} - QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) : qpdf(qpdf), og(og) @@ -651,7 +616,7 @@ Objects::Foreign::Copier::replace_indirect_object(QPDFObjectHandle const& foreig dict.replaceKey(key, replace_indirect_object(value)); } } - qpdf.copyStreamData(result, foreign); + stream.copy_data_to(result); return result; } @@ -661,53 +626,6 @@ Objects::Foreign::Copier::replace_indirect_object(QPDFObjectHandle const& foreig return result; } -void -QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign_oh) -{ - // This method was originally written for copying foreign streams, but it is used by - // Stream::copy to copy streams from the same QPDF object as well. - - Dictionary dict = result.getDict(); - Dictionary old_dict = foreign_oh.getDict(); - if (!m->copied_stream_data_provider) { - m->copied_stream_data_provider = std::make_shared(*this); - } - QPDFObjGen local_og(result.getObjGen()); - // Copy information from the foreign stream so we can pipe its data later without keeping the - // original QPDF object around. - - QPDF& foreign_stream_qpdf = - foreign_oh.getQPDF("unable to retrieve owning qpdf from foreign stream"); - - Stream foreign = foreign_oh; - if (!foreign) { - throw std::logic_error("unable to retrieve underlying stream object from foreign stream"); - } - std::shared_ptr stream_buffer = foreign.getStreamDataBuffer(); - if (foreign_stream_qpdf.m->immediate_copy_from && !stream_buffer) { - // Pull the stream data into a buffer before attempting the copy operation. Do it on the - // source stream so that if the source stream is copied multiple times, we don't have to - // keep duplicating the memory. - foreign.replaceStreamData( - foreign.getRawStreamData(), old_dict["/Filter"], old_dict["/DecodeParms"]); - stream_buffer = foreign.getStreamDataBuffer(); - } - auto stream_provider = foreign.getStreamDataProvider(); - if (stream_buffer) { - result.replaceStreamData(stream_buffer, dict["/Filter"], dict["/DecodeParms"]); - } else if (stream_provider) { - // In this case, the remote stream's QPDF must stay in scope. - m->copied_stream_data_provider->registerForeignStream(local_og, foreign_oh); - result.replaceStreamData( - m->copied_stream_data_provider, dict["/Filter"], dict["/DecodeParms"]); - } else { - auto foreign_stream_data = ForeignStreamData(foreign, foreign_oh.offset(), dict); - m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data); - result.replaceStreamData( - m->copied_stream_data_provider, dict["/Filter"], dict["/DecodeParms"]); - } -} - unsigned long long QPDF::getUniqueId() const { @@ -893,27 +811,6 @@ QPDF::pipeStreamData( will_retry); } -bool -QPDF::pipeForeignStreamData( - ForeignStreamData& foreign, Pipeline* pipeline, bool suppress_warnings, bool will_retry) -{ - if (foreign.encp->encrypted) { - QTC::TC("qpdf", "QPDF pipe foreign encrypted stream"); - } - return pipeStreamData( - foreign.encp, - foreign.file, - *this, - foreign.foreign_og, - foreign.offset, - foreign.length, - foreign.local_dict, - foreign.is_root_metadata, - pipeline, - suppress_warnings, - will_retry); -} - // Throw a generic exception when we lack context for something more specific. New code should not // use this. void diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index 99332ea..e47fe59 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -25,33 +25,115 @@ using namespace std::literals; using namespace qpdf; -// Pipe class is restricted to QPDF_Stream. -class QPDF::Doc::Streams +using Streams = QPDF::Doc::Objects::Streams; + +bool +Streams::immediate_copy_from() const +{ + return qpdf_.m->immediate_copy_from; +} + +class Streams::Copier final: public QPDFObjectHandle::StreamDataProvider { + class Data + { + friend class Streams; + + public: + Data(Stream& source, Dictionary const& dest_dict) : + encp(source.qpdf()->m->encp), + file(source.qpdf()->m->file), + source_og(source.id_gen()), + offset(source.offset()), + length(source.getLength()), + dest_dict(dest_dict), + is_root_metadata(source.isRootMetadata()) + { + } + + private: + std::shared_ptr encp; + std::shared_ptr file; + QPDFObjGen source_og; + qpdf_offset_t offset; + size_t length; + QPDFObjectHandle dest_dict; + bool is_root_metadata{false}; + }; + public: - static bool - pipeStreamData( - QPDF* qpdf, - QPDFObjGen og, - qpdf_offset_t offset, - size_t length, - QPDFObjectHandle dict, - bool is_root_metadata, - Pipeline* pipeline, - bool suppress_warnings, - bool will_retry) + Copier() = delete; + Copier(StreamDataProvider const&) = delete; + Copier(StreamDataProvider&&) = delete; + Copier& operator=(StreamDataProvider const&) = delete; + Copier& operator=(StreamDataProvider&&) = delete; + ~Copier() final = default; + + Copier(Streams& streams) : + QPDFObjectHandle::StreamDataProvider(true), + streams(streams) { - return qpdf->pipeStreamData( - og, offset, length, dict, is_root_metadata, pipeline, suppress_warnings, will_retry); } - static void - copyStreamData(QPDF* qpdf, QPDFObjectHandle const& dest, QPDFObjectHandle const& src) + bool + provideStreamData( + QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) final { - qpdf->copyStreamData(dest, src); + auto data = copied_data.find(og); + if (data != copied_data.end()) { + auto& fd = data->second; + QTC::TC("qpdf", "QPDF pipe foreign encrypted stream", fd.encp->encrypted ? 0 : 1); + if (streams.qpdf().pipeStreamData( + fd.encp, + fd.file, + streams.qpdf(), + fd.source_og, + fd.offset, + fd.length, + fd.dest_dict, + fd.is_root_metadata, + pipeline, + suppress_warnings, + will_retry)) { + return true; // for CI coverage + } else { + return false; + } + } + auto stream = copied_streams.find(og); + qpdf_invariant(stream == copied_streams.end() || stream->second); + if (stream != copied_streams.end() && + stream->second.pipeStreamData( + pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry)) { + return true; // for CI coverage + } + return false; } + + void + register_copy(Stream& dest, Stream& source, bool provider) + { + qpdf_expect(source); + qpdf_expect(dest); + if (provider) { + copied_streams.insert_or_assign(dest, source); + } else { + copied_data.insert_or_assign(dest, Data(source, dest.getDict())); + } + } + + private: + Streams& streams; + std::map copied_streams; + std::map copied_data; }; +Streams::Streams(QPDF& qpdf) : + qpdf_(qpdf), + copier_(std::make_shared(*this)) +{ +} + namespace { class SF_Crypt final: public QPDFStreamFilter @@ -213,15 +295,42 @@ Stream::Stream( } Stream -Stream::copy() const +Stream::copy() { Stream result = qpdf()->newStream(); result.stream()->stream_dict = getDict().copy(); - QPDF::Doc::Streams::copyStreamData(qpdf(), result, *this); + copy_data_to(result); return result; } void +Stream::copy_data_to(Stream& dest) +{ + qpdf_expect(dest); + auto s = stream(); + auto& streams = qpdf()->doc().objects().streams(); + auto& d_streams = dest.qpdf()->doc().objects().streams(); + + auto dict = dest.getDict(); + + // Copy information from the foreign stream so we can pipe its data later without keeping the + // original QPDF object around. + if (streams.immediate_copy_from() && !s->stream_data) { + // Pull the stream data into a buffer before attempting the copy operation. Do it on the + // source stream so that if the source stream is copied multiple times, we don't have to + // keep duplicating the memory. + replaceStreamData( + getRawStreamData(), s->stream_dict["/Filter"], s->stream_dict["/DecodeParms"]); + } + if (s->stream_data) { + dest.replaceStreamData(s->stream_data, dict["/Filter"], dict["/DecodeParms"]); + } else { + d_streams.copier()->register_copy(dest, *this, s->stream_provider.get()); + dest.replaceStreamData(d_streams.copier(), dict["/Filter"], dict["/DecodeParms"]); + } +} + +void Stream::registerStreamFilter( std::string const& filter_name, std::function()> factory) { @@ -593,7 +702,7 @@ Stream::pipeStreamData( if (offset() == 0) { throw std::logic_error("pipeStreamData called for stream with no data"); } - if (!QPDF::Doc::Streams::pipeStreamData( + if (!Streams::pipeStreamData( qpdf(), id_gen(), offset(), diff --git a/libqpdf/qpdf/QPDFObjectHandle_private.hh b/libqpdf/qpdf/QPDFObjectHandle_private.hh index 2073456..6cf3424 100644 --- a/libqpdf/qpdf/QPDFObjectHandle_private.hh +++ b/libqpdf/qpdf/QPDFObjectHandle_private.hh @@ -469,7 +469,9 @@ namespace qpdf qpdf_offset_t offset, size_t length); - Stream copy() const; + Stream copy(); + + void copy_data_to(Stream& target); Dictionary getDict() const diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index 8832cbd..5c15132 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -89,48 +89,6 @@ class QPDF::EncryptionParameters bool owner_password_matched{false}; }; -class QPDF::ForeignStreamData -{ - friend class QPDF; - - public: - ForeignStreamData(Stream& foreign, qpdf_offset_t offset, QPDFObjectHandle local_dict); - - private: - std::shared_ptr encp; - std::shared_ptr file; - QPDFObjGen foreign_og; - qpdf_offset_t offset; - size_t length; - QPDFObjectHandle local_dict; - bool is_root_metadata{false}; -}; - -class QPDF::CopiedStreamDataProvider final: public QPDFObjectHandle::StreamDataProvider -{ - public: - CopiedStreamDataProvider(QPDF& destination_qpdf); - ~CopiedStreamDataProvider() final = default; - bool provideStreamData( - QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) final; - void - registerForeignStream(QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream) - { - foreign_streams.insert_or_assign(local_og, foreign_stream); - } - - void - registerForeignStream(QPDFObjGen local_og, ForeignStreamData foreign_stream) - { - foreign_stream_data.insert_or_assign(local_og, foreign_stream); - } - - private: - QPDF& destination_qpdf; - std::map foreign_streams; - std::map foreign_stream_data; -}; - class QPDF::StringDecrypter final: public QPDFObjectHandle::StringDecrypter { friend class QPDF; @@ -340,8 +298,6 @@ class QPDF::Doc class JobSetter; class ParseGuard; class Resolver; - class StreamCopier; - class Streams; class Writer; class Encryption @@ -620,6 +576,66 @@ class QPDF::Doc std::map copiers; }; // class QPDF::Doc::Objects::Foreign + class Streams + { + // Copier manages the copying of streams into this PDF. It is used both for copying + // local and foreign streams. + class Copier; + + public: + Streams(QPDF& qpdf); + + Streams() = delete; + Streams(Streams const&) = delete; + Streams(Streams&&) = delete; + Streams& operator=(Streams const&) = delete; + Streams& operator=(Streams&&) = delete; + ~Streams() = default; + + public: + static bool + pipeStreamData( + QPDF* qpdf, + QPDFObjGen og, + qpdf_offset_t offset, + size_t length, + QPDFObjectHandle dict, + bool is_root_metadata, + Pipeline* pipeline, + bool suppress_warnings, + bool will_retry) + { + return qpdf->pipeStreamData( + og, + offset, + length, + dict, + is_root_metadata, + pipeline, + suppress_warnings, + will_retry); + } + + QPDF& + qpdf() const + { + return qpdf_; + } + + std::shared_ptr& + copier() + { + return copier_; + } + + bool immediate_copy_from() const; + + private: + QPDF& qpdf_; + + std::shared_ptr copier_; + }; // class QPDF::Doc::Objects::Streams + public: Objects() = delete; Objects(Objects const&) = delete; @@ -631,7 +647,8 @@ class QPDF::Doc Objects(QPDF& qpdf, QPDF::Members* m) : qpdf(qpdf), m(m), - foreign_(qpdf) + foreign_(qpdf), + streams_(qpdf) { } @@ -641,6 +658,12 @@ class QPDF::Doc return foreign_; } + Streams& + streams() + { + return streams_; + } + void parse(char const* password); std::shared_ptr const& resolve(QPDFObjGen og); void inParse(bool); @@ -717,6 +740,7 @@ class QPDF::Doc QPDF::Members* m; Foreign foreign_; + Streams streams_; }; // class QPDF::Doc::Objects // This class is used to represent a PDF Pages tree. @@ -898,7 +922,6 @@ class QPDF::Members bool ever_pushed_inherited_attributes_to_pages{false}; bool ever_called_get_all_pages{false}; std::vector warnings; - std::shared_ptr copied_stream_data_provider; bool reconstructed_xref{false}; bool in_read_xref_stream{false}; bool fixed_dangling_refs{false}; diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index e4d3e82..d73deef 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -244,7 +244,7 @@ QPDFFormFieldObjectHelper list last too high 0 QPDFJob image optimize no pipeline 0 QPDFJob image optimize no shrink 0 QPDFJob image optimize too small 0 -QPDF pipe foreign encrypted stream 0 +QPDF pipe foreign encrypted stream 1 QPDFJob copy same page more than once 1 QPDFPageObjectHelper bad token finding names 0 QPDFJob password mode bytes 0 @@ -277,8 +277,6 @@ QPDFPageObjectHelper filter form xobject 0 QPDFJob found resources in non-leaf 0 QPDFJob found shared resources in leaf 0 QPDFJob found shared xobject in leaf 0 -QPDF copy foreign with data 1 -QPDF copy foreign with foreign_stream 1 QPDFObjectHandle need_newline 1 QPDFJob pages range omitted with . 0 qpdf-c invalid object handle 0