Commit 0c8af4a30c27d5cec119ccf7c86b713cf725e7c2

Authored by m-holger
Committed by GitHub
2 parents 2dc837f4 c99a4eaf

Merge pull request #1556 from m-holger/foreign_stream

Refactor stream copying
include/qpdf/QPDF.hh
... ... @@ -743,8 +743,6 @@ class QPDF
743 743  
744 744 class ObjCache;
745 745 class EncryptionParameters;
746   - class ForeignStreamData;
747   - class CopiedStreamDataProvider;
748 746 class StringDecrypter;
749 747 class ResolveRecorder;
750 748 class JSONReactor;
... ... @@ -774,8 +772,6 @@ class QPDF
774 772 Pipeline* pipeline,
775 773 bool suppress_warnings,
776 774 bool will_retry);
777   - bool
778   - pipeForeignStreamData(ForeignStreamData&, Pipeline*, bool suppress_warnings, bool will_retry);
779 775 static bool pipeStreamData(
780 776 std::shared_ptr<QPDF::EncryptionParameters> encp,
781 777 std::shared_ptr<InputSource> file,
... ... @@ -804,9 +800,6 @@ class QPDF
804 800 bool is_root_metadata,
805 801 std::unique_ptr<Pipeline>& heap);
806 802  
807   - // Methods to support object copying
808   - void copyStreamData(QPDFObjectHandle dest_stream, QPDFObjectHandle src_stream);
809   -
810 803 struct HPageOffsetEntry;
811 804 struct HPageOffset;
812 805 struct HSharedObjectEntry;
... ...
libqpdf/QPDF.cc
... ... @@ -28,6 +28,8 @@ using namespace qpdf;
28 28 using namespace std::literals;
29 29  
30 30 using Objects = QPDF::Doc::Objects;
  31 +using Foreign = Objects::Foreign;
  32 +using Streams = Objects::Streams;
31 33  
32 34 // This must be a fixed value. This API returns a const reference to it, and the C API relies on its
33 35 // being static as well.
... ... @@ -110,43 +112,6 @@ namespace
110 112 };
111 113 } // namespace
112 114  
113   -QPDF::ForeignStreamData::ForeignStreamData(
114   - Stream& foreign, qpdf_offset_t offset, QPDFObjectHandle local_dict) :
115   - encp(foreign.qpdf()->m->encp),
116   - file(foreign.qpdf()->m->file),
117   - foreign_og(foreign.id_gen()),
118   - offset(offset),
119   - length(foreign.getLength()),
120   - local_dict(local_dict),
121   - is_root_metadata(foreign.isRootMetadata())
122   -{
123   -}
124   -
125   -QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) :
126   - QPDFObjectHandle::StreamDataProvider(true),
127   - destination_qpdf(destination_qpdf)
128   -{
129   -}
130   -
131   -bool
132   -QPDF::CopiedStreamDataProvider::provideStreamData(
133   - QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry)
134   -{
135   - auto foreign_data = foreign_stream_data.find(og);
136   - bool result = false;
137   - if (foreign_data != foreign_stream_data.end()) {
138   - result = destination_qpdf.pipeForeignStreamData(
139   - foreign_data->second, pipeline, suppress_warnings, will_retry);
140   - QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1);
141   - } else {
142   - auto foreign_stream = foreign_streams[og];
143   - result = foreign_stream.pipeStreamData(
144   - pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry);
145   - QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1);
146   - }
147   - return result;
148   -}
149   -
150 115 QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) :
151 116 qpdf(qpdf),
152 117 og(og)
... ... @@ -651,7 +616,7 @@ Objects::Foreign::Copier::replace_indirect_object(QPDFObjectHandle const& foreig
651 616 dict.replaceKey(key, replace_indirect_object(value));
652 617 }
653 618 }
654   - qpdf.copyStreamData(result, foreign);
  619 + stream.copy_data_to(result);
655 620 return result;
656 621 }
657 622  
... ... @@ -661,53 +626,6 @@ Objects::Foreign::Copier::replace_indirect_object(QPDFObjectHandle const& foreig
661 626 return result;
662 627 }
663 628  
664   -void
665   -QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign_oh)
666   -{
667   - // This method was originally written for copying foreign streams, but it is used by
668   - // Stream::copy to copy streams from the same QPDF object as well.
669   -
670   - Dictionary dict = result.getDict();
671   - Dictionary old_dict = foreign_oh.getDict();
672   - if (!m->copied_stream_data_provider) {
673   - m->copied_stream_data_provider = std::make_shared<CopiedStreamDataProvider>(*this);
674   - }
675   - QPDFObjGen local_og(result.getObjGen());
676   - // Copy information from the foreign stream so we can pipe its data later without keeping the
677   - // original QPDF object around.
678   -
679   - QPDF& foreign_stream_qpdf =
680   - foreign_oh.getQPDF("unable to retrieve owning qpdf from foreign stream");
681   -
682   - Stream foreign = foreign_oh;
683   - if (!foreign) {
684   - throw std::logic_error("unable to retrieve underlying stream object from foreign stream");
685   - }
686   - std::shared_ptr<Buffer> stream_buffer = foreign.getStreamDataBuffer();
687   - if (foreign_stream_qpdf.m->immediate_copy_from && !stream_buffer) {
688   - // Pull the stream data into a buffer before attempting the copy operation. Do it on the
689   - // source stream so that if the source stream is copied multiple times, we don't have to
690   - // keep duplicating the memory.
691   - foreign.replaceStreamData(
692   - foreign.getRawStreamData(), old_dict["/Filter"], old_dict["/DecodeParms"]);
693   - stream_buffer = foreign.getStreamDataBuffer();
694   - }
695   - auto stream_provider = foreign.getStreamDataProvider();
696   - if (stream_buffer) {
697   - result.replaceStreamData(stream_buffer, dict["/Filter"], dict["/DecodeParms"]);
698   - } else if (stream_provider) {
699   - // In this case, the remote stream's QPDF must stay in scope.
700   - m->copied_stream_data_provider->registerForeignStream(local_og, foreign_oh);
701   - result.replaceStreamData(
702   - m->copied_stream_data_provider, dict["/Filter"], dict["/DecodeParms"]);
703   - } else {
704   - auto foreign_stream_data = ForeignStreamData(foreign, foreign_oh.offset(), dict);
705   - m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data);
706   - result.replaceStreamData(
707   - m->copied_stream_data_provider, dict["/Filter"], dict["/DecodeParms"]);
708   - }
709   -}
710   -
711 629 unsigned long long
712 630 QPDF::getUniqueId() const
713 631 {
... ... @@ -893,27 +811,6 @@ QPDF::pipeStreamData(
893 811 will_retry);
894 812 }
895 813  
896   -bool
897   -QPDF::pipeForeignStreamData(
898   - ForeignStreamData& foreign, Pipeline* pipeline, bool suppress_warnings, bool will_retry)
899   -{
900   - if (foreign.encp->encrypted) {
901   - QTC::TC("qpdf", "QPDF pipe foreign encrypted stream");
902   - }
903   - return pipeStreamData(
904   - foreign.encp,
905   - foreign.file,
906   - *this,
907   - foreign.foreign_og,
908   - foreign.offset,
909   - foreign.length,
910   - foreign.local_dict,
911   - foreign.is_root_metadata,
912   - pipeline,
913   - suppress_warnings,
914   - will_retry);
915   -}
916   -
917 814 // Throw a generic exception when we lack context for something more specific. New code should not
918 815 // use this.
919 816 void
... ...
libqpdf/QPDF_Stream.cc
... ... @@ -25,33 +25,115 @@
25 25 using namespace std::literals;
26 26 using namespace qpdf;
27 27  
28   -// Pipe class is restricted to QPDF_Stream.
29   -class QPDF::Doc::Streams
  28 +using Streams = QPDF::Doc::Objects::Streams;
  29 +
  30 +bool
  31 +Streams::immediate_copy_from() const
  32 +{
  33 + return qpdf_.m->immediate_copy_from;
  34 +}
  35 +
  36 +class Streams::Copier final: public QPDFObjectHandle::StreamDataProvider
30 37 {
  38 + class Data
  39 + {
  40 + friend class Streams;
  41 +
  42 + public:
  43 + Data(Stream& source, Dictionary const& dest_dict) :
  44 + encp(source.qpdf()->m->encp),
  45 + file(source.qpdf()->m->file),
  46 + source_og(source.id_gen()),
  47 + offset(source.offset()),
  48 + length(source.getLength()),
  49 + dest_dict(dest_dict),
  50 + is_root_metadata(source.isRootMetadata())
  51 + {
  52 + }
  53 +
  54 + private:
  55 + std::shared_ptr<EncryptionParameters> encp;
  56 + std::shared_ptr<InputSource> file;
  57 + QPDFObjGen source_og;
  58 + qpdf_offset_t offset;
  59 + size_t length;
  60 + QPDFObjectHandle dest_dict;
  61 + bool is_root_metadata{false};
  62 + };
  63 +
31 64 public:
32   - static bool
33   - pipeStreamData(
34   - QPDF* qpdf,
35   - QPDFObjGen og,
36   - qpdf_offset_t offset,
37   - size_t length,
38   - QPDFObjectHandle dict,
39   - bool is_root_metadata,
40   - Pipeline* pipeline,
41   - bool suppress_warnings,
42   - bool will_retry)
  65 + Copier() = delete;
  66 + Copier(StreamDataProvider const&) = delete;
  67 + Copier(StreamDataProvider&&) = delete;
  68 + Copier& operator=(StreamDataProvider const&) = delete;
  69 + Copier& operator=(StreamDataProvider&&) = delete;
  70 + ~Copier() final = default;
  71 +
  72 + Copier(Streams& streams) :
  73 + QPDFObjectHandle::StreamDataProvider(true),
  74 + streams(streams)
43 75 {
44   - return qpdf->pipeStreamData(
45   - og, offset, length, dict, is_root_metadata, pipeline, suppress_warnings, will_retry);
46 76 }
47 77  
48   - static void
49   - copyStreamData(QPDF* qpdf, QPDFObjectHandle const& dest, QPDFObjectHandle const& src)
  78 + bool
  79 + provideStreamData(
  80 + QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) final
50 81 {
51   - qpdf->copyStreamData(dest, src);
  82 + auto data = copied_data.find(og);
  83 + if (data != copied_data.end()) {
  84 + auto& fd = data->second;
  85 + QTC::TC("qpdf", "QPDF pipe foreign encrypted stream", fd.encp->encrypted ? 0 : 1);
  86 + if (streams.qpdf().pipeStreamData(
  87 + fd.encp,
  88 + fd.file,
  89 + streams.qpdf(),
  90 + fd.source_og,
  91 + fd.offset,
  92 + fd.length,
  93 + fd.dest_dict,
  94 + fd.is_root_metadata,
  95 + pipeline,
  96 + suppress_warnings,
  97 + will_retry)) {
  98 + return true; // for CI coverage
  99 + } else {
  100 + return false;
  101 + }
  102 + }
  103 + auto stream = copied_streams.find(og);
  104 + qpdf_invariant(stream == copied_streams.end() || stream->second);
  105 + if (stream != copied_streams.end() &&
  106 + stream->second.pipeStreamData(
  107 + pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry)) {
  108 + return true; // for CI coverage
  109 + }
  110 + return false;
52 111 }
  112 +
  113 + void
  114 + register_copy(Stream& dest, Stream& source, bool provider)
  115 + {
  116 + qpdf_expect(source);
  117 + qpdf_expect(dest);
  118 + if (provider) {
  119 + copied_streams.insert_or_assign(dest, source);
  120 + } else {
  121 + copied_data.insert_or_assign(dest, Data(source, dest.getDict()));
  122 + }
  123 + }
  124 +
  125 + private:
  126 + Streams& streams;
  127 + std::map<QPDFObjGen, Stream> copied_streams;
  128 + std::map<QPDFObjGen, Data> copied_data;
53 129 };
54 130  
  131 +Streams::Streams(QPDF& qpdf) :
  132 + qpdf_(qpdf),
  133 + copier_(std::make_shared<Copier>(*this))
  134 +{
  135 +}
  136 +
55 137 namespace
56 138 {
57 139 class SF_Crypt final: public QPDFStreamFilter
... ... @@ -213,15 +295,42 @@ Stream::Stream(
213 295 }
214 296  
215 297 Stream
216   -Stream::copy() const
  298 +Stream::copy()
217 299 {
218 300 Stream result = qpdf()->newStream();
219 301 result.stream()->stream_dict = getDict().copy();
220   - QPDF::Doc::Streams::copyStreamData(qpdf(), result, *this);
  302 + copy_data_to(result);
221 303 return result;
222 304 }
223 305  
224 306 void
  307 +Stream::copy_data_to(Stream& dest)
  308 +{
  309 + qpdf_expect(dest);
  310 + auto s = stream();
  311 + auto& streams = qpdf()->doc().objects().streams();
  312 + auto& d_streams = dest.qpdf()->doc().objects().streams();
  313 +
  314 + auto dict = dest.getDict();
  315 +
  316 + // Copy information from the foreign stream so we can pipe its data later without keeping the
  317 + // original QPDF object around.
  318 + if (streams.immediate_copy_from() && !s->stream_data) {
  319 + // Pull the stream data into a buffer before attempting the copy operation. Do it on the
  320 + // source stream so that if the source stream is copied multiple times, we don't have to
  321 + // keep duplicating the memory.
  322 + replaceStreamData(
  323 + getRawStreamData(), s->stream_dict["/Filter"], s->stream_dict["/DecodeParms"]);
  324 + }
  325 + if (s->stream_data) {
  326 + dest.replaceStreamData(s->stream_data, dict["/Filter"], dict["/DecodeParms"]);
  327 + } else {
  328 + d_streams.copier()->register_copy(dest, *this, s->stream_provider.get());
  329 + dest.replaceStreamData(d_streams.copier(), dict["/Filter"], dict["/DecodeParms"]);
  330 + }
  331 +}
  332 +
  333 +void
225 334 Stream::registerStreamFilter(
226 335 std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
227 336 {
... ... @@ -593,7 +702,7 @@ Stream::pipeStreamData(
593 702 if (offset() == 0) {
594 703 throw std::logic_error("pipeStreamData called for stream with no data");
595 704 }
596   - if (!QPDF::Doc::Streams::pipeStreamData(
  705 + if (!Streams::pipeStreamData(
597 706 qpdf(),
598 707 id_gen(),
599 708 offset(),
... ...
libqpdf/qpdf/QPDFObjectHandle_private.hh
... ... @@ -469,7 +469,9 @@ namespace qpdf
469 469 qpdf_offset_t offset,
470 470 size_t length);
471 471  
472   - Stream copy() const;
  472 + Stream copy();
  473 +
  474 + void copy_data_to(Stream& target);
473 475  
474 476 Dictionary
475 477 getDict() const
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -89,48 +89,6 @@ class QPDF::EncryptionParameters
89 89 bool owner_password_matched{false};
90 90 };
91 91  
92   -class QPDF::ForeignStreamData
93   -{
94   - friend class QPDF;
95   -
96   - public:
97   - ForeignStreamData(Stream& foreign, qpdf_offset_t offset, QPDFObjectHandle local_dict);
98   -
99   - private:
100   - std::shared_ptr<EncryptionParameters> encp;
101   - std::shared_ptr<InputSource> file;
102   - QPDFObjGen foreign_og;
103   - qpdf_offset_t offset;
104   - size_t length;
105   - QPDFObjectHandle local_dict;
106   - bool is_root_metadata{false};
107   -};
108   -
109   -class QPDF::CopiedStreamDataProvider final: public QPDFObjectHandle::StreamDataProvider
110   -{
111   - public:
112   - CopiedStreamDataProvider(QPDF& destination_qpdf);
113   - ~CopiedStreamDataProvider() final = default;
114   - bool provideStreamData(
115   - QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry) final;
116   - void
117   - registerForeignStream(QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream)
118   - {
119   - foreign_streams.insert_or_assign(local_og, foreign_stream);
120   - }
121   -
122   - void
123   - registerForeignStream(QPDFObjGen local_og, ForeignStreamData foreign_stream)
124   - {
125   - foreign_stream_data.insert_or_assign(local_og, foreign_stream);
126   - }
127   -
128   - private:
129   - QPDF& destination_qpdf;
130   - std::map<QPDFObjGen, QPDFObjectHandle> foreign_streams;
131   - std::map<QPDFObjGen, ForeignStreamData> foreign_stream_data;
132   -};
133   -
134 92 class QPDF::StringDecrypter final: public QPDFObjectHandle::StringDecrypter
135 93 {
136 94 friend class QPDF;
... ... @@ -340,8 +298,6 @@ class QPDF::Doc
340 298 class JobSetter;
341 299 class ParseGuard;
342 300 class Resolver;
343   - class StreamCopier;
344   - class Streams;
345 301 class Writer;
346 302  
347 303 class Encryption
... ... @@ -620,6 +576,66 @@ class QPDF::Doc
620 576 std::map<unsigned long long, Copier> copiers;
621 577 }; // class QPDF::Doc::Objects::Foreign
622 578  
  579 + class Streams
  580 + {
  581 + // Copier manages the copying of streams into this PDF. It is used both for copying
  582 + // local and foreign streams.
  583 + class Copier;
  584 +
  585 + public:
  586 + Streams(QPDF& qpdf);
  587 +
  588 + Streams() = delete;
  589 + Streams(Streams const&) = delete;
  590 + Streams(Streams&&) = delete;
  591 + Streams& operator=(Streams const&) = delete;
  592 + Streams& operator=(Streams&&) = delete;
  593 + ~Streams() = default;
  594 +
  595 + public:
  596 + static bool
  597 + pipeStreamData(
  598 + QPDF* qpdf,
  599 + QPDFObjGen og,
  600 + qpdf_offset_t offset,
  601 + size_t length,
  602 + QPDFObjectHandle dict,
  603 + bool is_root_metadata,
  604 + Pipeline* pipeline,
  605 + bool suppress_warnings,
  606 + bool will_retry)
  607 + {
  608 + return qpdf->pipeStreamData(
  609 + og,
  610 + offset,
  611 + length,
  612 + dict,
  613 + is_root_metadata,
  614 + pipeline,
  615 + suppress_warnings,
  616 + will_retry);
  617 + }
  618 +
  619 + QPDF&
  620 + qpdf() const
  621 + {
  622 + return qpdf_;
  623 + }
  624 +
  625 + std::shared_ptr<Copier>&
  626 + copier()
  627 + {
  628 + return copier_;
  629 + }
  630 +
  631 + bool immediate_copy_from() const;
  632 +
  633 + private:
  634 + QPDF& qpdf_;
  635 +
  636 + std::shared_ptr<Copier> copier_;
  637 + }; // class QPDF::Doc::Objects::Streams
  638 +
623 639 public:
624 640 Objects() = delete;
625 641 Objects(Objects const&) = delete;
... ... @@ -631,7 +647,8 @@ class QPDF::Doc
631 647 Objects(QPDF& qpdf, QPDF::Members* m) :
632 648 qpdf(qpdf),
633 649 m(m),
634   - foreign_(qpdf)
  650 + foreign_(qpdf),
  651 + streams_(qpdf)
635 652 {
636 653 }
637 654  
... ... @@ -641,6 +658,12 @@ class QPDF::Doc
641 658 return foreign_;
642 659 }
643 660  
  661 + Streams&
  662 + streams()
  663 + {
  664 + return streams_;
  665 + }
  666 +
644 667 void parse(char const* password);
645 668 std::shared_ptr<QPDFObject> const& resolve(QPDFObjGen og);
646 669 void inParse(bool);
... ... @@ -717,6 +740,7 @@ class QPDF::Doc
717 740 QPDF::Members* m;
718 741  
719 742 Foreign foreign_;
  743 + Streams streams_;
720 744 }; // class QPDF::Doc::Objects
721 745  
722 746 // This class is used to represent a PDF Pages tree.
... ... @@ -898,7 +922,6 @@ class QPDF::Members
898 922 bool ever_pushed_inherited_attributes_to_pages{false};
899 923 bool ever_called_get_all_pages{false};
900 924 std::vector<QPDFExc> warnings;
901   - std::shared_ptr<CopiedStreamDataProvider> copied_stream_data_provider;
902 925 bool reconstructed_xref{false};
903 926 bool in_read_xref_stream{false};
904 927 bool fixed_dangling_refs{false};
... ...
qpdf/qpdf.testcov
... ... @@ -244,7 +244,7 @@ QPDFFormFieldObjectHelper list last too high 0
244 244 QPDFJob image optimize no pipeline 0
245 245 QPDFJob image optimize no shrink 0
246 246 QPDFJob image optimize too small 0
247   -QPDF pipe foreign encrypted stream 0
  247 +QPDF pipe foreign encrypted stream 1
248 248 QPDFJob copy same page more than once 1
249 249 QPDFPageObjectHelper bad token finding names 0
250 250 QPDFJob password mode bytes 0
... ... @@ -277,8 +277,6 @@ QPDFPageObjectHelper filter form xobject 0
277 277 QPDFJob found resources in non-leaf 0
278 278 QPDFJob found shared resources in leaf 0
279 279 QPDFJob found shared xobject in leaf 0
280   -QPDF copy foreign with data 1
281   -QPDF copy foreign with foreign_stream 1
282 280 QPDFObjectHandle need_newline 1
283 281 QPDFJob pages range omitted with . 0
284 282 qpdf-c invalid object handle 0
... ...