Commit bd47ea9b22707a89b44d9d99c9ec246155a38ab8

Authored by m-holger
Committed by GitHub
2 parents 38e9f9de 8f3f5099

Merge pull request #1460 from m-holger/stream

Fix stream filter handling.
libqpdf/QPDFWriter.cc
@@ -1294,7 +1294,8 @@ QPDFWriter::willFilterStream( @@ -1294,7 +1294,8 @@ QPDFWriter::willFilterStream(
1294 } 1294 }
1295 1295
1296 // Disable compression for empty streams to improve compatibility 1296 // Disable compression for empty streams to improve compatibility
1297 - if (stream_dict.getKey("/Length").isInteger() && stream_dict.getKey("/Length").getIntValue() == 0) { 1297 + if (stream_dict.getKey("/Length").isInteger() &&
  1298 + stream_dict.getKey("/Length").getIntValue() == 0) {
1298 filter = true; 1299 filter = true;
1299 compress_stream = false; 1300 compress_stream = false;
1300 } 1301 }
libqpdf/QPDF_Stream.cc
@@ -85,11 +85,51 @@ namespace @@ -85,11 +85,51 @@ namespace
85 85
86 /// User defined streamfilter factories 86 /// User defined streamfilter factories
87 std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>> filter_factories; 87 std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>> filter_factories;
  88 + bool filter_factories_registered = false;
88 } // namespace 89 } // namespace
89 90
  91 +std::string
  92 +QPDF_Stream::Members::expand_filter_name(std::string const& name) const
  93 +{
  94 + // The PDF specification provides these filter abbreviations for use in inline images, but
  95 + // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also
  96 + // accepts them for stream filters.
  97 + if (name == "/AHx") {
  98 + return "/ASCIIHexDecode";
  99 + }
  100 + if (name == "/A85") {
  101 + return "/ASCII85Decode";
  102 + }
  103 + if (name == "/LZW") {
  104 + return "/LZWDecode";
  105 + }
  106 + if (name == "/Fl") {
  107 + return "/FlateDecode";
  108 + }
  109 + if (name == "/RL") {
  110 + return "/RunLengthDecode";
  111 + }
  112 + if (name == "/CCF") {
  113 + return "/CCITTFaxDecode";
  114 + }
  115 + if (name == "/DCT") {
  116 + return "/DCTDecode";
  117 + }
  118 + return name;
  119 +};
  120 +
90 std::function<std::shared_ptr<QPDFStreamFilter>()> 121 std::function<std::shared_ptr<QPDFStreamFilter>()>
91 QPDF_Stream::Members::filter_factory(std::string const& name) const 122 QPDF_Stream::Members::filter_factory(std::string const& name) const
92 { 123 {
  124 + if (filter_factories_registered) [[unlikely]] {
  125 + // We need to check user provided filters first as we allow users to replace qpdf provided
  126 + // default filters. This will have a performance impact if the facility to register stream
  127 + // filters is actually used. We can optimize this away if necessary.
  128 + auto ff = filter_factories.find(expand_filter_name(name));
  129 + if (ff != filter_factories.end()) {
  130 + return ff->second;
  131 + }
  132 + }
93 if (name == "/FlateDecode") { 133 if (name == "/FlateDecode") {
94 return SF_FlateLzwDecode::flate_factory; 134 return SF_FlateLzwDecode::flate_factory;
95 } 135 }
@@ -112,8 +152,8 @@ QPDF_Stream::Members::filter_factory(std::string const& name) const @@ -112,8 +152,8 @@ QPDF_Stream::Members::filter_factory(std::string const& name) const
112 return SF_ASCIIHexDecode::factory; 152 return SF_ASCIIHexDecode::factory;
113 } 153 }
114 // The PDF specification provides these filter abbreviations for use in inline images, but 154 // The PDF specification provides these filter abbreviations for use in inline images, but
115 - // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also  
116 - // accepts them for stream filters. 155 + // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader
  156 + // also accepts them for stream filters.
117 157
118 if (name == "/Fl") { 158 if (name == "/Fl") {
119 return SF_FlateLzwDecode::flate_factory; 159 return SF_FlateLzwDecode::flate_factory;
@@ -133,15 +173,7 @@ QPDF_Stream::Members::filter_factory(std::string const& name) const @@ -133,15 +173,7 @@ QPDF_Stream::Members::filter_factory(std::string const& name) const
133 if (name == "/DCT") { 173 if (name == "/DCT") {
134 return SF_DCTDecode::factory; 174 return SF_DCTDecode::factory;
135 } 175 }
136 - if (filter_factories.empty()) {  
137 - return nullptr;  
138 - }  
139 - auto ff =  
140 - name == "/CCF" ? filter_factories.find("/CCITTFaxDecode") : filter_factories.find(name);  
141 - if (ff == filter_factories.end()) {  
142 - return nullptr;  
143 - }  
144 - return ff->second; 176 + return nullptr;
145 } 177 }
146 178
147 Stream::Stream( 179 Stream::Stream(
@@ -159,6 +191,7 @@ Stream::registerStreamFilter( @@ -159,6 +191,7 @@ Stream::registerStreamFilter(
159 std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory) 191 std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
160 { 192 {
161 filter_factories[filter_name] = factory; 193 filter_factories[filter_name] = factory;
  194 + filter_factories_registered = true;
162 } 195 }
163 196
164 JSON 197 JSON
@@ -437,7 +470,7 @@ Stream::pipeStreamData( @@ -437,7 +470,7 @@ Stream::pipeStreamData(
437 const bool empty_stream_data = s->stream_data && s->stream_data->getSize() == 0; 470 const bool empty_stream_data = s->stream_data && s->stream_data->getSize() == 0;
438 const bool empty = empty_stream || empty_stream_data; 471 const bool empty = empty_stream || empty_stream_data;
439 472
440 - if(empty_stream || empty_stream_data) { 473 + if (empty_stream || empty_stream_data) {
441 filter = true; 474 filter = true;
442 } 475 }
443 476
libqpdf/qpdf/QPDFObject_private.hh
@@ -226,6 +226,7 @@ class QPDF_Stream final @@ -226,6 +226,7 @@ class QPDF_Stream final
226 std::shared_ptr<Buffer> stream_data; 226 std::shared_ptr<Buffer> stream_data;
227 std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider; 227 std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider;
228 std::vector<std::shared_ptr<QPDFObjectHandle::TokenFilter>> token_filters; 228 std::vector<std::shared_ptr<QPDFObjectHandle::TokenFilter>> token_filters;
  229 + std::string expand_filter_name(std::string const& name) const;
229 std::function<std::shared_ptr<QPDFStreamFilter>()> 230 std::function<std::shared_ptr<QPDFStreamFilter>()>
230 filter_factory(std::string const& name) const; 231 filter_factory(std::string const& name) const;
231 }; 232 };
manual/release-notes.rst
@@ -30,6 +30,13 @@ more detail. @@ -30,6 +30,13 @@ more detail.
30 - More sanity checks have been added when files with damaged xref tables 30 - More sanity checks have been added when files with damaged xref tables
31 are recovered. 31 are recovered.
32 32
  33 + - Other changes
  34 +
  35 + - There has been some refactoring of stream filtering. Filtering is now optimized
  36 + for the common case where no user-provided stream filters are
  37 + registered by calling ``QPDF::registerStreamFilter``. If you are
  38 + providing your own stream filters, please open a ticket_.
  39 +
33 12.2.0: May 4, 2025 40 12.2.0: May 4, 2025
34 - Upcoming C++ Version Change 41 - Upcoming C++ Version Change
35 42