Commit 39bfa0130713defc9abb478a70717ca07377cdab

Authored by Jay Berkenbilt
1 parent 1fb26f08

Implement user-provided stream filters

Refactor QPDF_Stream to use stream filter classes to handle supported
stream filters as well.
ChangeLog
  1 +2020-12-23 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Allow library users to provide their own decoders for stream
  4 + filters by deriving classes from QPDFStreamFilter and registering
  5 + them using QPDF::registerStreamFilter. Registered stream filters
  6 + provide code to validate and interpret /DecodeParms for a specific
  7 + /Filter and also to provide a pipeline that will decode. Note that
  8 + it is possible to encode to a filter type that is not supported
  9 + even without this feature.
  10 +
1 11 2020-12-22 Jay Berkenbilt <ejb@ql.org>
2 12  
3 13 * Add QPDFObjectHandle::makeDirect(bool allow_streams) -- if
... ...
... ... @@ -317,13 +317,6 @@ I find it useful to make reference to them in this list
317 317 is exercised elsewhere in qpdf's test suite, so this is not that
318 318 pressing.
319 319  
320   - * Support user-pluggable stream filters. This would enable external
321   - code to provide interpretation for filters that are missing from
322   - qpdf. Make it possible for user-provided filters to override
323   - built-in filters. Make sure that the pluggable filters can be
324   - prioritized so that we can poll all registered filters to see
325   - whether they are capable of filtering a particular stream.
326   -
327 320 * If possible, consider adding CCITT3, CCITT4, or any other easy
328 321 filters. For some reference code that we probably can't use but may
329 322 be handy anyway, see
... ...
include/qpdf/QPDF.hh
... ... @@ -31,6 +31,8 @@
31 31 #include <list>
32 32 #include <iostream>
33 33 #include <vector>
  34 +#include <functional>
  35 +#include <memory>
34 36  
35 37 #include <qpdf/QIntC.hh>
36 38 #include <qpdf/QPDFExc.hh>
... ... @@ -39,6 +41,7 @@
39 41 #include <qpdf/QPDFXRefEntry.hh>
40 42 #include <qpdf/QPDFObjectHandle.hh>
41 43 #include <qpdf/QPDFTokenizer.hh>
  44 +#include <qpdf/QPDFStreamFilter.hh>
42 45 #include <qpdf/Buffer.hh>
43 46 #include <qpdf/InputSource.hh>
44 47  
... ... @@ -132,6 +135,20 @@ class QPDF
132 135 QPDF_DLL
133 136 void emptyPDF();
134 137  
  138 + // From 10.1: register a new filter implementation for a specific
  139 + // stream filter. You can add your own implementations for new
  140 + // filter types or override existing ones provided by the library.
  141 + // Registered stream filters are used for decoding only as you can
  142 + // override encoding with stream data providers. For example, you
  143 + // could use this method to add support for one of the other filter
  144 + // types by using additional third-party libraries that qpdf does
  145 + // not presently use. The standard filters are implemented using
  146 + // QPDFStreamFilter classes.
  147 + QPDF_DLL
  148 + static void registerStreamFilter(
  149 + std::string const& filter_name,
  150 + std::function<std::shared_ptr<QPDFStreamFilter> ()> factory);
  151 +
135 152 // Parameter settings
136 153  
137 154 // By default, warning messages are issued to std::cerr and output
... ...
include/qpdf/QPDFStreamFilter.hh 0 → 100644
  1 +// Copyright (c) 2005-2020 Jay Berkenbilt
  2 +//
  3 +// This file is part of qpdf.
  4 +//
  5 +// Licensed under the Apache License, Version 2.0 (the "License");
  6 +// you may not use this file except in compliance with the License.
  7 +// You may obtain a copy of the License at
  8 +//
  9 +// http://www.apache.org/licenses/LICENSE-2.0
  10 +//
  11 +// Unless required by applicable law or agreed to in writing, software
  12 +// distributed under the License is distributed on an "AS IS" BASIS,
  13 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 +// See the License for the specific language governing permissions and
  15 +// limitations under the License.
  16 +//
  17 +// Versions of qpdf prior to version 7 were released under the terms
  18 +// of version 2.0 of the Artistic License. At your option, you may
  19 +// continue to consider qpdf to be licensed under those terms. Please
  20 +// see the manual for additional information.
  21 +
  22 +#ifndef QPDFSTREAMFILTER_HH
  23 +#define QPDFSTREAMFILTER_HH
  24 +
  25 +#include <qpdf/DLL.h>
  26 +#include <qpdf/QPDFObjectHandle.hh>
  27 +#include <qpdf/Pipeline.hh>
  28 +
  29 +class QPDF_DLL_CLASS QPDFStreamFilter
  30 +{
  31 + public:
  32 + QPDF_DLL
  33 + QPDFStreamFilter() = default;
  34 +
  35 + QPDF_DLL
  36 + virtual ~QPDFStreamFilter() = default;
  37 +
  38 + // A QPDFStreamFilter class must implement, at a minimum,
  39 + // setDecodeParms() and getDecodePipeline(). QPDF will always call
  40 + // setDecodeParms() before calling getDecodePipeline(). It is
  41 + // expected that you will store any needed information from
  42 + // decode_parms (or the decode_parms object itself) in your
  43 + // instance so that it can be used to construct the decode
  44 + // pipeline.
  45 +
  46 + // Return a boolean indicating whether your filter can proceed
  47 + // with the given /DecodeParms. The default implementation accepts
  48 + // a null object and rejects everything else.
  49 + QPDF_DLL
  50 + virtual bool setDecodeParms(QPDFObjectHandle decode_parms);
  51 +
  52 + // Return a pipeline that will decode data encoded with your
  53 + // filter. Your implementation must ensure that the pipeline is
  54 + // deleted when the instance of your class is destroyed.
  55 + QPDF_DLL
  56 + virtual Pipeline* getDecodePipeline(Pipeline* next) = 0;
  57 +
  58 + // If your filter implements "specialized" compression or lossy
  59 + // compression, override one or both of these methods. The default
  60 + // implementations return false. See comments in QPDFWriter for
  61 + // details. QPDF defines specialized compression as non-lossy
  62 + // compression not intended for general-purpose data. qpdf, by
  63 + // default, doesn't mess with streams that are compressed with
  64 + // specialized compression, the idea being that the decision to
  65 + // use that compression scheme would fall outside of what
  66 + // QPDFWriter would know anything about, so any attempt to decode
  67 + // and re-encode would probably be undesirable.
  68 + QPDF_DLL
  69 + virtual bool isSpecializedCompression();
  70 + QPDF_DLL
  71 + virtual bool isLossyCompression();
  72 +
  73 + private:
  74 + QPDFStreamFilter(QPDFStreamFilter const&) = delete;
  75 + QPDFStreamFilter& operator=(QPDFStreamFilter const&) = delete;
  76 +};
  77 +
  78 +#endif // QPDFSTREAMFILTER_HH
... ...
libqpdf/QPDF.cc
... ... @@ -270,6 +270,14 @@ QPDF::emptyPDF()
270 270 }
271 271  
272 272 void
  273 +QPDF::registerStreamFilter(
  274 + std::string const& filter_name,
  275 + std::function<std::shared_ptr<QPDFStreamFilter> ()> factory)
  276 +{
  277 + QPDF_Stream::registerStreamFilter(filter_name, factory);
  278 +}
  279 +
  280 +void
273 281 QPDF::setIgnoreXRefStreams(bool val)
274 282 {
275 283 this->m->ignore_xref_streams = val;
... ...
libqpdf/QPDFStreamFilter.cc 0 → 100644
  1 +#include <qpdf/QPDFStreamFilter.hh>
  2 +
  3 +bool
  4 +QPDFStreamFilter::setDecodeParms(QPDFObjectHandle decode_parms)
  5 +{
  6 + return decode_parms.isNull();
  7 +}
  8 +
  9 +bool
  10 +QPDFStreamFilter::isSpecializedCompression()
  11 +{
  12 + return false;
  13 +}
  14 +
  15 +bool
  16 +QPDFStreamFilter::isLossyCompression()
  17 +{
  18 + return false;
  19 +}
... ...
libqpdf/QPDF_Stream.cc
... ... @@ -3,15 +3,7 @@
3 3 #include <qpdf/QUtil.hh>
4 4 #include <qpdf/Pipeline.hh>
5 5 #include <qpdf/Pl_Flate.hh>
6   -#include <qpdf/Pl_PNGFilter.hh>
7   -#include <qpdf/Pl_TIFFPredictor.hh>
8   -#include <qpdf/Pl_RC4.hh>
9 6 #include <qpdf/Pl_Buffer.hh>
10   -#include <qpdf/Pl_ASCII85Decoder.hh>
11   -#include <qpdf/Pl_ASCIIHexDecoder.hh>
12   -#include <qpdf/Pl_LZWDecoder.hh>
13   -#include <qpdf/Pl_RunLength.hh>
14   -#include <qpdf/Pl_DCT.hh>
15 7 #include <qpdf/Pl_Count.hh>
16 8 #include <qpdf/ContentNormalizer.hh>
17 9 #include <qpdf/QTC.hh>
... ... @@ -19,10 +11,78 @@
19 11 #include <qpdf/QPDFExc.hh>
20 12 #include <qpdf/Pl_QPDFTokenizer.hh>
21 13 #include <qpdf/QIntC.hh>
  14 +#include <qpdf/SF_FlateLzwDecode.hh>
  15 +#include <qpdf/SF_DCTDecode.hh>
  16 +#include <qpdf/SF_RunLengthDecode.hh>
  17 +#include <qpdf/SF_ASCII85Decode.hh>
  18 +#include <qpdf/SF_ASCIIHexDecode.hh>
22 19  
23 20 #include <stdexcept>
24 21  
25   -std::map<std::string, std::string> QPDF_Stream::filter_abbreviations;
  22 +class SF_Crypt: public QPDFStreamFilter
  23 +{
  24 + public:
  25 + SF_Crypt() = default;
  26 + virtual ~SF_Crypt() = default;
  27 +
  28 + virtual bool setDecodeParms(QPDFObjectHandle decode_parms)
  29 + {
  30 + if (decode_parms.isNull())
  31 + {
  32 + return true;
  33 + }
  34 + bool filterable = true;
  35 + for (auto const& key: decode_parms.getKeys())
  36 + {
  37 + if (((key == "/Type") || (key == "/Name")) &&
  38 + (decode_parms.getKey("/Type").isNull() ||
  39 + (decode_parms.getKey("/Type").isName() &&
  40 + (decode_parms.getKey("/Type").getName() ==
  41 + "/CryptFilterDecodeParms"))))
  42 + {
  43 + // we handle this in decryptStream
  44 + }
  45 + else
  46 + {
  47 + filterable = false;
  48 + }
  49 + }
  50 + return filterable;
  51 + }
  52 +
  53 + virtual Pipeline* getDecodePipeline(Pipeline*)
  54 + {
  55 + // Not used -- handled by pipeStreamData
  56 + return nullptr;
  57 + }
  58 +};
  59 +
  60 +std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
  61 + // The PDF specification provides these filter abbreviations for
  62 + // use in inline images, but according to table H.1 in the pre-ISO
  63 + // versions of the PDF specification, Adobe Reader also accepts
  64 + // them for stream filters.
  65 + {"/AHx", "/ASCIIHexDecode"},
  66 + {"/A85", "/ASCII85Decode"},
  67 + {"/LZW", "/LZWDecode"},
  68 + {"/Fl", "/FlateDecode"},
  69 + {"/RL", "/RunLengthDecode"},
  70 + {"/CCF", "/CCITTFaxDecode"},
  71 + {"/DCT", "/DCTDecode"},
  72 +};
  73 +
  74 +std::map<
  75 + std::string,
  76 + std::function<std::shared_ptr<QPDFStreamFilter>()>>
  77 +QPDF_Stream::filter_factories = {
  78 + {"/Crypt", []() { return std::make_shared<SF_Crypt>(); }},
  79 + {"/FlateDecode", SF_FlateLzwDecode::flate_factory},
  80 + {"/LZWDecode", SF_FlateLzwDecode::lzw_factory},
  81 + {"/RunLengthDecode", SF_RunLengthDecode::factory},
  82 + {"/DCTDecode", SF_DCTDecode::factory},
  83 + {"/ASCII85Decode", SF_ASCII85Decode::factory},
  84 + {"/ASCIIHexDecode", SF_ASCIIHexDecode::factory},
  85 +};
26 86  
27 87 QPDF_Stream::QPDF_Stream(QPDF* qpdf, int objid, int generation,
28 88 QPDFObjectHandle stream_dict,
... ... @@ -48,6 +108,14 @@ QPDF_Stream::~QPDF_Stream()
48 108 }
49 109  
50 110 void
  111 +QPDF_Stream::registerStreamFilter(
  112 + std::string const& filter_name,
  113 + std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
  114 +{
  115 + filter_factories[filter_name] = factory;
  116 +}
  117 +
  118 +void
51 119 QPDF_Stream::releaseResolved()
52 120 {
53 121 this->stream_provider = 0;
... ... @@ -190,125 +258,18 @@ QPDF_Stream::getRawStreamData()
190 258 }
191 259  
192 260 bool
193   -QPDF_Stream::understandDecodeParams(
194   - std::string const& filter, QPDFObjectHandle decode_obj,
195   - int& predictor, int& columns,
196   - int& colors, int& bits_per_component,
197   - bool& early_code_change)
198   -{
199   - bool filterable = true;
200   - std::set<std::string> keys = decode_obj.getKeys();
201   - for (std::set<std::string>::iterator iter = keys.begin();
202   - iter != keys.end(); ++iter)
203   - {
204   - std::string const& key = *iter;
205   - if (((filter == "/FlateDecode") || (filter == "/LZWDecode")) &&
206   - (key == "/Predictor"))
207   - {
208   - QPDFObjectHandle predictor_obj = decode_obj.getKey(key);
209   - if (predictor_obj.isInteger())
210   - {
211   - predictor = predictor_obj.getIntValueAsInt();
212   - if (! ((predictor == 1) || (predictor == 2) ||
213   - ((predictor >= 10) && (predictor <= 15))))
214   - {
215   - filterable = false;
216   - }
217   - }
218   - else
219   - {
220   - filterable = false;
221   - }
222   - }
223   - else if ((filter == "/LZWDecode") && (key == "/EarlyChange"))
224   - {
225   - QPDFObjectHandle earlychange_obj = decode_obj.getKey(key);
226   - if (earlychange_obj.isInteger())
227   - {
228   - int earlychange = earlychange_obj.getIntValueAsInt();
229   - early_code_change = (earlychange == 1);
230   - if (! ((earlychange == 0) || (earlychange == 1)))
231   - {
232   - filterable = false;
233   - }
234   - }
235   - else
236   - {
237   - filterable = false;
238   - }
239   - }
240   - else if ((key == "/Columns") ||
241   - (key == "/Colors") ||
242   - (key == "/BitsPerComponent"))
243   - {
244   - QPDFObjectHandle param_obj = decode_obj.getKey(key);
245   - if (param_obj.isInteger())
246   - {
247   - int val = param_obj.getIntValueAsInt();
248   - if (key == "/Columns")
249   - {
250   - columns = val;
251   - }
252   - else if (key == "/Colors")
253   - {
254   - colors = val;
255   - }
256   - else if (key == "/BitsPerComponent")
257   - {
258   - bits_per_component = val;
259   - }
260   - }
261   - else
262   - {
263   - filterable = false;
264   - }
265   - }
266   - else if ((filter == "/Crypt") &&
267   - (((key == "/Type") || (key == "/Name")) &&
268   - (decode_obj.getKey("/Type").isNull() ||
269   - (decode_obj.getKey("/Type").isName() &&
270   - (decode_obj.getKey("/Type").getName() ==
271   - "/CryptFilterDecodeParms")))))
272   - {
273   - // we handle this in decryptStream
274   - }
275   - else
276   - {
277   - filterable = false;
278   - }
279   - }
280   -
281   - return filterable;
282   -}
283   -
284   -bool
285   -QPDF_Stream::filterable(std::vector<std::string>& filters,
286   - bool& specialized_compression,
287   - bool& lossy_compression,
288   - int& predictor, int& columns,
289   - int& colors, int& bits_per_component,
290   - bool& early_code_change)
  261 +QPDF_Stream::filterable(
  262 + std::vector<std::shared_ptr<QPDFStreamFilter>>& filters,
  263 + bool& specialized_compression,
  264 + bool& lossy_compression)
291 265 {
292   - if (filter_abbreviations.empty())
293   - {
294   - // The PDF specification provides these filter abbreviations
295   - // for use in inline images, but according to table H.1 in the
296   - // pre-ISO versions of the PDF specification, Adobe Reader
297   - // also accepts them for stream filters.
298   - filter_abbreviations["/AHx"] = "/ASCIIHexDecode";
299   - filter_abbreviations["/A85"] = "/ASCII85Decode";
300   - filter_abbreviations["/LZW"] = "/LZWDecode";
301   - filter_abbreviations["/Fl"] = "/FlateDecode";
302   - filter_abbreviations["/RL"] = "/RunLengthDecode";
303   - filter_abbreviations["/CCF"] = "/CCITTFaxDecode";
304   - filter_abbreviations["/DCT"] = "/DCTDecode";
305   - }
306   -
307 266 // Check filters
308 267  
309 268 QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter");
310 269 bool filters_okay = true;
311 270  
  271 + std::vector<std::string> filter_names;
  272 +
312 273 if (filter_obj.isNull())
313 274 {
314 275 // No filters
... ... @@ -316,7 +277,7 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
316 277 else if (filter_obj.isName())
317 278 {
318 279 // One filter
319   - filters.push_back(filter_obj.getName());
  280 + filter_names.push_back(filter_obj.getName());
320 281 }
321 282 else if (filter_obj.isArray())
322 283 {
... ... @@ -327,7 +288,7 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
327 288 QPDFObjectHandle item = filter_obj.getArrayItem(i);
328 289 if (item.isName())
329 290 {
330   - filters.push_back(item.getName());
  291 + filter_names.push_back(item.getName());
331 292 }
332 293 else
333 294 {
... ... @@ -351,34 +312,23 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
351 312  
352 313 bool filterable = true;
353 314  
354   - for (std::vector<std::string>::iterator iter = filters.begin();
355   - iter != filters.end(); ++iter)
  315 + for (auto& filter_name: filter_names)
356 316 {
357   - std::string& filter = *iter;
358   -
359   - if (filter_abbreviations.count(filter))
  317 + if (filter_abbreviations.count(filter_name))
360 318 {
361 319 QTC::TC("qpdf", "QPDF_Stream expand filter abbreviation");
362   - filter = filter_abbreviations[filter];
  320 + filter_name = filter_abbreviations[filter_name];
363 321 }
364 322  
365   - if (filter == "/RunLengthDecode")
  323 + auto ff = filter_factories.find(filter_name);
  324 + if (ff == filter_factories.end())
366 325 {
367   - specialized_compression = true;
  326 + filterable = false;
368 327 }
369   - else if (filter == "/DCTDecode")
  328 + else
370 329 {
371   - specialized_compression = true;
372   - lossy_compression = true;
  330 + filters.push_back((ff->second)());
373 331 }
374   - else if (! ((filter == "/Crypt") ||
375   - (filter == "/FlateDecode") ||
376   - (filter == "/LZWDecode") ||
377   - (filter == "/ASCII85Decode") ||
378   - (filter == "/ASCIIHexDecode")))
379   - {
380   - filterable = false;
381   - }
382 332 }
383 333  
384 334 if (! filterable)
... ... @@ -386,15 +336,8 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
386 336 return false;
387 337 }
388 338  
389   - // `filters' now contains a list of filters to be applied in
390   - // order. See which ones we can support.
391   -
392   - // Initialize values to their defaults as per the PDF spec
393   - predictor = 1;
394   - columns = 0;
395   - colors = 1;
396   - bits_per_component = 8;
397   - early_code_change = true;
  339 + // filters now contains a list of filters to be applied in order.
  340 + // See which ones we can support.
398 341  
399 342 // See if we can support any decode parameters that are specified.
400 343  
... ... @@ -413,7 +356,7 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
413 356 }
414 357 else
415 358 {
416   - for (unsigned int i = 0; i < filters.size(); ++i)
  359 + for (unsigned int i = 0; i < filter_names.size(); ++i)
417 360 {
418 361 decode_parms.push_back(decode_obj);
419 362 }
... ... @@ -436,21 +379,21 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
436 379 return false;
437 380 }
438 381  
439   - for (unsigned int i = 0; i < filters.size(); ++i)
  382 + for (size_t i = 0; i < filters.size(); ++i)
440 383 {
441   - QPDFObjectHandle decode_item = decode_parms.at(i);
442   - if (decode_item.isNull())
443   - {
444   - // okay
445   - }
446   - else if (decode_item.isDictionary())
  384 + auto filter = filters.at(i);
  385 + auto decode_item = decode_parms.at(i);
  386 +
  387 + if (filter->setDecodeParms(decode_item))
447 388 {
448   - if (! understandDecodeParams(
449   - filters.at(i), decode_item,
450   - predictor, columns, colors, bits_per_component,
451   - early_code_change))
  389 + if (filter->isSpecializedCompression())
452 390 {
453   - filterable = false;
  391 + specialized_compression = true;
  392 + }
  393 + if (filter->isLossyCompression())
  394 + {
  395 + specialized_compression = true;
  396 + lossy_compression = true;
454 397 }
455 398 }
456 399 else
... ... @@ -459,17 +402,6 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
459 402 }
460 403 }
461 404  
462   - if ((predictor > 1) && (columns == 0))
463   - {
464   - // invalid
465   - filterable = false;
466   - }
467   -
468   - if (! filterable)
469   - {
470   - return false;
471   - }
472   -
473 405 return filterable;
474 406 }
475 407  
... ... @@ -479,12 +411,7 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
479 411 qpdf_stream_decode_level_e decode_level,
480 412 bool suppress_warnings, bool will_retry)
481 413 {
482   - std::vector<std::string> filters;
483   - int predictor = 1;
484   - int columns = 0;
485   - int colors = 1;
486   - int bits_per_component = 8;
487   - bool early_code_change = true;
  414 + std::vector<std::shared_ptr<QPDFStreamFilter>> filters;
488 415 bool specialized_compression = false;
489 416 bool lossy_compression = false;
490 417 bool ignored;
... ... @@ -497,10 +424,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
497 424 bool success = true;
498 425 if (filter)
499 426 {
500   - filter = filterable(filters, specialized_compression, lossy_compression,
501   - predictor, columns,
502   - colors, bits_per_component,
503   - early_code_change);
  427 + filter = filterable(
  428 + filters, specialized_compression, lossy_compression);
504 429 if ((decode_level < qpdf_dl_all) && lossy_compression)
505 430 {
506 431 filter = false;
... ... @@ -523,9 +448,11 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
523 448 return filter;
524 449 }
525 450  
526   - // Construct the pipeline in reverse order. Force pipelines we
527   - // create to be deleted when this function finishes.
528   - std::vector<PointerHolder<Pipeline> > to_delete;
  451 + // Construct the pipeline in reverse order. Force pipelines we
  452 + // create to be deleted when this function finishes. Pipelines
  453 + // created by QPDFStreamFilter objects will be deleted by those
  454 + // objects.
  455 + std::vector<PointerHolder<Pipeline>> to_delete;
529 456  
530 457 PointerHolder<ContentNormalizer> normalizer;
531 458 if (filter)
... ... @@ -555,80 +482,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
555 482 to_delete.push_back(pipeline);
556 483 }
557 484  
558   - for (std::vector<std::string>::reverse_iterator f_iter =
559   - filters.rbegin();
560   - f_iter != filters.rend(); ++f_iter)
  485 + for (auto f_iter = filters.rbegin();
  486 + f_iter != filters.rend(); ++f_iter)
561 487 {
562   - std::string const& filter_name = *f_iter;
563   -
564   - if ((filter_name == "/FlateDecode") ||
565   - (filter_name == "/LZWDecode"))
  488 + auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline);
  489 + if (decode_pipeline)
566 490 {
567   - if ((predictor >= 10) && (predictor <= 15))
568   - {
569   - QTC::TC("qpdf", "QPDF_Stream PNG filter");
570   - pipeline = new Pl_PNGFilter(
571   - "png decode", pipeline, Pl_PNGFilter::a_decode,
572   - QIntC::to_uint(columns),
573   - QIntC::to_uint(colors),
574   - QIntC::to_uint(bits_per_component));
575   - to_delete.push_back(pipeline);
576   - }
577   - else if (predictor == 2)
578   - {
579   - QTC::TC("qpdf", "QPDF_Stream TIFF predictor");
580   - pipeline = new Pl_TIFFPredictor(
581   - "tiff decode", pipeline, Pl_TIFFPredictor::a_decode,
582   - QIntC::to_uint(columns),
583   - QIntC::to_uint(colors),
584   - QIntC::to_uint(bits_per_component));
585   - to_delete.push_back(pipeline);
586   - }
  491 + pipeline = decode_pipeline;
587 492 }
588   -
589   - if (filter_name == "/Crypt")
590   - {
591   - // Ignore -- handled by pipeStreamData
592   - }
593   - else if (filter_name == "/FlateDecode")
594   - {
595   - pipeline = new Pl_Flate("stream inflate",
596   - pipeline, Pl_Flate::a_inflate);
597   - to_delete.push_back(pipeline);
598   - }
599   - else if (filter_name == "/ASCII85Decode")
600   - {
601   - pipeline = new Pl_ASCII85Decoder("ascii85 decode", pipeline);
602   - to_delete.push_back(pipeline);
603   - }
604   - else if (filter_name == "/ASCIIHexDecode")
605   - {
606   - pipeline = new Pl_ASCIIHexDecoder("asciiHex decode", pipeline);
607   - to_delete.push_back(pipeline);
608   - }
609   - else if (filter_name == "/LZWDecode")
610   - {
611   - pipeline = new Pl_LZWDecoder("lzw decode", pipeline,
612   - early_code_change);
613   - to_delete.push_back(pipeline);
614   - }
615   - else if (filter_name == "/RunLengthDecode")
616   - {
617   - pipeline = new Pl_RunLength("runlength decode", pipeline,
618   - Pl_RunLength::a_decode);
619   - to_delete.push_back(pipeline);
620   - }
621   - else if (filter_name == "/DCTDecode")
622   - {
623   - pipeline = new Pl_DCT("DCT decode", pipeline);
624   - to_delete.push_back(pipeline);
625   - }
626   - else
627   - {
628   - throw std::logic_error(
629   - "INTERNAL ERROR: QPDFStream: unknown filter "
630   - "encountered after check");
631   - }
632 493 }
633 494 }
634 495  
... ...
libqpdf/SF_FlateLzwDecode.cc 0 → 100644
  1 +#include <qpdf/SF_FlateLzwDecode.hh>
  2 +#include <qpdf/Pl_PNGFilter.hh>
  3 +#include <qpdf/Pl_TIFFPredictor.hh>
  4 +#include <qpdf/Pl_Flate.hh>
  5 +#include <qpdf/Pl_LZWDecoder.hh>
  6 +#include <qpdf/QTC.hh>
  7 +#include <qpdf/QIntC.hh>
  8 +
  9 +SF_FlateLzwDecode::SF_FlateLzwDecode(bool lzw) :
  10 + lzw(lzw),
  11 + // Initialize values to their defaults as per the PDF spec
  12 + predictor(1),
  13 + columns(0),
  14 + colors(1),
  15 + bits_per_component(8),
  16 + early_code_change(true)
  17 +{
  18 +}
  19 +
  20 +bool
  21 +SF_FlateLzwDecode::setDecodeParms(QPDFObjectHandle decode_parms)
  22 +{
  23 + if (decode_parms.isNull())
  24 + {
  25 + return true;
  26 + }
  27 +
  28 + bool filterable = true;
  29 + std::set<std::string> keys = decode_parms.getKeys();
  30 + for (auto const& key: keys)
  31 + {
  32 + QPDFObjectHandle value = decode_parms.getKey(key);
  33 + if (key == "/Predictor")
  34 + {
  35 + if (value.isInteger())
  36 + {
  37 + this->predictor = value.getIntValueAsInt();
  38 + if (! ((this->predictor == 1) || (this->predictor == 2) ||
  39 + ((this->predictor >= 10) && (this->predictor <= 15))))
  40 + {
  41 + filterable = false;
  42 + }
  43 + }
  44 + else
  45 + {
  46 + filterable = false;
  47 + }
  48 + }
  49 + else if ((key == "/Columns") ||
  50 + (key == "/Colors") ||
  51 + (key == "/BitsPerComponent"))
  52 + {
  53 + if (value.isInteger())
  54 + {
  55 + int val = value.getIntValueAsInt();
  56 + if (key == "/Columns")
  57 + {
  58 + this->columns = val;
  59 + }
  60 + else if (key == "/Colors")
  61 + {
  62 + this->colors = val;
  63 + }
  64 + else if (key == "/BitsPerComponent")
  65 + {
  66 + this->bits_per_component = val;
  67 + }
  68 + }
  69 + else
  70 + {
  71 + filterable = false;
  72 + }
  73 + }
  74 + else if (lzw && (key == "/EarlyChange"))
  75 + {
  76 + if (value.isInteger())
  77 + {
  78 + int earlychange = value.getIntValueAsInt();
  79 + this->early_code_change = (earlychange == 1);
  80 + if (! ((earlychange == 0) || (earlychange == 1)))
  81 + {
  82 + filterable = false;
  83 + }
  84 + }
  85 + else
  86 + {
  87 + filterable = false;
  88 + }
  89 + }
  90 + }
  91 +
  92 + if ((this->predictor > 1) && (this->columns == 0))
  93 + {
  94 + filterable = false;
  95 + }
  96 +
  97 + return filterable;
  98 +}
  99 +
  100 +
  101 +
  102 +Pipeline*
  103 +SF_FlateLzwDecode::getDecodePipeline(Pipeline* next)
  104 +{
  105 + std::shared_ptr<Pipeline> pipeline;
  106 + if ((this->predictor >= 10) && (this->predictor <= 15))
  107 + {
  108 + QTC::TC("qpdf", "SF_FlateLzwDecode PNG filter");
  109 + pipeline = std::make_shared<Pl_PNGFilter>(
  110 + "png decode", next, Pl_PNGFilter::a_decode,
  111 + QIntC::to_uint(this->columns),
  112 + QIntC::to_uint(this->colors),
  113 + QIntC::to_uint(this->bits_per_component));
  114 + this->pipelines.push_back(pipeline);
  115 + next = pipeline.get();
  116 + }
  117 + else if (this->predictor == 2)
  118 + {
  119 + QTC::TC("qpdf", "SF_FlateLzwDecode TIFF predictor");
  120 + pipeline = std::make_shared<Pl_TIFFPredictor>(
  121 + "tiff decode", next, Pl_TIFFPredictor::a_decode,
  122 + QIntC::to_uint(this->columns),
  123 + QIntC::to_uint(this->colors),
  124 + QIntC::to_uint(this->bits_per_component));
  125 + this->pipelines.push_back(pipeline);
  126 + next = pipeline.get();
  127 + }
  128 +
  129 + if (lzw)
  130 + {
  131 + pipeline = std::make_shared<Pl_LZWDecoder>(
  132 + "lzw decode", next, early_code_change);
  133 + }
  134 + else
  135 + {
  136 + pipeline = std::make_shared<Pl_Flate>(
  137 + "stream inflate", next, Pl_Flate::a_inflate);
  138 + }
  139 + this->pipelines.push_back(pipeline);
  140 + return pipeline.get();
  141 +}
  142 +
  143 +std::shared_ptr<QPDFStreamFilter>
  144 +SF_FlateLzwDecode::flate_factory()
  145 +{
  146 + return std::make_shared<SF_FlateLzwDecode>(false);
  147 +}
  148 +
  149 +std::shared_ptr<QPDFStreamFilter>
  150 +SF_FlateLzwDecode::lzw_factory()
  151 +{
  152 + return std::make_shared<SF_FlateLzwDecode>(true);
  153 +}
... ...
libqpdf/build.mk
... ... @@ -70,6 +70,7 @@ SRCS_libqpdf = \
70 70 libqpdf/QPDFPageDocumentHelper.cc \
71 71 libqpdf/QPDFPageLabelDocumentHelper.cc \
72 72 libqpdf/QPDFPageObjectHelper.cc \
  73 + libqpdf/QPDFStreamFilter.cc \
73 74 libqpdf/QPDFSystemError.cc \
74 75 libqpdf/QPDFTokenizer.cc \
75 76 libqpdf/QPDFWriter.cc \
... ... @@ -94,6 +95,7 @@ SRCS_libqpdf = \
94 95 libqpdf/QUtil.cc \
95 96 libqpdf/RC4.cc \
96 97 libqpdf/SecureRandomDataProvider.cc \
  98 + libqpdf/SF_FlateLzwDecode.cc \
97 99 libqpdf/SparseOHArray.cc \
98 100 libqpdf/qpdf-c.cc
99 101  
... ...
libqpdf/qpdf/QPDF_Stream.hh
... ... @@ -5,6 +5,10 @@
5 5  
6 6 #include <qpdf/QPDFObject.hh>
7 7 #include <qpdf/QPDFObjectHandle.hh>
  8 +#include <qpdf/QPDFStreamFilter.hh>
  9 +
  10 +#include <functional>
  11 +#include <memory>
8 12  
9 13 class Pipeline;
10 14 class QPDF;
... ... @@ -49,6 +53,10 @@ class QPDF_Stream: public QPDFObject
49 53  
50 54 void replaceDict(QPDFObjectHandle new_dict);
51 55  
  56 + static void registerStreamFilter(
  57 + std::string const& filter_name,
  58 + std::function<std::shared_ptr<QPDFStreamFilter>()> factory);
  59 +
52 60 // Replace object ID and generation. This may only be called if
53 61 // object ID and generation are 0. It is used by QPDFObjectHandle
54 62 // when adding streams to files.
... ... @@ -59,20 +67,15 @@ class QPDF_Stream: public QPDFObject
59 67  
60 68 private:
61 69 static std::map<std::string, std::string> filter_abbreviations;
  70 + static std::map<
  71 + std::string,
  72 + std::function<std::shared_ptr<QPDFStreamFilter>()>> filter_factories;
62 73  
63 74 void replaceFilterData(QPDFObjectHandle const& filter,
64 75 QPDFObjectHandle const& decode_parms,
65 76 size_t length);
66   - bool understandDecodeParams(
67   - std::string const& filter, QPDFObjectHandle decode_params,
68   - int& predictor, int& columns,
69   - int& colors, int& bits_per_component,
70   - bool& early_code_change);
71   - bool filterable(std::vector<std::string>& filters,
72   - bool& specialized_compression, bool& lossy_compression,
73   - int& predictor, int& columns,
74   - int& colors, int& bits_per_component,
75   - bool& early_code_change);
  77 + bool filterable(std::vector<std::shared_ptr<QPDFStreamFilter>>& filters,
  78 + bool& specialized_compression, bool& lossy_compression);
76 79 void warn(QPDFExc const& e);
77 80 void setDictDescription();
78 81 void setStreamDescription();
... ...
libqpdf/qpdf/SF_ASCII85Decode.hh 0 → 100644
  1 +#include <qpdf/QPDFStreamFilter.hh>
  2 +#include <qpdf/Pl_ASCII85Decoder.hh>
  3 +#include <memory>
  4 +
  5 +#ifndef SF_ASCII85DECODE_HH
  6 +#define SF_ASCII85DECODE_HH
  7 +
  8 +class SF_ASCII85Decode: public QPDFStreamFilter
  9 +{
  10 + public:
  11 + SF_ASCII85Decode() = default;
  12 + virtual ~SF_ASCII85Decode() = default;
  13 +
  14 + virtual Pipeline* getDecodePipeline(Pipeline* next) override
  15 + {
  16 + this->pipeline = std::make_shared<Pl_ASCII85Decoder>(
  17 + "ascii85 decode", next);
  18 + return this->pipeline.get();
  19 + }
  20 +
  21 + static std::shared_ptr<QPDFStreamFilter> factory()
  22 + {
  23 + return std::make_shared<SF_ASCII85Decode>();
  24 + }
  25 +
  26 + private:
  27 + std::shared_ptr<Pipeline> pipeline;
  28 +};
  29 +
  30 +#endif // SF_ASCII85DECODE_HH
... ...
libqpdf/qpdf/SF_ASCIIHexDecode.hh 0 → 100644
  1 +#include <qpdf/QPDFStreamFilter.hh>
  2 +#include <qpdf/Pl_ASCIIHexDecoder.hh>
  3 +#include <memory>
  4 +
  5 +#ifndef SF_ASCIIHEXDECODE_HH
  6 +#define SF_ASCIIHEXDECODE_HH
  7 +
  8 +class SF_ASCIIHexDecode: public QPDFStreamFilter
  9 +{
  10 + public:
  11 + SF_ASCIIHexDecode() = default;
  12 + virtual ~SF_ASCIIHexDecode() = default;
  13 +
  14 + virtual Pipeline* getDecodePipeline(Pipeline* next) override
  15 + {
  16 + this->pipeline = std::make_shared<Pl_ASCIIHexDecoder>(
  17 + "asciiHex decode", next);
  18 + return this->pipeline.get();
  19 + }
  20 +
  21 + static std::shared_ptr<QPDFStreamFilter> factory()
  22 + {
  23 + return std::make_shared<SF_ASCIIHexDecode>();
  24 + }
  25 +
  26 + private:
  27 + std::shared_ptr<Pipeline> pipeline;
  28 +};
  29 +
  30 +#endif // SF_ASCIIHEXDECODE_HH
... ...
libqpdf/qpdf/SF_DCTDecode.hh 0 → 100644
  1 +#include <qpdf/QPDFStreamFilter.hh>
  2 +#include <qpdf/Pl_DCT.hh>
  3 +#include <memory>
  4 +
  5 +#ifndef SF_DCTDECODE_HH
  6 +#define SF_DCTDECODE_HH
  7 +
  8 +class SF_DCTDecode: public QPDFStreamFilter
  9 +{
  10 + public:
  11 + SF_DCTDecode() = default;
  12 + virtual ~SF_DCTDecode() = default;
  13 +
  14 + virtual Pipeline* getDecodePipeline(Pipeline* next) override
  15 + {
  16 + this->pipeline = std::make_shared<Pl_DCT>("DCT decode", next);
  17 + return this->pipeline.get();
  18 + }
  19 +
  20 + static std::shared_ptr<QPDFStreamFilter> factory()
  21 + {
  22 + return std::make_shared<SF_DCTDecode>();
  23 + }
  24 +
  25 + virtual bool isSpecializedCompression() override
  26 + {
  27 + return true;
  28 + }
  29 +
  30 + virtual bool isLossyCompression() override
  31 + {
  32 + return true;
  33 + }
  34 +
  35 + private:
  36 + std::shared_ptr<Pipeline> pipeline;
  37 +};
  38 +
  39 +#endif // SF_DCTDECODE_HH
... ...
libqpdf/qpdf/SF_FlateLzwDecode.hh 0 → 100644
  1 +#include <qpdf/QPDFStreamFilter.hh>
  2 +#include <memory>
  3 +#include <vector>
  4 +
  5 +#ifndef SF_FLATELZWDECODE_HH
  6 +#define SF_FLATELZWDECODE_HH
  7 +
  8 +class SF_FlateLzwDecode: public QPDFStreamFilter
  9 +{
  10 + public:
  11 + SF_FlateLzwDecode(bool lzw);
  12 + virtual ~SF_FlateLzwDecode() = default;
  13 +
  14 + virtual bool setDecodeParms(QPDFObjectHandle decode_parms);
  15 + virtual Pipeline* getDecodePipeline(Pipeline* next);
  16 +
  17 + static std::shared_ptr<QPDFStreamFilter> flate_factory();
  18 + static std::shared_ptr<QPDFStreamFilter> lzw_factory();
  19 +
  20 + private:
  21 + bool lzw;
  22 + int predictor;
  23 + int columns;
  24 + int colors;
  25 + int bits_per_component;
  26 + bool early_code_change;
  27 + std::vector<std::shared_ptr<Pipeline>> pipelines;
  28 +};
  29 +
  30 +#endif // SF_FLATELZWDECODE_HH
... ...
libqpdf/qpdf/SF_RunLengthDecode.hh 0 → 100644
  1 +#include <qpdf/QPDFStreamFilter.hh>
  2 +#include <qpdf/Pl_RunLength.hh>
  3 +#include <memory>
  4 +
  5 +#ifndef SF_RUNLENGTHDECODE_HH
  6 +#define SF_RUNLENGTHDECODE_HH
  7 +
  8 +class SF_RunLengthDecode: public QPDFStreamFilter
  9 +{
  10 + public:
  11 + SF_RunLengthDecode() = default;
  12 + virtual ~SF_RunLengthDecode() = default;
  13 +
  14 + virtual Pipeline* getDecodePipeline(Pipeline* next) override
  15 + {
  16 + this->pipeline = std::make_shared<Pl_RunLength>(
  17 + "runlength decode", next, Pl_RunLength::a_decode);
  18 + return this->pipeline.get();
  19 + }
  20 +
  21 + static std::shared_ptr<QPDFStreamFilter> factory()
  22 + {
  23 + return std::make_shared<SF_RunLengthDecode>();
  24 + }
  25 +
  26 + virtual bool isSpecializedCompression() override
  27 + {
  28 + return true;
  29 + }
  30 +
  31 + private:
  32 + std::shared_ptr<Pipeline> pipeline;
  33 +};
  34 +
  35 +#endif // SF_RUNLENGTHDECODE_HH
... ...
qpdf/qpdf.testcov
... ... @@ -86,7 +86,7 @@ QPDF prev key in trailer dictionary 0
86 86 QPDF found xref stream 0
87 87 QPDF ignoring XRefStm in trailer 0
88 88 QPDF xref deleted object 0
89   -QPDF_Stream PNG filter 0
  89 +SF_FlateLzwDecode PNG filter 0
90 90 QPDF xref /Index is null 0
91 91 QPDF xref /Index is array 1
92 92 QPDFWriter copy Extends 0
... ... @@ -294,7 +294,7 @@ qpdf-c called qpdf_set_decode_level 0
294 294 qpdf-c called qpdf_set_compress_streams 0
295 295 qpdf-c called qpdf_set_preserve_unreferenced_objects 0
296 296 qpdf-c called qpdf_set_newline_before_endstream 0
297   -QPDF_Stream TIFF predictor 0
  297 +SF_FlateLzwDecode TIFF predictor 0
298 298 QPDFTokenizer inline image at EOF 0
299 299 Pl_QPDFTokenizer found ID 0
300 300 QPDFObjectHandle non-stream in stream array 0
... ...