Commit d4d7630cf544dc295202382026658b55bf49f76b

Authored by Jay Berkenbilt
1 parent ac042d16

Add pdf-custom-filter example

ChangeLog
... ... @@ -27,7 +27,8 @@
27 27 provide code to validate and interpret /DecodeParms for a specific
28 28 /Filter and also to provide a pipeline that will decode. Note that
29 29 it is possible to encode to a filter type that is not supported
30   - even without this feature.
  30 + even without this feature. See examples/pdf-custom-filter.cc for
  31 + an example of using custom stream filters.
31 32  
32 33 2020-12-22 Jay Berkenbilt <ejb@ql.org>
33 34  
... ...
... ... @@ -589,6 +589,8 @@ I find it useful to make reference to them in this list
589 589 a stream data provider is especially expensive, it can implement
590 590 its own cache.
591 591  
592   - The implementation of pluggable stream filters includes an example
593   - that illustrates how a program might handle making decisions about
594   - filters and decode parameters based on the input data.
  592 + The example examples/pdf-custom-filter.cc demonstrates the use of
  593 + custom stream filters. This includes a custom pipeline, a custom
  594 + stream filter, as well as modification of a stream's dictionary to
  595 + include creation of a new stream that is referenced from
  596 + /DecodeParms.
... ...
examples/build.mk
1 1 BINS_examples = \
2 2 pdf-bookmarks \
3   - pdf-mod-info \
4   - pdf-npages \
  3 + pdf-count-strings \
  4 + pdf-create \
  5 + pdf-custom-filter \
5 6 pdf-double-page-size \
  7 + pdf-filter-tokens \
6 8 pdf-invert-images \
7   - pdf-create \
  9 + pdf-mod-info \
  10 + pdf-npages \
  11 + pdf-overlay-page \
8 12 pdf-parse-content \
9   - pdf-split-pages \
10   - pdf-filter-tokens \
11   - pdf-count-strings \
12 13 pdf-set-form-values \
13   - pdf-overlay-page
  14 + pdf-split-pages
14 15 CBINS_examples = \
15 16 pdf-c-objects \
16 17 pdf-linearize
... ...
examples/pdf-custom-filter.cc 0 → 100644
#include <qpdf/QIntC.hh>
#include <qpdf/QPDF.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/QPDFWriter.hh>
#include <qpdf/QPDFStreamFilter.hh>

#include <cstdlib>
#include <cstring>
#include <exception>
#include <iostream>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
  10 +
  11 +// This example shows you everything you need to know to implement a
  12 +// custom stream filter for encoding and decoding as well as a stream
  13 +// data provider that modifies the stream's dictionary. This example
  14 +// uses the pattern of having the stream data provider class use a
  15 +// second QPDF instance with copies of streams from the original QPDF
  16 +// so that the stream data provider can access the original stream
  17 +// data. This is implemented very efficiently inside the qpdf library as
  18 +// the second QPDF instance knows how to read the stream data from the
  19 +// original input file, so no extra copies of the original stream data
  20 +// are made.
  21 +
  22 +// This example creates an imaginary filter called /XORDecode. There
  23 +// is no such filter in PDF, so the streams created by the example
  24 +// would not be usable by any PDF reader. However, the techniques here
  25 +// would work if you were going to implement support for a filter that
  26 +// qpdf does not support natively. For example, using the techniques
  27 +// shown here, it would be possible to create an application that
  28 +// downsampled or re-encoded images or that re-compressed streams
  29 +// using a more efficient "deflate" implementation than zlib.
  30 +
  31 +// Comments appear throughout the code describing each piece of code
  32 +// and its purpose. You can read the file top to bottom, or you can
  33 +// start with main() and follow the flow.
  34 +
  35 +// Please also see the test suite, qtest/custom-filter.test, which
  36 +// contains additional comments describing how to observe the results
  37 +// of running this example on test files that are specifically crafted
  38 +// for it.
  39 +
  40 +static char const* whoami = 0;
  41 +
  42 +
  43 +class Pl_XOR: public Pipeline
  44 +{
  45 + // This class implements a Pipeline for the made-up XOR decoder.
  46 + // It is initialized with a single-byte "key" and just XORs each
  47 + // byte with that key. This makes it reversible, so there is no
  48 + // distinction between encoding and decoding.
  49 +
  50 + public:
  51 + Pl_XOR(char const* identifier, Pipeline* next, unsigned char key);
  52 + virtual ~Pl_XOR() = default;
  53 + virtual void write(unsigned char* data, size_t len) override;
  54 + virtual void finish() override;
  55 +
  56 + private:
  57 + unsigned char key;
  58 +};
  59 +
  60 +Pl_XOR::Pl_XOR(char const* identifier, Pipeline* next, unsigned char key) :
  61 + Pipeline(identifier, next),
  62 + key(key)
  63 +{
  64 +}
  65 +
  66 +void
  67 +Pl_XOR::write(unsigned char* data, size_t len)
  68 +{
  69 + for (size_t i = 0; i < len; ++i)
  70 + {
  71 + unsigned char p = data[i] ^ this->key;
  72 + getNext()->write(&p, 1);
  73 + }
  74 +}
  75 +
  76 +void
  77 +Pl_XOR::finish()
  78 +{
  79 + getNext()->finish();
  80 +}
  81 +
  82 +class SF_XORDecode: public QPDFStreamFilter
  83 +{
  84 + // This class implements a QPDFStreamFilter that knows how to
  85 + // validate and interpret decode parameters (/DecodeParms) for the
  86 + // made-up /XORDecode stream filter. Since this is not a real
  87 + // stream filter, no actual PDF reader would know how to interpret
  88 + // it. This is just to illlustrate how to create a stream filter.
  89 + // In main(), we call QPDF::registerStreamFilter to tell the
  90 + // library about the filter. See comments in QPDFStreamFilter.hh
  91 + // for details on how to implement the methods. For purposes of
  92 + // example, we are calling this a "specialized" compression
  93 + // filter, which just means QPDF assumes that it should not
  94 + // "uncompress" the stream by default.
  95 + public:
  96 + virtual ~SF_XORDecode() = default;
  97 + virtual bool setDecodeParms(QPDFObjectHandle decode_parms) override;
  98 + virtual Pipeline* getDecodePipeline(Pipeline* next) override;
  99 + virtual bool isSpecializedCompression() override;
  100 +
  101 + private:
  102 + unsigned char key;
  103 + // It is the responsibility of the QPDFStreamFilter implementation
  104 + // to ensure that the pipeline returned by getDecodePipeline() is
  105 + // deleted when the class is deleted. The easiest way to do this
  106 + // is to stash the pipeline in a std::shared_ptr, which enables us
  107 + // to use the default destructor implementation.
  108 + std::shared_ptr<Pl_XOR> pipeline;
  109 +};
  110 +
  111 +bool
  112 +SF_XORDecode::setDecodeParms(QPDFObjectHandle decode_parms)
  113 +{
  114 + // For purposes of example, we store the key in a separate stream.
  115 + // We could just as well store the key directly in /DecodeParms,
  116 + // but this example uses a stream to illustrate how one might do
  117 + // that. For example, if implementing /JBIG2Decode, one would need
  118 + // to handle the /JBIG2Globals key, which points to a stream. See
  119 + // comments in SF_XORDecode::registerStream for additional notes
  120 + // on this.
  121 + try
  122 + {
  123 + // Expect /DecodeParms to be a dictionary with a /KeyStream
  124 + // key that points to a one-byte stream whose single byte is
  125 + // the key. If we are successful at retrieving the key, return
  126 + // true, indicating that we are able to process with the given
  127 + // decode parameters. Under any other circumstances, return
  128 + // false. For other examples of QPDFStreamFilter
  129 + // implementations, look at the classes whose names start with
  130 + // SF_ in the qpdf library implementation.
  131 + auto buf = decode_parms.getKey("/KeyStream").getStreamData();
  132 + if (buf->getSize() != 1)
  133 + {
  134 + return false;
  135 + }
  136 + this->key = buf->getBuffer()[0];
  137 + return true;
  138 + }
  139 + catch (std::exception& e)
  140 + {
  141 + std::cerr << "Error extracting key for /XORDecode: "
  142 + << e.what() << std::endl;
  143 + }
  144 + return false;
  145 +}
  146 +
  147 +Pipeline*
  148 +SF_XORDecode::getDecodePipeline(Pipeline* next)
  149 +{
  150 + // Return a pipeline that the qpdf library should pass the stream
  151 + // data through. The pipeline should receive encoded data and pass
  152 + // decoded data to "next". getDecodePipeline() can always count on
  153 + // setDecodeParms() having been called first. The setDecodeParms()
  154 + // method should store any parameters needed by the pipeline. To
  155 + // ensure that the pipeline we return disappears when the class
  156 + // disappears, stash it in a std::shared_ptr<Pl_XOR> and retrieve
  157 + // the raw pointer from there.
  158 + this->pipeline = std::make_shared<Pl_XOR>("xor", next, this->key);
  159 + return this->pipeline.get();
  160 +}
  161 +
  162 +bool
  163 +SF_XORDecode::isSpecializedCompression()
  164 +{
  165 + // The default implementation of QPDFStreamFilter would return
  166 + // false, so if you want a specialized or lossy compression
  167 + // filter, override one of the methods as described in
  168 + // QPDFStreamFilter.hh.
  169 + return true;
  170 +}
  171 +
  172 +class StreamReplacer: public QPDFObjectHandle::StreamDataProvider
  173 +{
  174 + // This class implements a StreamDataProvider that, under specific
  175 + // conditions, replaces the stream data with data encoded with the
  176 + // made-up /XORDecode filter.
  177 +
  178 + // The flow for this class is as follows:
  179 + //
  180 + // * The main application iterates through streams that should be
  181 + // replaced and calls registerStream. registerStream in turn
  182 + // calls maybeReplace passing nullptr to pipeline and the
  183 + // address of a valid QPDFObjectHandle to dict_updates. The
  184 + // stream passed in for this call is the stream for the original
  185 + // QPDF object. It has not yet been altered, so we have access
  186 + // to its original dictionary and data. As described in the
  187 + // method, the method when called in this way makes a
  188 + // determination as to whether the stream should be replaced. If
  189 + // so, registerStream makes whatever changes are required. We
  190 + // have to do this now because we can't modify the stream during
  191 + // the writing process.
  192 + //
  193 + // * provideStreamData(), which is called by QPDFWriter during the
  194 + // write process, actually writes the modified stream data. It
  195 + // calls maybeReplace again, but this time it passes a valid
  196 + // pipeline and passes nullptr to dict_updates. In this mode,
  197 + // the stream dictionary has already been altered, and the
  198 + // original stream data is no longer directly accessible. Trying
  199 + // to retrieve the stream data would be an infinite loop because
  200 + // it would just end up calling provideStreamData again. This is
  201 + // why maybeReplace uses a stashed copy of the original stream
  202 + // from the "other" QPDF object.
  203 +
  204 + // Additional explanation can be found in the method
  205 + // implementations.
  206 +
  207 + public:
  208 + StreamReplacer(QPDF* pdf);
  209 + virtual ~StreamReplacer() = default;
  210 + virtual void provideStreamData(int objid, int generation,
  211 + Pipeline* pipeline) override;
  212 +
  213 + void registerStream(
  214 + QPDFObjectHandle stream,
  215 + PointerHolder<QPDFObjectHandle::StreamDataProvider> self);
  216 +
  217 + private:
  218 + bool maybeReplace(QPDFObjGen const& og,
  219 + QPDFObjectHandle& stream, Pipeline* pipeline,
  220 + QPDFObjectHandle* dict_updates);
  221 +
  222 + // Hang onto a reference to the QPDF object containing the streams
  223 + // we are replacing. We need this to create a new stream.
  224 + QPDF* pdf;
  225 +
  226 + // This second QPDF instance gives us a place to copy streams to
  227 + // so that we can access the original stream data of the streams
  228 + // whose data we are replacing.
  229 + QPDF other;
  230 +
  231 + // Map the object/generation in original file to the copied stream
  232 + // in "other". We use this to retrieve the original data.
  233 + std::map<QPDFObjGen, QPDFObjectHandle> copied_streams;
  234 +
  235 + // Each stream gets is own "key" for the XOR filter. We use a
  236 + // single instance of StreamReplacer for all streams, so stash all
  237 + // the keys here.
  238 + std::map<QPDFObjGen, unsigned char> keys;
  239 +};
  240 +
  241 +StreamReplacer::StreamReplacer(QPDF* pdf) :
  242 + pdf(pdf)
  243 +{
  244 + // Our "other" QPDF is just a place to stash streams. It doesn't
  245 + // have to be a valid PDF with pages, etc. We are never going to
  246 + // write this out.
  247 + this->other.emptyPDF();
  248 +}
  249 +
  250 +bool
  251 +StreamReplacer::maybeReplace(QPDFObjGen const& og,
  252 + QPDFObjectHandle& stream,
  253 + Pipeline* pipeline,
  254 + QPDFObjectHandle* dict_updates)
  255 +{
  256 + // As described in the class comments, this method is called
  257 + // twice. Before writing has started pipeline is nullptr, and
  258 + // dict_updates is provided. In this mode, we figure out whether
  259 + // we should replace the stream and, if so, take care of the
  260 + // necessary setup. When we are actually ready to supply the data,
  261 + // this method is called again with pipeline populated and
  262 + // dict_updates as a nullptr. In this mode, we are not allowed to
  263 + // change anything, sincing writing is already in progress. We
  264 + // must simply provide the stream data.
  265 +
  266 + // The return value indicates whether or not we should replace the
  267 + // stream. If the first call returns false, there will be no
  268 + // second call. If the second call returns false, something went
  269 + // wrong since the method should always make the same decision for
  270 + // a given stream.
  271 +
  272 + // For this example, all the determination logic could have
  273 + // appeared inside the if (dict_updates) block rather than being
  274 + // duplicated, but in some cases, there may be a reason to
  275 + // duplicate things. For example, if you wanted to write code that
  276 + // re-encoded an image if the new encoding was more efficient,
  277 + // you'd have to actually try it out. Then you would either have
  278 + // to cache the result somewhere or just repeat the calculations,
  279 + // depending on space/time constraints, etc.
  280 +
  281 + // In our contrived example, we are replacing the data for all
  282 + // streams that have /DoXOR = true in the stream dictionary. If
  283 + // this were a more realistic application, our criteria would be
  284 + // more sensible. For example, an image downsampler might choose
  285 + // to replace a stream that represented an image with a high pixel
  286 + // density.
  287 + auto dict = stream.getDict();
  288 + auto mark = dict.getKey("/DoXOR");
  289 + if (! (mark.isBool() && mark.getBoolValue()))
  290 + {
  291 + return false;
  292 + }
  293 +
  294 + // We can't replace the stream data if we can't get the original
  295 + // stream data for any reason. A more realistic application may
  296 + // actually look at the data here as well, or it may be able to
  297 + // make all its decisions from the stream dictionary. However,
  298 + // it's a good idea to make sure we can retrieve the filtered data
  299 + // if we are going to need it later.
  300 + PointerHolder<Buffer> out;
  301 + try
  302 + {
  303 + out = stream.getStreamData();
  304 + }
  305 + catch (...)
  306 + {
  307 + return false;
  308 + }
  309 +
  310 + if (dict_updates)
  311 + {
  312 + // It's not safe to make any modifications to any objects
  313 + // during the writing process since the updated objects may
  314 + // have already been written. In this mode, when dict_updates
  315 + // is provided, we have not started writing. Store the
  316 + // modifications we intend to make to the stream dictionary
  317 + // here. We're just storing /OrigLength for purposes of
  318 + // example. Again, a realistic application would make other
  319 + // changes. For example, an image resampler might change the
  320 + // dimensions or other properties of the image.
  321 + dict_updates->replaceKey(
  322 + "/OrigLength", QPDFObjectHandle::newInteger(
  323 + QIntC::to_longlong(out->getSize())));
  324 + // We are also storing the "key" that we will access when
  325 + // writing the data.
  326 + this->keys[og] = QIntC::to_uchar(
  327 + (og.getObj() * QIntC::to_int(out->getSize())) & 0xff);
  328 + }
  329 +
  330 + if (pipeline)
  331 + {
  332 + unsigned char key = this->keys[og];
  333 + Pl_XOR p("xor", pipeline, key);
  334 + p.write(out->getBuffer(), out->getSize());
  335 + p.finish();
  336 + }
  337 + return true;
  338 +}
  339 +
  340 +void
  341 +StreamReplacer::registerStream(
  342 + QPDFObjectHandle stream,
  343 + PointerHolder<QPDFObjectHandle::StreamDataProvider> self)
  344 +{
  345 + QPDFObjGen og(stream.getObjGen());
  346 +
  347 + // We don't need to process a stream more than once. In this
  348 + // example, we are just iterating through objects, but if we were
  349 + // doing something like iterating through images on pages, we
  350 + // might realistically encounter the same stream more than once.
  351 + if (this->copied_streams.count(og) > 0)
  352 + {
  353 + return;
  354 + }
  355 + // Store something in copied_streams so that we don't
  356 + // double-process even in the negative case. This gets replaced
  357 + // later if needed.
  358 + this->copied_streams[og] = QPDFObjectHandle::newNull();
  359 +
  360 + // Call maybeReplace with dict_updates. In this mode, it
  361 + // determines whether we should replace the stream data and, if
  362 + // so, supplies dictionary updates we should make.
  363 + bool should_replace = false;
  364 + QPDFObjectHandle dict_updates = QPDFObjectHandle::newDictionary();
  365 + try
  366 + {
  367 + should_replace = maybeReplace(og, stream, nullptr, &dict_updates);
  368 + }
  369 + catch (std::exception& e)
  370 + {
  371 + stream.warnIfPossible(
  372 + std::string("exception while attempting to replace: ") +
  373 + e.what());
  374 + }
  375 +
  376 + if (should_replace)
  377 + {
  378 + // Copy the stream to another QPDF object so we can get to the
  379 + // original data from the stream data provider.
  380 + this->copied_streams[og] = this->other.copyForeignObject(stream);
  381 + // Update the stream dictionary with any changes.
  382 + auto dict = stream.getDict();
  383 + for (auto const& k: dict_updates.getKeys())
  384 + {
  385 + dict.replaceKey(k, dict_updates.getKey(k));
  386 + }
  387 + // Create the key stream that will be referenced from
  388 + // /DecodeParms. We have to do this now since you can't modify
  389 + // or create objects during write.
  390 + char p[1] = { static_cast<char>(this->keys[og]) };
  391 + std::string p_str(p, 1);
  392 + QPDFObjectHandle dp_stream =
  393 + QPDFObjectHandle::newStream(this->pdf, p_str);
  394 + // Create /DecodeParms as expected by our fictitious
  395 + // /XORDecode filter.
  396 + QPDFObjectHandle decode_parms =
  397 + QPDFObjectHandle::newDictionary({{"/KeyStream", dp_stream}});
  398 + stream.replaceStreamData(
  399 + self,
  400 + QPDFObjectHandle::newName("/XORDecode"),
  401 + decode_parms);
  402 + // Further, if /ProtectXOR = true, we disable filtering on write
  403 + // so that QPDFWriter will not decode the stream even though we
  404 + // have registered a stream filter for /XORDecode.
  405 + auto protect = dict.getKey("/ProtectXOR");
  406 + if (protect.isBool() && protect.getBoolValue())
  407 + {
  408 + stream.setFilterOnWrite(false);
  409 + }
  410 + }
  411 +}
  412 +
  413 +void
  414 +StreamReplacer::provideStreamData(int objid, int generation,
  415 + Pipeline* pipeline)
  416 +{
  417 + QPDFObjGen og(objid, generation);
  418 + QPDFObjectHandle orig = this->copied_streams[og];
  419 + // call maybeReplace again, this time with the pipeline and no
  420 + // dict_updates. In this mode, maybeReplace doesn't make any
  421 + // changes. We have to hand it the original stream data, which we
  422 + // get from copied_streams.
  423 + if (! maybeReplace(og, orig, pipeline, nullptr))
  424 + {
  425 + // Since this only gets called for streams we already
  426 + // determined we are replacing, a false return would indicate
  427 + // a logic error.
  428 + throw std::logic_error(
  429 + "should_replace return false in provideStreamData");
  430 + }
  431 +}
  432 +
  433 +static void process(char const* infilename, char const* outfilename,
  434 + bool decode_specialized)
  435 +{
  436 + QPDF qpdf;
  437 + qpdf.processFile(infilename);
  438 +
  439 + // Create a single StreamReplacer instance. The interface requires
  440 + // a PointerHolder in various places, so allocate a StreamReplacer
  441 + // and stash it in a PointerHolder.
  442 + StreamReplacer* replacer = new StreamReplacer(&qpdf);
  443 + PointerHolder<QPDFObjectHandle::StreamDataProvider> p(replacer);
  444 +
  445 + for (auto& o: qpdf.getAllObjects())
  446 + {
  447 + if (o.isStream())
  448 + {
  449 + // Call registerStream for every stream. Only ones that
  450 + // registerStream decides to replace will actually be
  451 + // replaced.
  452 + replacer->registerStream(o, p);
  453 + }
  454 + }
  455 +
  456 + QPDFWriter w(qpdf, outfilename);
  457 + if (decode_specialized)
  458 + {
  459 + w.setDecodeLevel(qpdf_dl_specialized);
  460 + }
  461 + // For the test suite, use static IDs.
  462 + w.setStaticID(true); // for testing only
  463 + w.write();
  464 + std::cout << whoami << ": new file written to " << outfilename
  465 + << std::endl;
  466 +}
  467 +
  468 +static void usage()
  469 +{
  470 + std::cerr
  471 + << "\n"
  472 + << "Usage: " << whoami << " [ --decode-specialized ] infile outfile\n"
  473 + << std::endl;
  474 + exit(2);
  475 +}
  476 +
  477 +int main(int argc, char* argv[])
  478 +{
  479 + whoami = QUtil::getWhoami(argv[0]);
  480 +
  481 + // For libtool's sake....
  482 + if (strncmp(whoami, "lt-", 3) == 0)
  483 + {
  484 + whoami += 3;
  485 + }
  486 +
  487 + char const* infilename = 0;
  488 + char const* outfilename = 0;
  489 + bool decode_specialized = false;
  490 + for (int i = 1; i < argc; ++i)
  491 + {
  492 + if (strcmp(argv[i], "--decode-specialized") == 0)
  493 + {
  494 + decode_specialized = true;
  495 + }
  496 + else if (! infilename)
  497 + {
  498 + infilename = argv[i];
  499 + }
  500 + else if (! outfilename)
  501 + {
  502 + outfilename = argv[i];
  503 + }
  504 + else
  505 + {
  506 + usage();
  507 + }
  508 + }
  509 + if (! (infilename && outfilename))
  510 + {
  511 + usage();
  512 + }
  513 +
  514 + try
  515 + {
  516 + // Register our fictitious filter. This enables QPDFWriter to
  517 + // decode our streams. This is not a real filter, so no real
  518 + // PDF reading application would be able to interpret it. This
  519 + // is just for illustrative purposes.
  520 + QPDF::registerStreamFilter(
  521 + "/XORDecode", []{ return std::make_shared<SF_XORDecode>(); });
  522 + // Do the actual processing.
  523 + process(infilename, outfilename, decode_specialized);
  524 + }
  525 + catch (std::exception &e)
  526 + {
  527 + std::cerr << whoami << ": exception: " << e.what() << std::endl;
  528 + exit(2);
  529 + }
  530 +
  531 + return 0;
  532 +}
... ...
examples/qtest/custom-filter.test 0 → 100644
#!/usr/bin/env perl
require 5.008;
use warnings;
use strict;

chdir("custom-filter") or die "chdir testdir failed: $!\n";

require TestDriver;

cleanup();

my $td = TestDriver->new('custom-filter');

# The file input.pdf contains two streams, whose contents appear
# uncompressed with explanatory text. They are marked with the keys
# that pdf-custom-filter uses to decide 1) to re-encode using the
# fictitious /XORDecode filter, and 2) whether to protect the stream
# to prevent decoding using the custom filter even when decoding
# specialized filters is requested.

# Each case is: test description, command to run, and the file that
# the resulting a.pdf is compared against.
my @cases = (
    ["custom filter, decode generalized",
     "pdf-custom-filter input.pdf a.pdf",
     "generalized.pdf"],
    ["custom filter, decode specialized",
     "pdf-custom-filter --decode-specialized input.pdf a.pdf",
     "specialized.pdf"],
);

foreach my $case (@cases)
{
    my ($description, $command, $expected) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->STRING => "pdf-custom-filter: new file written to a.pdf\n",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => $expected});
}

cleanup();

# Two runtest calls are made for each of the two cases above.
$td->report(4);
  43 +
# Remove the output file that the test commands write.
sub cleanup
{
    unlink("a.pdf");
}
... ...
examples/qtest/custom-filter/generalized.pdf 0 → 100644
No preview for this file type
examples/qtest/custom-filter/input.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +%% Original object ID: 1 0
  6 +1 0 obj
  7 +<<
  8 + /Pages 2 0 R
  9 + /Type /Catalog
  10 +>>
  11 +endobj
  12 +
  13 +%% Original object ID: 2 0
  14 +2 0 obj
  15 +<<
  16 + /Count 1
  17 + /Kids [
  18 + 3 0 R
  19 + ]
  20 + /Type /Pages
  21 +>>
  22 +endobj
  23 +
  24 +%% Page 1
  25 +%% Original object ID: 3 0
  26 +3 0 obj
  27 +<<
  28 + /Contents 4 0 R
  29 + /MediaBox [
  30 + 0
  31 + 0
  32 + 612
  33 + 792
  34 + ]
  35 + /Parent 2 0 R
  36 + /Resources <<
  37 + /Font <<
  38 + /F1 6 0 R
  39 + >>
  40 + /ProcSet 7 0 R
  41 + >>
  42 + /Type /Page
  43 +>>
  44 +endobj
  45 +
  46 +%% Contents for page 1
  47 +%% Original object ID: 4 0
  48 +4 0 obj
  49 +<<
  50 + /Length 5 0 R
  51 +>>
  52 +stream
  53 +BT
  54 + /F1 24 Tf
  55 + 72 720 Td
  56 + (Potato) Tj
  57 +ET
  58 +endstream
  59 +endobj
  60 +
  61 +5 0 obj
  62 +44
  63 +endobj
  64 +
  65 +%% Original object ID: 6 0
  66 +6 0 obj
  67 +<<
  68 + /BaseFont /Helvetica
  69 + /Encoding /WinAnsiEncoding
  70 + /Name /F1
  71 + /Subtype /Type1
  72 + /Type /Font
  73 +>>
  74 +endobj
  75 +
  76 +%% Original object ID: 5 0
  77 +7 0 obj
  78 +[
  79 + /PDF
  80 + /Text
  81 +]
  82 +endobj
  83 +
  84 +8 0 obj
  85 +<<
  86 + /Length 9 0 R
  87 + /DoXOR true
  88 +>>
  89 +stream
  90 +
  91 +This stream has /DoXOR true. When processed with pdf-custom-filter
  92 +without the --decode-specialized option, the stream will appear in the
  93 +output encoded with the fictitious /XORDecode filter, and its
  94 +/DecodeParms will contain a reference to the key stream. When
  95 +processed with pdf-custom-filter with the --decode-specialized option,
  96 +it will appear in the output as a regular stream with /FlateDecode,
  97 +but the /OrigLength key will still have been added.
  98 +
  99 +endstream
  100 +endobj
  101 +
  102 +9 0 obj
  103 +455
  104 +endobj
  105 +
  106 +10 0 obj
  107 +<<
  108 + /Length 11 0 R
  109 + /DoXOR true
  110 + /ProtectXOR true
  111 +>>
  112 +stream
  113 +
  114 +This stream has /DoXOR true and /ProtectXOR true. When processed with
  115 +pdf-custom-filter with or without the --decode-specialized option, the
  116 +stream will appear in the output encoded with the fictitious
  117 +/XORDecode filter, and its /DecodeParms will contain a reference to
  118 +the key stream.
  119 +
  120 +endstream
  121 +endobj
  122 +
  123 +11 0 obj
  124 +288
  125 +endobj
  126 +
  127 +xref
  128 +0 12
  129 +0000000000 65535 f
  130 +0000000052 00000 n
  131 +0000000133 00000 n
  132 +0000000242 00000 n
  133 +0000000484 00000 n
  134 +0000000583 00000 n
  135 +0000000629 00000 n
  136 +0000000774 00000 n
  137 +0000000809 00000 n
  138 +0000001333 00000 n
  139 +0000001353 00000 n
  140 +0000001731 00000 n
  141 +trailer <<
  142 + /Root 1 0 R
  143 + /Size 12
  144 + /Example [ 8 0 R 10 0 R ]
  145 + /ID [<01f4bb169ae6e6b5f27505733e9abf42><01f4bb169ae6e6b5f27505733e9abf42>]
  146 +>>
  147 +startxref
  148 +1752
  149 +%%EOF
... ...
examples/qtest/custom-filter/specialized.pdf 0 → 100644
No preview for this file type
include/qpdf/QPDFObjectHandle.hh
... ... @@ -92,6 +92,15 @@ class QPDFObjectHandle
92 92 // writing linearized files, if the work done by your stream
93 93 // data provider is slow or computationally intensive, you
94 94 // might want to implement your own cache.
  95 + //
  96 + // * Once you have called replaceStreamData, the original
  97 + // stream data is no longer directly accessible from the
  98 + // stream, but this is easy to work around by copying the
  99 + // stream to a separate QPDF object. The qpdf library
  100 + // implements this very efficiently without actually making
  101 + // a copy of the stream data. You can find examples of this
  102 + // pattern in some of the examples, including
  103 + // pdf-custom-filter.cc and pdf-invert-images.cc.
95 104  
96 105 // Prior to qpdf 10.0.0, it was not possible to handle errors
97 106 // the way pipeStreamData does or to pass back success.
... ...