Commit 30709935af023dd66a17f2d494aa7dc84b7177e1

Authored by Jay Berkenbilt
1 parent 99101044

Filter tokens example

ChangeLog
@@ -150,6 +150,9 @@ @@ -150,6 +150,9 @@
150 QPDFObjectHandle::pipeStreamData, you don't need to worry about 150 QPDFObjectHandle::pipeStreamData, you don't need to worry about
151 this at all. 151 this at all.
152 152
  153 + * Provide heavily annotated examples/pdf-filter-tokens.cc example
  154 + that illustrates use of some simple token filters.
  155 +
153 2018-02-04 Jay Berkenbilt <ejb@ql.org> 156 2018-02-04 Jay Berkenbilt <ejb@ql.org>
154 157
155 * Add QPDFWriter::setLinearizationPass1Filename method and 158 * Add QPDFWriter::setLinearizationPass1Filename method and
examples/build.mk
@@ -6,7 +6,8 @@ BINS_examples = \ @@ -6,7 +6,8 @@ BINS_examples = \
6 pdf-invert-images \ 6 pdf-invert-images \
7 pdf-create \ 7 pdf-create \
8 pdf-parse-content \ 8 pdf-parse-content \
9 - pdf-split-pages 9 + pdf-split-pages \
  10 + pdf-filter-tokens
10 CBINS_examples = pdf-linearize 11 CBINS_examples = pdf-linearize
11 12
12 TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B))) 13 TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B)))
examples/pdf-filter-tokens.cc 0 → 100644
  1 +//
  2 +// This example illustrates the use of QPDFObjectHandle::TokenFilter.
  3 +// Please see comments inline for details.
  4 +//
  5 +
  6 +#include <iostream>
  7 +#include <string.h>
  8 +#include <stdlib.h>
  9 +#include <algorithm>
  10 +#include <deque>
  11 +
  12 +#include <qpdf/QPDF.hh>
  13 +#include <qpdf/QUtil.hh>
  14 +#include <qpdf/QPDFWriter.hh>
  15 +#include <qpdf/QPDFObjectHandle.hh>
  16 +
  17 +static char const* whoami = 0;
  18 +
  19 +void usage()
  20 +{
  21 + std::cerr << "Usage: " << whoami << " infile outfile" << std::endl
  22 + << "Applies token filters to infile and writes outfile"
  23 + << std::endl;
  24 + exit(2);
  25 +}
  26 +
  27 +// The StringReverser class is a trivial example of using a token
  28 +// filter. This class only overrides the pure virtual handleToken
  29 +// function and preserves the default handleEOF function.
  30 +class StringReverser: public QPDFObjectHandle::TokenFilter
  31 +{
  32 + public:
  33 + virtual ~StringReverser() // nothing to release; the class declares no data members
  34 + {
  35 + }
  36 + virtual void handleToken(QPDFTokenizer::Token const&); // reverses string tokens, passes all others through
  37 +};
  38 +
  39 +void
  40 +StringReverser::handleToken(QPDFTokenizer::Token const& token)
  41 +{
  42 + // For string tokens, reverse the characters. For other tokens,
  43 + // just pass them through. Notice that we construct a new string
  44 + // token and write that, thus allowing the library to handle any
  45 + // subtleties about properly encoding unprintable characters. This
  46 + // function doesn't handle multibyte characters at all. It's not
  47 + // intended to be an example of the correct way to reverse
  48 + // strings. It's just intended to give a simple example of a
  49 + // pretty minimal filter and to show an example of writing a
  50 + // constructed token.
  51 + if (token.getType() == QPDFTokenizer::tt_string)
  52 + {
  53 + std::string value = token.getValue();
  54 + std::reverse(value.begin(), value.end());
  55 + writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, value));
  56 + }
  57 + else
  58 + {
  59 + writeToken(token);
  60 + }
  61 +}
  62 +
  63 +// The ColorToGray filter finds all "rg" operators in the content
  64 +// stream and replaces them with "g" operators, thus mapping color to
  65 +// grayscale. Note that it only applies to content streams, not
  66 +// images, so this will not replace color images with grayscale
  67 +// images.
  68 +class ColorToGray: public QPDFObjectHandle::TokenFilter
  69 +{
  70 + public:
  71 + virtual ~ColorToGray()
  72 + {
  73 + }
  74 + virtual void handleToken(QPDFTokenizer::Token const&); // buffers tokens; replaces three numbers followed by "rg" with one gray value and "g"
  75 + virtual void handleEOF(); // writes out whatever is still buffered, then calls finish()
  76 +
  77 + private:
  78 + bool isNumeric(QPDFTokenizer::token_type_e); // true for tt_integer and tt_real
  79 + bool isIgnorable(QPDFTokenizer::token_type_e); // true for tt_space and tt_comment
  80 + double numericValue(QPDFTokenizer::Token const&); // the token's text parsed and read back as a double
  81 +
  82 + std::deque<QPDFTokenizer::Token> all_stack; // every pending token, in arrival order
  83 + std::deque<QPDFTokenizer::Token> stack; // the pending tokens with spaces and comments left out
  84 +};
  85 +
  86 +bool
  87 +ColorToGray::isNumeric(QPDFTokenizer::token_type_e token_type)
  88 +{
  89 + return ((token_type == QPDFTokenizer::tt_integer) ||
  90 + (token_type == QPDFTokenizer::tt_real));
  91 +}
  92 +
  93 +bool
  94 +ColorToGray::isIgnorable(QPDFTokenizer::token_type_e token_type)
  95 +{
  96 + return ((token_type == QPDFTokenizer::tt_space) ||
  97 + (token_type == QPDFTokenizer::tt_comment));
  98 +}
  99 +
  100 +double
  101 +ColorToGray::numericValue(QPDFTokenizer::Token const& token)
  102 +{
  103 + return QPDFObjectHandle::parse(token.getValue()).getNumericValue(); // parse the token's text into an object handle and return its numeric value
  104 +}
  105 +
  106 +void
  107 +ColorToGray::handleToken(QPDFTokenizer::Token const& token)
  108 +{
  109 + // Track the number of non-ignorable tokens we've seen. If we see
  110 + // an "rg" following three numbers, convert it to a grayscale
  111 + // value. Keep writing tokens to the output as we can.
  112 +
  113 + // There are several things to notice here. We keep two stacks:
  114 + // one of "meaningful" tokens, and one of all tokens. This way we
  115 + // can preserve whitespace or comments that we encounter in the
  116 + // stream and there preserve layout. As we receive tokens, we keep
  117 + // the last four meaningful tokens. If we see three numbers
  118 + // followed by rg, we use the three numbers to calculate a gray
  119 + // value that is perceptually similar to the color value and then
  120 + // write the "g" operator to the output, discarding any spaces or
  121 + // comments encountered embedded in the "rg" operator.
  122 +
  123 + // The stack and all_stack members are updated in such a way that
  124 + // they always contain exactly the same non-ignorable tokens. The
  125 + // stack member contains the tokens that would be left if you
  126 + // removed all space and comment tokens from all_stack.
  127 +
  128 + // On each new token, flush out any space or comment tokens. Store
  129 + // the incoming token. If we just got an rg preceded by the right
  130 + // kinds of operands, replace the command. Flush any additional
  131 + // accumulated tokens to keep the stack only four tokens deep.
  132 +
  133 + while ((! this->all_stack.empty()) &&
  134 + isIgnorable(this->all_stack.at(0).getType()))
  135 + {
  136 + writeToken(this->all_stack.at(0));
  137 + this->all_stack.pop_front();
  138 + }
  139 + this->all_stack.push_back(token);
  140 + QPDFTokenizer::token_type_e token_type = token.getType();
  141 + if (! isIgnorable(token_type))
  142 + {
  143 + this->stack.push_back(token);
  144 + if ((this->stack.size() == 4) &&
  145 + (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "rg")) &&
  146 + (isNumeric(this->stack.at(0).getType())) &&
  147 + (isNumeric(this->stack.at(1).getType())) &&
  148 + (isNumeric(this->stack.at(2).getType())))
  149 + {
  150 + double r = numericValue(this->stack.at(0));
  151 + double g = numericValue(this->stack.at(1));
  152 + double b = numericValue(this->stack.at(2));
  153 + double gray = ((0.3 * r) + (0.59 * b) + (0.11 * g));
  154 + if (gray > 1.0)
  155 + {
  156 + gray = 1.0;
  157 + }
  158 + if (gray < 0.0)
  159 + {
  160 + gray = 0.0;
  161 + }
  162 + write(QUtil::double_to_string(gray, 3));
  163 + write(" g");
  164 + this->stack.clear();
  165 + this->all_stack.clear();
  166 + }
  167 + }
  168 + if (this->stack.size() == 4)
  169 + {
  170 + writeToken(this->all_stack.at(0));
  171 + this->all_stack.pop_front();
  172 + this->stack.pop_front();
  173 + }
  174 +}
  175 +
  176 +void
  177 +ColorToGray::handleEOF()
  178 +{
  179 + // Flush out any remaining accumulated tokens.
  180 + while (! this->all_stack.empty())
  181 + {
  182 + writeToken(this->all_stack.at(0));
  183 + this->all_stack.pop_front();
  184 + }
  185 + // Remember to call finish(). If you override handleEOF, it is
  186 + // essential that you call finish() or else you are likely to lose
  187 + // some data in buffers of downstream pipelines that are not
  188 + // flushed out. This is also mentioned in comments in
  189 + // QPDFObjectHandle.hh.
  190 + finish();
  191 +}
  192 +
  193 +int main(int argc, char* argv[])
  194 +{
  195 + whoami = QUtil::getWhoami(argv[0]);
  196 +
  197 + // For libtool's sake....
  198 + if (strncmp(whoami, "lt-", 3) == 0)
  199 + {
  200 + whoami += 3;
  201 + }
  202 +
  203 + if (argc != 3)
  204 + {
  205 + usage();
  206 + }
  207 + char const* infilename = argv[1];
  208 + char const* outfilename = argv[2];
  209 +
  210 + try
  211 + {
  212 + QPDF pdf;
  213 + pdf.processFile(infilename);
  214 + std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
  215 + for (std::vector<QPDFObjectHandle>::iterator iter = pages.begin();
  216 + iter != pages.end(); ++iter)
  217 + {
  218 + // Attach two token filters to each page of this file.
  219 + // When the file is written, or when the pages' contents
  220 + // are retrieved in any other way, the filters will be
  221 + // applied. See comments on the filters for additional
  222 + // details.
  223 + QPDFObjectHandle page = *iter;
  224 + page.addContentTokenFilter(new StringReverser);
  225 + page.addContentTokenFilter(new ColorToGray);
  226 + }
  227 +
  228 + QPDFWriter w(pdf, outfilename);
  229 + w.setStaticID(true); // for testing only
  230 + w.write();
  231 + }
  232 + catch (std::exception& e)
  233 + {
  234 + std::cerr << whoami << ": " << e.what() << std::endl;
  235 + exit(2);
  236 + }
  237 +
  238 + return 0;
  239 +}
examples/qtest/filter-tokens.test 0 → 100644
  1 +#!/usr/bin/env perl
  2 +require 5.008;
  3 +BEGIN { $^W = 1; } # turn on warnings
  4 +use strict;
  5 +
  6 +chdir("filter-tokens"); # the input and expected PDFs live in this directory
  7 +
  8 +require TestDriver;
  9 +
  10 +my $td = new TestDriver('pdf-filter-tokens');
  11 +
  12 +$td->runtest("filter tokens",
  13 + {$td->COMMAND => "pdf-filter-tokens in.pdf a.pdf"},
  14 + {$td->STRING => "", $td->EXIT_STATUS => 0}); # expect no output and exit status 0
  15 +
  16 +$td->runtest("check output",
  17 + {$td->FILE => "a.pdf"},
  18 + {$td->FILE => "out.pdf"}); # the generated a.pdf must match the expected out.pdf
  19 +
  20 +$td->report(2); # presumably the expected test count (two runtest calls above) -- confirm against TestDriver
examples/qtest/filter-tokens/a.pdf 0 → 100644
No preview for this file type
examples/qtest/filter-tokens/in.pdf 0 → 100644
No preview for this file type
examples/qtest/filter-tokens/out.pdf 0 → 100644
No preview for this file type