Commit 99101044429c3c91bd11bdd1b26e5b6c2ceb140b
1 parent
b8723e97
Implement TokenFilter and refactor Pl_QPDFTokenizer
Implement a TokenFilter class and refactor Pl_QPDFTokenizer to use a TokenFilter class called ContentNormalizer. Pl_QPDFTokenizer is now a general filter that passes data through a TokenFilter.
Showing
16 changed files
with
631 additions
and
115 deletions
ChangeLog
| @@ -107,6 +107,49 @@ | @@ -107,6 +107,49 @@ | ||
| 107 | applications that use page-level APIs in QPDFObjectHandle to be | 107 | applications that use page-level APIs in QPDFObjectHandle to be |
| 108 | more tolerant of certain types of damaged files. | 108 | more tolerant of certain types of damaged files. |
| 109 | 109 | ||
| 110 | + * Add QPDFObjectHandle::TokenFilter class and methods to use it to | ||
| 111 | + perform lexical filtering on content streams. You can call | ||
| 112 | + QPDFObjectHandle::addTokenFilter on a stream object, or you can call | ||
| 113 | + the higher level QPDFObjectHandle::addContentTokenFilter on a page | ||
| 114 | + object to cause the stream's contents to be passed through a token | ||
| 115 | + filter while being retrieved by QPDFWriter or any other consumer. | ||
| 116 | + For details on using TokenFilter, please see comments in | ||
| 117 | + QPDFObjectHandle.hh. | ||
| 118 | + | ||
| 119 | + * Enhance the (type, string) QPDFTokenizer::Token constructor to | ||
| 120 | + initialize a raw value in addition to a value. Tokens have a | ||
| 121 | + value, which is a canonical representation, and a raw value. For | ||
| 122 | + all tokens except strings and names, the raw value and the value | ||
| 123 | + are the same. For strings, the value excludes the outer delimiters | ||
| 124 | + and has non-printing characters normalized. For names, the value | ||
| 125 | + resolves non-printing characters. In order to better facilitate | ||
| 126 | + token filters that mostly preserve contents and to enable | ||
| 127 | + developers to be mostly unconcerned about the nuances of token | ||
| 128 | + values and raw values, creating string and name tokens now | ||
| 129 | + properly handles this subtlety of values and raw values. When | ||
| 130 | + constructing string tokens, take care to avoid passing in the | ||
| 131 | + outer delimiters. This has always been the case, but it is now | ||
| 132 | + clarified in comments in QPDFObjectHandle.hh::TokenFilter. This | ||
| 133 | + has no impact on any existing code unless there's some code | ||
| 134 | + somewhere that was relying on Token::getRawValue() returning an | ||
| 135 | + empty string for a manually constructed token. The token class's | ||
| 136 | + operator== method still only looks at type and value, not raw | ||
| 137 | + value. For example, string tokens for <41> and (A) would still be | ||
| 138 | + equal because both are representations of the string "A". | ||
| 139 | + | ||
| 140 | + * Add QPDFObjectHandle::isDataModified method. This method just | ||
| 141 | + returns true if addTokenFilter has been called on the stream. It | ||
| 142 | + enables a caller to determine whether it is safe to optimize away | ||
| 143 | + piping of stream data in cases where the input and output are | ||
| 144 | + expected to be the same. QPDFWriter uses this internally to skip | ||
| 145 | + the optimization of not re-compressing already compressed streams | ||
| 146 | + if addTokenFilter has been called. Most developers will not have | ||
| 147 | + to worry about this as it is used internally in the library in the | ||
| 148 | + places that need it. If you are manually retrieving stream data | ||
| 149 | + with QPDFObjectHandle::getStreamData or | ||
| 150 | + QPDFObjectHandle::pipeStreamData, you don't need to worry about | ||
| 151 | + this at all. | ||
| 152 | + | ||
| 110 | 2018-02-04 Jay Berkenbilt <ejb@ql.org> | 153 | 2018-02-04 Jay Berkenbilt <ejb@ql.org> |
| 111 | 154 | ||
| 112 | * Add QPDFWriter::setLinearizationPass1Filename method and | 155 | * Add QPDFWriter::setLinearizationPass1Filename method and |
include/qpdf/QPDFObjectHandle.hh
| @@ -35,6 +35,7 @@ | @@ -35,6 +35,7 @@ | ||
| 35 | #include <qpdf/PointerHolder.hh> | 35 | #include <qpdf/PointerHolder.hh> |
| 36 | #include <qpdf/Buffer.hh> | 36 | #include <qpdf/Buffer.hh> |
| 37 | #include <qpdf/InputSource.hh> | 37 | #include <qpdf/InputSource.hh> |
| 38 | +#include <qpdf/QPDFTokenizer.hh> | ||
| 38 | 39 | ||
| 39 | #include <qpdf/QPDFObject.hh> | 40 | #include <qpdf/QPDFObject.hh> |
| 40 | 41 | ||
| @@ -76,6 +77,66 @@ class QPDFObjectHandle | @@ -76,6 +77,66 @@ class QPDFObjectHandle | ||
| 76 | Pipeline* pipeline) = 0; | 77 | Pipeline* pipeline) = 0; |
| 77 | }; | 78 | }; |
| 78 | 79 | ||
| 80 | + // The TokenFilter class provides a way to filter content streams | ||
| 81 | + // in a lexically aware fashion. TokenFilters can be attached to | ||
| 82 | + // streams using the addTokenFilter or addContentTokenFilter | ||
| 83 | + // methods. The handleToken method is called for each token, | ||
| 84 | + // including the eof token, and then handleEOF is called at the | ||
| 85 | + // very end. Handlers may call write (or writeToken) to pass data | ||
| 86 | + // downstream. The finish() method must be called exactly one time | ||
| 87 | + // to ensure that any written data is flushed out. The default | ||
| 88 | + // handleEOF calls finish. If you override handleEOF, you must | ||
| 89 | + // ensure that finish() is called either there or in response to | ||
| 90 | + // whatever event causes you to terminate creation of output. | ||
| 91 | + // Failure to call finish() may result in some of the data you | ||
| 92 | + // have written being lost. You should not rely on a destructor | ||
| 93 | + // for calling finish() since the destructor call may occur later | ||
| 94 | + // than you expect. Please see examples/token-filters.cc for | ||
| 95 | + // examples of using TokenFilters. | ||
| 96 | + // | ||
| 97 | + // Please note that when you call token.getValue() on a token of | ||
| 98 | + // type tt_string, you get the string value without any | ||
| 99 | + // delimiters. token.getRawValue() will return something suitable | ||
| 100 | + // for being written to output, or calling writeToken with a | ||
| 101 | + // string token will also work. The correct way to construct a | ||
| 102 | + // string token that would write the literal value (str) is | ||
| 103 | + // QPDFTokenizer::Token(QPDFTokenizer::tt_string, "str"). | ||
| 104 | + class TokenFilter | ||
| 105 | + { | ||
| 106 | + public: | ||
| 107 | + QPDF_DLL | ||
| 108 | + TokenFilter() : pipeline(0) | ||
| 109 | + { | ||
| 110 | + } | ||
| 111 | + QPDF_DLL | ||
| 112 | + virtual ~TokenFilter() | ||
| 113 | + { | ||
| 114 | + } | ||
| 115 | + virtual void handleToken(QPDFTokenizer::Token const&) = 0; | ||
| 116 | + virtual void handleEOF() | ||
| 117 | + { | ||
| 118 | + // If you override handleEOF, you must be sure to call | ||
| 119 | + // finish(). | ||
| 120 | + finish(); | ||
| 121 | + } | ||
| 122 | + | ||
| 123 | + // Called internally by the qpdf library to set the downstream pipeline. | ||
| 124 | + void setPipeline(Pipeline*); | ||
| 125 | + | ||
| 126 | + protected: | ||
| 127 | + QPDF_DLL | ||
| 128 | + void write(char const* data, size_t len); | ||
| 129 | + QPDF_DLL | ||
| 130 | + void write(std::string const& str); | ||
| 131 | + QPDF_DLL | ||
| 132 | + void writeToken(QPDFTokenizer::Token const&); | ||
| 133 | + QPDF_DLL | ||
| 134 | + void finish(); | ||
| 135 | + | ||
| 136 | + private: | ||
| 137 | + Pipeline* pipeline; // null until setPipeline() is called | ||
| 138 | + }; | ||
| 139 | + | ||
| 79 | // This class is used by parse to decrypt strings when reading an | 140 | // This class is used by parse to decrypt strings when reading an |
| 80 | // object that contains encrypted strings. | 141 | // object that contains encrypted strings. |
| 81 | class StringDecrypter | 142 | class StringDecrypter |
| @@ -223,6 +284,23 @@ class QPDFObjectHandle | @@ -223,6 +284,23 @@ class QPDFObjectHandle | ||
| 223 | static void parseContentStream(QPDFObjectHandle stream_or_array, | 284 | static void parseContentStream(QPDFObjectHandle stream_or_array, |
| 224 | ParserCallbacks* callbacks); | 285 | ParserCallbacks* callbacks); |
| 225 | 286 | ||
| 287 | + // Attach a token filter to a page's contents. If the page's | ||
| 288 | + // contents is an array of streams, it is automatically coalesced. | ||
| 289 | + // The token filter is applied to the page's contents as a single | ||
| 290 | + // stream. | ||
| 291 | + QPDF_DLL | ||
| 292 | + void addContentTokenFilter(PointerHolder<TokenFilter> token_filter); | ||
| 293 | + | ||
| 294 | + // As of qpdf 8, it is possible to add custom token filters to a | ||
| 295 | + // stream. The tokenized stream data is passed through the token | ||
| 296 | + // filter after all original filters but before content stream | ||
| 297 | + // normalization if requested. This is a low-level interface to | ||
| 298 | + // add it to a stream. You will usually want to call | ||
| 299 | + // addContentTokenFilter instead, which can be applied to a page | ||
| 300 | + // object, and which will automatically handle the case of pages | ||
| 301 | + // whose contents are split across multiple streams. | ||
| 302 | + QPDF_DLL void addTokenFilter(PointerHolder<TokenFilter> token_filter); | ||
| 303 | + | ||
| 226 | // Type-specific factories | 304 | // Type-specific factories |
| 227 | QPDF_DLL | 305 | QPDF_DLL |
| 228 | static QPDFObjectHandle newNull(); | 306 | static QPDFObjectHandle newNull(); |
| @@ -414,6 +492,13 @@ class QPDFObjectHandle | @@ -414,6 +492,13 @@ class QPDFObjectHandle | ||
| 414 | QPDF_DLL | 492 | QPDF_DLL |
| 415 | QPDFObjectHandle getDict(); | 493 | QPDFObjectHandle getDict(); |
| 416 | 494 | ||
| 495 | + // If addTokenFilter has been called for this stream, then the | ||
| 496 | + // original data should be considered to be modified. This means we | ||
| 497 | + // should avoid optimizations such as not filtering a stream that | ||
| 498 | + // is already compressed. | ||
| 499 | + QPDF_DLL | ||
| 500 | + bool isDataModified(); | ||
| 501 | + | ||
| 417 | // Returns filtered (uncompressed) stream data. Throws an | 502 | // Returns filtered (uncompressed) stream data. Throws an |
| 418 | // exception if the stream is filtered and we can't decode it. | 503 | // exception if the stream is filtered and we can't decode it. |
| 419 | QPDF_DLL | 504 | QPDF_DLL |
| @@ -608,7 +693,7 @@ class QPDFObjectHandle | @@ -608,7 +693,7 @@ class QPDFObjectHandle | ||
| 608 | // stream or an array of streams. If this page's content is an | 693 | // stream or an array of streams. If this page's content is an |
| 609 | // array, concatenate the streams into a single stream. This can | 694 | // array, concatenate the streams into a single stream. This can |
| 610 | // be useful when working with files that split content streams in | 695 | // be useful when working with files that split content streams in |
| 611 | - // arbitary spots, such as in the middle of a token, as that can | 696 | + // arbitrary spots, such as in the middle of a token, as that can |
| 612 | // confuse some software. You could also call this after calling | 697 | // confuse some software. You could also call this after calling |
| 613 | // addPageContents. | 698 | // addPageContents. |
| 614 | QPDF_DLL | 699 | QPDF_DLL |
include/qpdf/QPDFTokenizer.hh
| @@ -62,13 +62,8 @@ class QPDFTokenizer | @@ -62,13 +62,8 @@ class QPDFTokenizer | ||
| 62 | { | 62 | { |
| 63 | public: | 63 | public: |
| 64 | Token() : type(tt_bad) {} | 64 | Token() : type(tt_bad) {} |
| 65 | - | ||
| 66 | - Token(token_type_e type, std::string const& value) : | ||
| 67 | - type(type), | ||
| 68 | - value(value) | ||
| 69 | - { | ||
| 70 | - } | ||
| 71 | - | 65 | + QPDF_DLL |
| 66 | + Token(token_type_e type, std::string const& value); | ||
| 72 | Token(token_type_e type, std::string const& value, | 67 | Token(token_type_e type, std::string const& value, |
| 73 | std::string raw_value, std::string error_message) : | 68 | std::string raw_value, std::string error_message) : |
| 74 | type(type), | 69 | type(type), |
| @@ -93,7 +88,7 @@ class QPDFTokenizer | @@ -93,7 +88,7 @@ class QPDFTokenizer | ||
| 93 | { | 88 | { |
| 94 | return this->error_message; | 89 | return this->error_message; |
| 95 | } | 90 | } |
| 96 | - bool operator==(Token const& rhs) | 91 | + bool operator==(Token const& rhs) const |
| 97 | { | 92 | { |
| 98 | // Ignore fields other than type and value | 93 | // Ignore fields other than type and value |
| 99 | return ((this->type != tt_bad) && | 94 | return ((this->type != tt_bad) && |
libqpdf/ContentNormalizer.cc
0 → 100644
| 1 | +#include <qpdf/ContentNormalizer.hh> | ||
| 2 | +#include <qpdf/QUtil.hh> | ||
| 3 | + | ||
| 4 | +ContentNormalizer::ContentNormalizer() | ||
| 5 | +{ | ||
| 6 | +} | ||
| 7 | + | ||
| 8 | +ContentNormalizer::~ContentNormalizer() | ||
| 9 | +{ | ||
| 10 | +} | ||
| 11 | + | ||
| 12 | +void | ||
| 13 | +ContentNormalizer::handleToken(QPDFTokenizer::Token const& token) | ||
| 14 | +{ | ||
| 15 | + std::string value = token.getRawValue(); | ||
| 16 | + QPDFTokenizer::token_type_e token_type = token.getType(); | ||
| 17 | + | ||
| 18 | + switch (token_type) | ||
| 19 | + { | ||
| 20 | + case QPDFTokenizer::tt_space: | ||
| 21 | + { | ||
| 22 | + size_t len = value.length(); | ||
| 23 | + for (size_t i = 0; i < len; ++i) | ||
| 24 | + { | ||
| 25 | + char ch = value.at(i); | ||
| 26 | + if (ch == '\r') | ||
| 27 | + { | ||
| 28 | + if ((i + 1 < len) && (value.at(i + 1) == '\n')) | ||
| 29 | + { | ||
| 30 | + // CR of a CR-LF pair: drop it; the LF is written next | ||
| 31 | + } | ||
| 32 | + else | ||
| 33 | + { | ||
| 34 | + write("\n"); | ||
| 35 | + } | ||
| 36 | + } | ||
| 37 | + else | ||
| 38 | + { | ||
| 39 | + write(&ch, 1); | ||
| 40 | + } | ||
| 41 | + } | ||
| 42 | + } | ||
| 43 | + break; | ||
| 44 | + | ||
| 45 | + case QPDFTokenizer::tt_string: | ||
| 46 | + // Replacing string and name tokens in this way normalizes | ||
| 47 | + // their representation as this will automatically handle | ||
| 48 | + // quoting of unprintable characters, etc. | ||
| 49 | + writeToken(QPDFTokenizer::Token( | ||
| 50 | + QPDFTokenizer::tt_string, token.getValue())); | ||
| 51 | + break; | ||
| 52 | + | ||
| 53 | + case QPDFTokenizer::tt_name: | ||
| 54 | + writeToken(QPDFTokenizer::Token( | ||
| 55 | + QPDFTokenizer::tt_name, token.getValue())); | ||
| 56 | + break; | ||
| 57 | + | ||
| 58 | + default: | ||
| 59 | + writeToken(token); | ||
| 60 | + break; | ||
| 61 | + } | ||
| 62 | + | ||
| 63 | + // A string or name whose raw form held a line break gets a newline. | ||
| 64 | + if (((token_type == QPDFTokenizer::tt_string) || | ||
| 65 | + (token_type == QPDFTokenizer::tt_name)) && | ||
| 66 | + ((value.find('\r') != std::string::npos) || | ||
| 67 | + (value.find('\n') != std::string::npos))) | ||
| 68 | + { | ||
| 69 | + write("\n"); | ||
| 70 | + } | ||
| 71 | +} | ||
| 72 | + | ||
| 73 | +void | ||
| 74 | +ContentNormalizer::handleEOF() | ||
| 75 | +{ | ||
| 76 | + finish(); | ||
| 77 | +} |
libqpdf/Pl_QPDFTokenizer.cc
| 1 | #include <qpdf/Pl_QPDFTokenizer.hh> | 1 | #include <qpdf/Pl_QPDFTokenizer.hh> |
| 2 | -#include <qpdf/QPDF_String.hh> | ||
| 3 | -#include <qpdf/QPDF_Name.hh> | ||
| 4 | #include <qpdf/QTC.hh> | 2 | #include <qpdf/QTC.hh> |
| 5 | -#include <qpdf/QUtil.hh> | ||
| 6 | #include <stdexcept> | 3 | #include <stdexcept> |
| 7 | #include <string.h> | 4 | #include <string.h> |
| 8 | 5 | ||
| 9 | -Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) : | ||
| 10 | - Pipeline(identifier, next), | ||
| 11 | - just_wrote_nl(false), | 6 | +Pl_QPDFTokenizer::Members::Members() : |
| 7 | + filter(0), | ||
| 12 | last_char_was_cr(false), | 8 | last_char_was_cr(false), |
| 13 | unread_char(false), | 9 | unread_char(false), |
| 14 | char_to_unread('\0') | 10 | char_to_unread('\0') |
| 15 | { | 11 | { |
| 16 | - tokenizer.allowEOF(); | ||
| 17 | - tokenizer.includeIgnorable(); | ||
| 18 | } | 12 | } |
| 19 | 13 | ||
| 20 | -Pl_QPDFTokenizer::~Pl_QPDFTokenizer() | 14 | +Pl_QPDFTokenizer::Members::~Members() |
| 21 | { | 15 | { |
| 22 | } | 16 | } |
| 23 | 17 | ||
| 24 | -void | ||
| 25 | -Pl_QPDFTokenizer::writeNext(char const* buf, size_t len) | 18 | +Pl_QPDFTokenizer::Pl_QPDFTokenizer( |
| 19 | + char const* identifier, | ||
| 20 | + QPDFObjectHandle::TokenFilter* filter) | ||
| 21 | + : | ||
| 22 | + Pipeline(identifier, 0), | ||
| 23 | + m(new Members) | ||
| 26 | { | 24 | { |
| 27 | - if (len) | ||
| 28 | - { | ||
| 29 | - getNext()->write(QUtil::unsigned_char_pointer(buf), len); | ||
| 30 | - this->just_wrote_nl = (buf[len-1] == '\n'); | ||
| 31 | - } | 25 | + m->filter = filter; |
| 26 | + m->tokenizer.allowEOF(); | ||
| 27 | + m->tokenizer.includeIgnorable(); | ||
| 32 | } | 28 | } |
| 33 | 29 | ||
| 34 | -void | ||
| 35 | -Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token) | 30 | +Pl_QPDFTokenizer::~Pl_QPDFTokenizer() |
| 36 | { | 31 | { |
| 37 | - std::string value = token.getRawValue(); | ||
| 38 | - | ||
| 39 | - switch (token.getType()) | ||
| 40 | - { | ||
| 41 | - case QPDFTokenizer::tt_space: | ||
| 42 | - { | ||
| 43 | - size_t len = value.length(); | ||
| 44 | - for (size_t i = 0; i < len; ++i) | ||
| 45 | - { | ||
| 46 | - char ch = value.at(i); | ||
| 47 | - if (ch == '\r') | ||
| 48 | - { | ||
| 49 | - if ((i + 1 < len) && (value.at(i + 1) == '\n')) | ||
| 50 | - { | ||
| 51 | - // ignore | ||
| 52 | - } | ||
| 53 | - else | ||
| 54 | - { | ||
| 55 | - writeNext("\n", 1); | ||
| 56 | - } | ||
| 57 | - } | ||
| 58 | - else | ||
| 59 | - { | ||
| 60 | - writeNext(&ch, 1); | ||
| 61 | - } | ||
| 62 | - } | ||
| 63 | - } | ||
| 64 | - value.clear(); | ||
| 65 | - break; | ||
| 66 | - | ||
| 67 | - case QPDFTokenizer::tt_string: | ||
| 68 | - value = QPDF_String(token.getValue()).unparse(); | ||
| 69 | - | ||
| 70 | - break; | ||
| 71 | - | ||
| 72 | - case QPDFTokenizer::tt_name: | ||
| 73 | - value = QPDF_Name(token.getValue()).unparse(); | ||
| 74 | - break; | ||
| 75 | - | ||
| 76 | - default: | ||
| 77 | - break; | ||
| 78 | - } | ||
| 79 | - writeNext(value.c_str(), value.length()); | ||
| 80 | } | 32 | } |
| 81 | 33 | ||
| 82 | void | 34 | void |
| 83 | Pl_QPDFTokenizer::processChar(char ch) | 35 | Pl_QPDFTokenizer::processChar(char ch) |
| 84 | { | 36 | { |
| 85 | - tokenizer.presentCharacter(ch); | 37 | + this->m->tokenizer.presentCharacter(ch); |
| 86 | QPDFTokenizer::Token token; | 38 | QPDFTokenizer::Token token; |
| 87 | - if (tokenizer.getToken(token, this->unread_char, this->char_to_unread)) | 39 | + if (this->m->tokenizer.getToken( |
| 40 | + token, this->m->unread_char, this->m->char_to_unread)) | ||
| 88 | { | 41 | { |
| 89 | - writeToken(token); | ||
| 90 | - std::string value = token.getRawValue(); | ||
| 91 | - QPDFTokenizer::token_type_e token_type = token.getType(); | ||
| 92 | - if (((token_type == QPDFTokenizer::tt_string) || | ||
| 93 | - (token_type == QPDFTokenizer::tt_name)) && | ||
| 94 | - ((value.find('\r') != std::string::npos) || | ||
| 95 | - (value.find('\n') != std::string::npos))) | 42 | + this->m->filter->handleToken(token); |
| 43 | + if ((token.getType() == QPDFTokenizer::tt_word) && | ||
| 44 | + (token.getValue() == "ID")) | ||
| 96 | { | 45 | { |
| 97 | - writeNext("\n", 1); | ||
| 98 | - } | ||
| 99 | - if ((token.getType() == QPDFTokenizer::tt_word) && | ||
| 100 | - (token.getValue() == "ID")) | ||
| 101 | - { | ||
| 102 | QTC::TC("qpdf", "Pl_QPDFTokenizer found ID"); | 46 | QTC::TC("qpdf", "Pl_QPDFTokenizer found ID"); |
| 103 | - tokenizer.expectInlineImage(); | ||
| 104 | - } | 47 | + this->m->tokenizer.expectInlineImage(); |
| 48 | + } | ||
| 105 | } | 49 | } |
| 106 | } | 50 | } |
| 107 | 51 | ||
| @@ -109,10 +53,10 @@ Pl_QPDFTokenizer::processChar(char ch) | @@ -109,10 +53,10 @@ Pl_QPDFTokenizer::processChar(char ch) | ||
| 109 | void | 53 | void |
| 110 | Pl_QPDFTokenizer::checkUnread() | 54 | Pl_QPDFTokenizer::checkUnread() |
| 111 | { | 55 | { |
| 112 | - if (this->unread_char) | 56 | + if (this->m->unread_char) |
| 113 | { | 57 | { |
| 114 | - processChar(this->char_to_unread); | ||
| 115 | - if (this->unread_char) | 58 | + processChar(this->m->char_to_unread); |
| 59 | + if (this->m->unread_char) | ||
| 116 | { | 60 | { |
| 117 | throw std::logic_error( | 61 | throw std::logic_error( |
| 118 | "INTERNAL ERROR: unread_char still true after processing " | 62 | "INTERNAL ERROR: unread_char still true after processing " |
| @@ -135,20 +79,13 @@ Pl_QPDFTokenizer::write(unsigned char* buf, size_t len) | @@ -135,20 +79,13 @@ Pl_QPDFTokenizer::write(unsigned char* buf, size_t len) | ||
| 135 | void | 79 | void |
| 136 | Pl_QPDFTokenizer::finish() | 80 | Pl_QPDFTokenizer::finish() |
| 137 | { | 81 | { |
| 138 | - this->tokenizer.presentEOF(); | 82 | + this->m->tokenizer.presentEOF(); |
| 139 | QPDFTokenizer::Token token; | 83 | QPDFTokenizer::Token token; |
| 140 | - if (tokenizer.getToken(token, this->unread_char, this->char_to_unread)) | 84 | + if (this->m->tokenizer.getToken( |
| 85 | + token, this->m->unread_char, this->m->char_to_unread)) | ||
| 141 | { | 86 | { |
| 142 | - writeToken(token); | ||
| 143 | - if (unread_char) | ||
| 144 | - { | ||
| 145 | - if (this->char_to_unread == '\r') | ||
| 146 | - { | ||
| 147 | - this->char_to_unread = '\n'; | ||
| 148 | - } | ||
| 149 | - writeNext(&this->char_to_unread, 1); | ||
| 150 | - } | 87 | + this->m->filter->handleToken(token); |
| 151 | } | 88 | } |
| 152 | 89 | ||
| 153 | - getNext()->finish(); | 90 | + this->m->filter->handleEOF(); |
| 154 | } | 91 | } |
libqpdf/QPDFObjectHandle.cc
| @@ -63,6 +63,50 @@ CoalesceProvider::provideStreamData(int, int, Pipeline* p) | @@ -63,6 +63,50 @@ CoalesceProvider::provideStreamData(int, int, Pipeline* p) | ||
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | void | 65 | void |
| 66 | +QPDFObjectHandle::TokenFilter::setPipeline(Pipeline* p) | ||
| 67 | +{ | ||
| 68 | + this->pipeline = p; | ||
| 69 | +} | ||
| 70 | + | ||
| 71 | +void | ||
| 72 | +QPDFObjectHandle::TokenFilter::write(char const* data, size_t len) | ||
| 73 | +{ | ||
| 74 | + if (! this->pipeline) | ||
| 75 | + { | ||
| 76 | + throw std::logic_error( | ||
| 77 | + "TokenFilter::write called before setPipeline"); | ||
| 78 | + } | ||
| 79 | + if (len) | ||
| 80 | + { | ||
| 81 | + this->pipeline->write(QUtil::unsigned_char_pointer(data), len); | ||
| 82 | + } | ||
| 83 | +} | ||
| 84 | + | ||
| 85 | +void | ||
| 86 | +QPDFObjectHandle::TokenFilter::write(std::string const& str) | ||
| 87 | +{ | ||
| 88 | + write(str.c_str(), str.length()); | ||
| 89 | +} | ||
| 90 | + | ||
| 91 | +void | ||
| 92 | +QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token) | ||
| 93 | +{ | ||
| 94 | + std::string value = token.getRawValue(); | ||
| 95 | + write(value.c_str(), value.length()); | ||
| 96 | +} | ||
| 97 | + | ||
| 98 | +void | ||
| 99 | +QPDFObjectHandle::TokenFilter::finish() | ||
| 100 | +{ | ||
| 101 | + if (! this->pipeline) | ||
| 102 | + { | ||
| 103 | + throw std::logic_error( | ||
| 104 | + "TokenFilter::finish called before setPipeline"); | ||
| 105 | + } | ||
| 106 | + this->pipeline->finish(); | ||
| 107 | +} | ||
| 108 | + | ||
| 109 | +void | ||
| 66 | QPDFObjectHandle::ParserCallbacks::terminateParsing() | 110 | QPDFObjectHandle::ParserCallbacks::terminateParsing() |
| 67 | { | 111 | { |
| 68 | throw TerminateParsing(); | 112 | throw TerminateParsing(); |
| @@ -508,6 +552,13 @@ QPDFObjectHandle::getDict() | @@ -508,6 +552,13 @@ QPDFObjectHandle::getDict() | ||
| 508 | return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict(); | 552 | return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict(); |
| 509 | } | 553 | } |
| 510 | 554 | ||
| 555 | +bool | ||
| 556 | +QPDFObjectHandle::isDataModified() | ||
| 557 | +{ | ||
| 558 | + assertStream(); | ||
| 559 | + return dynamic_cast<QPDF_Stream*>(obj.getPointer())->isDataModified(); | ||
| 560 | +} | ||
| 561 | + | ||
| 511 | void | 562 | void |
| 512 | QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict) | 563 | QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict) |
| 513 | { | 564 | { |
| @@ -1033,6 +1084,21 @@ QPDFObjectHandle::parseContentStream_data( | @@ -1033,6 +1084,21 @@ QPDFObjectHandle::parseContentStream_data( | ||
| 1033 | } | 1084 | } |
| 1034 | } | 1085 | } |
| 1035 | 1086 | ||
| 1087 | +void | ||
| 1088 | +QPDFObjectHandle::addContentTokenFilter(PointerHolder<TokenFilter> filter) | ||
| 1089 | +{ | ||
| 1090 | + coalesceContentStreams(); | ||
| 1091 | + this->getKey("/Contents").addTokenFilter(filter); | ||
| 1092 | +} | ||
| 1093 | + | ||
| 1094 | +void | ||
| 1095 | +QPDFObjectHandle::addTokenFilter(PointerHolder<TokenFilter> filter) | ||
| 1096 | +{ | ||
| 1097 | + assertStream(); | ||
| 1098 | + return dynamic_cast<QPDF_Stream*>( | ||
| 1099 | + obj.getPointer())->addTokenFilter(filter); | ||
| 1100 | +} | ||
| 1101 | + | ||
| 1036 | QPDFObjectHandle | 1102 | QPDFObjectHandle |
| 1037 | QPDFObjectHandle::parse(PointerHolder<InputSource> input, | 1103 | QPDFObjectHandle::parse(PointerHolder<InputSource> input, |
| 1038 | std::string const& object_description, | 1104 | std::string const& object_description, |
libqpdf/QPDFTokenizer.cc
| @@ -7,6 +7,7 @@ | @@ -7,6 +7,7 @@ | ||
| 7 | #include <qpdf/QTC.hh> | 7 | #include <qpdf/QTC.hh> |
| 8 | #include <qpdf/QPDFExc.hh> | 8 | #include <qpdf/QPDFExc.hh> |
| 9 | #include <qpdf/QUtil.hh> | 9 | #include <qpdf/QUtil.hh> |
| 10 | +#include <qpdf/QPDFObjectHandle.hh> | ||
| 10 | 11 | ||
| 11 | #include <stdexcept> | 12 | #include <stdexcept> |
| 12 | #include <string.h> | 13 | #include <string.h> |
| @@ -39,6 +40,23 @@ QPDFTokenizer::Members::~Members() | @@ -39,6 +40,23 @@ QPDFTokenizer::Members::~Members() | ||
| 39 | { | 40 | { |
| 40 | } | 41 | } |
| 41 | 42 | ||
| 43 | +// Construct a token from a type and canonical value. For string and | ||
| 44 | +// name tokens, the raw value is the unparsed form of that value. | ||
| 45 | +QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : | ||
| 46 | + type(type), | ||
| 47 | + value(value), | ||
| 48 | + raw_value(value) | ||
| 49 | +{ | ||
| 50 | + if (type == tt_string) | ||
| 51 | + { | ||
| 52 | + raw_value = QPDFObjectHandle::newString(value).unparse(); | ||
| 53 | + } | ||
| 54 | + else if (type == tt_name) | ||
| 55 | + { | ||
| 56 | + raw_value = QPDFObjectHandle::newName(value).unparse(); | ||
| 57 | + } | ||
| 58 | +} | ||
| 59 | + | ||
| 42 | QPDFTokenizer::QPDFTokenizer() : | 60 | QPDFTokenizer::QPDFTokenizer() : |
| 43 | m(new Members()) | 61 | m(new Members()) |
| 44 | { | 62 | { |
libqpdf/QPDFWriter.cc
| @@ -1591,7 +1591,8 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, | @@ -1591,7 +1591,8 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, | ||
| 1591 | { | 1591 | { |
| 1592 | is_metadata = true; | 1592 | is_metadata = true; |
| 1593 | } | 1593 | } |
| 1594 | - bool filter = (this->m->compress_streams || | 1594 | + bool filter = (object.isDataModified() || |
| 1595 | + this->m->compress_streams || | ||
| 1595 | this->m->stream_decode_level); | 1596 | this->m->stream_decode_level); |
| 1596 | if (this->m->compress_streams) | 1597 | if (this->m->compress_streams) |
| 1597 | { | 1598 | { |
| @@ -1602,7 +1603,8 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, | @@ -1602,7 +1603,8 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, | ||
| 1602 | // compressed with a lossy compression scheme, but we | 1603 | // compressed with a lossy compression scheme, but we |
| 1603 | // don't support any of those right now. | 1604 | // don't support any of those right now. |
| 1604 | QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter"); | 1605 | QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter"); |
| 1605 | - if (filter_obj.isName() && | 1606 | + if ((! object.isDataModified()) && |
| 1607 | + filter_obj.isName() && | ||
| 1606 | ((filter_obj.getName() == "/FlateDecode") || | 1608 | ((filter_obj.getName() == "/FlateDecode") || |
| 1607 | (filter_obj.getName() == "/Fl"))) | 1609 | (filter_obj.getName() == "/Fl"))) |
| 1608 | { | 1610 | { |
libqpdf/QPDF_Stream.cc
| @@ -13,7 +13,7 @@ | @@ -13,7 +13,7 @@ | ||
| 13 | #include <qpdf/Pl_RunLength.hh> | 13 | #include <qpdf/Pl_RunLength.hh> |
| 14 | #include <qpdf/Pl_DCT.hh> | 14 | #include <qpdf/Pl_DCT.hh> |
| 15 | #include <qpdf/Pl_Count.hh> | 15 | #include <qpdf/Pl_Count.hh> |
| 16 | - | 16 | +#include <qpdf/ContentNormalizer.hh> |
| 17 | #include <qpdf/QTC.hh> | 17 | #include <qpdf/QTC.hh> |
| 18 | #include <qpdf/QPDF.hh> | 18 | #include <qpdf/QPDF.hh> |
| 19 | #include <qpdf/QPDFExc.hh> | 19 | #include <qpdf/QPDFExc.hh> |
| @@ -91,6 +91,12 @@ QPDF_Stream::getDict() const | @@ -91,6 +91,12 @@ QPDF_Stream::getDict() const | ||
| 91 | return this->stream_dict; | 91 | return this->stream_dict; |
| 92 | } | 92 | } |
| 93 | 93 | ||
| 94 | +bool | ||
| 95 | +QPDF_Stream::isDataModified() const | ||
| 96 | +{ | ||
| 97 | + return (! this->token_filters.empty()); | ||
| 98 | +} | ||
| 99 | + | ||
| 94 | PointerHolder<Buffer> | 100 | PointerHolder<Buffer> |
| 95 | QPDF_Stream::getStreamData(qpdf_stream_decode_level_e decode_level) | 101 | QPDF_Stream::getStreamData(qpdf_stream_decode_level_e decode_level) |
| 96 | { | 102 | { |
| @@ -440,21 +446,36 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, | @@ -440,21 +446,36 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, | ||
| 440 | // create to be deleted when this function finishes. | 446 | // create to be deleted when this function finishes. |
| 441 | std::vector<PointerHolder<Pipeline> > to_delete; | 447 | std::vector<PointerHolder<Pipeline> > to_delete; |
| 442 | 448 | ||
| 449 | + PointerHolder<ContentNormalizer> normalizer; | ||
| 443 | if (filter) | 450 | if (filter) |
| 444 | { | 451 | { |
| 445 | if (encode_flags & qpdf_ef_compress) | 452 | if (encode_flags & qpdf_ef_compress) |
| 446 | { | 453 | { |
| 447 | - pipeline = new Pl_Flate("compress object stream", pipeline, | 454 | + pipeline = new Pl_Flate("compress stream", pipeline, |
| 448 | Pl_Flate::a_deflate); | 455 | Pl_Flate::a_deflate); |
| 449 | to_delete.push_back(pipeline); | 456 | to_delete.push_back(pipeline); |
| 450 | } | 457 | } |
| 451 | 458 | ||
| 452 | if (encode_flags & qpdf_ef_normalize) | 459 | if (encode_flags & qpdf_ef_normalize) |
| 453 | { | 460 | { |
| 454 | - pipeline = new Pl_QPDFTokenizer("normalizer", pipeline); | 461 | + normalizer = new ContentNormalizer(); |
| 462 | + normalizer->setPipeline(pipeline); | ||
| 463 | + pipeline = new Pl_QPDFTokenizer( | ||
| 464 | + "normalizer", normalizer.getPointer()); | ||
| 455 | to_delete.push_back(pipeline); | 465 | to_delete.push_back(pipeline); |
| 456 | } | 466 | } |
| 457 | 467 | ||
| 468 | + for (std::vector<PointerHolder< | ||
| 469 | + QPDFObjectHandle::TokenFilter> >::reverse_iterator iter = | ||
| 470 | + this->token_filters.rbegin(); | ||
| 471 | + iter != this->token_filters.rend(); ++iter) | ||
| 472 | + { | ||
| 473 | + (*iter)->setPipeline(pipeline); | ||
| 474 | + pipeline = new Pl_QPDFTokenizer( | ||
| 475 | + "token filter", (*iter).getPointer()); | ||
| 476 | + to_delete.push_back(pipeline); | ||
| 477 | + } | ||
| 478 | + | ||
| 458 | for (std::vector<std::string>::reverse_iterator iter = filters.rbegin(); | 479 | for (std::vector<std::string>::reverse_iterator iter = filters.rbegin(); |
| 459 | iter != filters.rend(); ++iter) | 480 | iter != filters.rend(); ++iter) |
| 460 | { | 481 | { |
| @@ -613,6 +634,13 @@ QPDF_Stream::replaceStreamData( | @@ -613,6 +634,13 @@ QPDF_Stream::replaceStreamData( | ||
| 613 | } | 634 | } |
| 614 | 635 | ||
| 615 | void | 636 | void |
| 637 | +QPDF_Stream::addTokenFilter( | ||
| 638 | + PointerHolder<QPDFObjectHandle::TokenFilter> token_filter) | ||
| 639 | +{ | ||
| 640 | + this->token_filters.push_back(token_filter); | ||
| 641 | +} | ||
| 642 | + | ||
| 643 | +void | ||
| 616 | QPDF_Stream::replaceFilterData(QPDFObjectHandle const& filter, | 644 | QPDF_Stream::replaceFilterData(QPDFObjectHandle const& filter, |
| 617 | QPDFObjectHandle const& decode_parms, | 645 | QPDFObjectHandle const& decode_parms, |
| 618 | size_t length) | 646 | size_t length) |
libqpdf/build.mk
| @@ -9,6 +9,7 @@ SRCS_libqpdf = \ | @@ -9,6 +9,7 @@ SRCS_libqpdf = \ | ||
| 9 | libqpdf/BitWriter.cc \ | 9 | libqpdf/BitWriter.cc \ |
| 10 | libqpdf/Buffer.cc \ | 10 | libqpdf/Buffer.cc \ |
| 11 | libqpdf/BufferInputSource.cc \ | 11 | libqpdf/BufferInputSource.cc \ |
| 12 | + libqpdf/ContentNormalizer.cc \ | ||
| 12 | libqpdf/FileInputSource.cc \ | 13 | libqpdf/FileInputSource.cc \ |
| 13 | libqpdf/InputSource.cc \ | 14 | libqpdf/InputSource.cc \ |
| 14 | libqpdf/InsecureRandomDataProvider.cc \ | 15 | libqpdf/InsecureRandomDataProvider.cc \ |
libqpdf/qpdf/ContentNormalizer.hh
0 → 100644
| 1 | +#ifndef __CONTENTNORMALIZER_HH__ | ||
| 2 | +#define __CONTENTNORMALIZER_HH__ | ||
| 3 | + | ||
| 4 | +#include <qpdf/QPDFObjectHandle.hh> | ||
| 5 | + | ||
| 6 | +class ContentNormalizer: public QPDFObjectHandle::TokenFilter | ||
| 7 | +{ | ||
| 8 | + public: | ||
| 9 | + ContentNormalizer(); | ||
| 10 | + virtual ~ContentNormalizer(); | ||
| 11 | + virtual void handleToken(QPDFTokenizer::Token const&); | ||
| 12 | + virtual void handleEOF(); | ||
| 13 | +}; | ||
| 14 | + | ||
| 15 | +#endif // __CONTENTNORMALIZER_HH__ |
libqpdf/qpdf/Pl_QPDFTokenizer.hh
| @@ -4,6 +4,8 @@ | @@ -4,6 +4,8 @@ | ||
| 4 | #include <qpdf/Pipeline.hh> | 4 | #include <qpdf/Pipeline.hh> |
| 5 | 5 | ||
| 6 | #include <qpdf/QPDFTokenizer.hh> | 6 | #include <qpdf/QPDFTokenizer.hh> |
| 7 | +#include <qpdf/PointerHolder.hh> | ||
| 8 | +#include <qpdf/QPDFObjectHandle.hh> | ||
| 7 | 9 | ||
| 8 | // | 10 | // |
| 9 | // Treat incoming text as a stream consisting of valid PDF tokens, but | 11 | // Treat incoming text as a stream consisting of valid PDF tokens, but |
| @@ -16,7 +18,8 @@ | @@ -16,7 +18,8 @@ | ||
| 16 | class Pl_QPDFTokenizer: public Pipeline | 18 | class Pl_QPDFTokenizer: public Pipeline |
| 17 | { | 19 | { |
| 18 | public: | 20 | public: |
| 19 | - Pl_QPDFTokenizer(char const* identifier, Pipeline* next); | 21 | + Pl_QPDFTokenizer(char const* identifier, |
| 22 | + QPDFObjectHandle::TokenFilter* filter); | ||
| 20 | virtual ~Pl_QPDFTokenizer(); | 23 | virtual ~Pl_QPDFTokenizer(); |
| 21 | virtual void write(unsigned char* buf, size_t len); | 24 | virtual void write(unsigned char* buf, size_t len); |
| 22 | virtual void finish(); | 25 | virtual void finish(); |
| @@ -24,14 +27,25 @@ class Pl_QPDFTokenizer: public Pipeline | @@ -24,14 +27,25 @@ class Pl_QPDFTokenizer: public Pipeline | ||
| 24 | private: | 27 | private: |
| 25 | void processChar(char ch); | 28 | void processChar(char ch); |
| 26 | void checkUnread(); | 29 | void checkUnread(); |
| 27 | - void writeNext(char const*, size_t len); | ||
| 28 | - void writeToken(QPDFTokenizer::Token&); | ||
| 29 | - | ||
| 30 | - QPDFTokenizer tokenizer; | ||
| 31 | - bool just_wrote_nl; | ||
| 32 | - bool last_char_was_cr; | ||
| 33 | - bool unread_char; | ||
| 34 | - char char_to_unread; | 30 | + |
| 31 | + class Members | ||
| 32 | + { | ||
| 33 | + friend class Pl_QPDFTokenizer; | ||
| 34 | + | ||
| 35 | + public: | ||
| 36 | + ~Members(); | ||
| 37 | + | ||
| 38 | + private: | ||
| 39 | + Members(); | ||
| 40 | + Members(Members const&); | ||
| 41 | + | ||
| 42 | + QPDFObjectHandle::TokenFilter* filter; | ||
| 43 | + QPDFTokenizer tokenizer; | ||
| 44 | + bool last_char_was_cr; | ||
| 45 | + bool unread_char; | ||
| 46 | + char char_to_unread; | ||
| 47 | + }; | ||
| 48 | + PointerHolder<Members> m; | ||
| 35 | }; | 49 | }; |
| 36 | 50 | ||
| 37 | #endif // __PL_QPDFTOKENIZER_HH__ | 51 | #endif // __PL_QPDFTOKENIZER_HH__ |
libqpdf/qpdf/QPDF_Stream.hh
| @@ -20,6 +20,7 @@ class QPDF_Stream: public QPDFObject | @@ -20,6 +20,7 @@ class QPDF_Stream: public QPDFObject | ||
| 20 | virtual QPDFObject::object_type_e getTypeCode() const; | 20 | virtual QPDFObject::object_type_e getTypeCode() const; |
| 21 | virtual char const* getTypeName() const; | 21 | virtual char const* getTypeName() const; |
| 22 | QPDFObjectHandle getDict() const; | 22 | QPDFObjectHandle getDict() const; |
| 23 | + bool isDataModified() const; | ||
| 23 | 24 | ||
| 24 | // See comments in QPDFObjectHandle.hh for these methods. | 25 | // See comments in QPDFObjectHandle.hh for these methods. |
| 25 | bool pipeStreamData(Pipeline*, | 26 | bool pipeStreamData(Pipeline*, |
| @@ -35,6 +36,8 @@ class QPDF_Stream: public QPDFObject | @@ -35,6 +36,8 @@ class QPDF_Stream: public QPDFObject | ||
| 35 | PointerHolder<QPDFObjectHandle::StreamDataProvider> provider, | 36 | PointerHolder<QPDFObjectHandle::StreamDataProvider> provider, |
| 36 | QPDFObjectHandle const& filter, | 37 | QPDFObjectHandle const& filter, |
| 37 | QPDFObjectHandle const& decode_parms); | 38 | QPDFObjectHandle const& decode_parms); |
| 39 | + void addTokenFilter( | ||
| 40 | + PointerHolder<QPDFObjectHandle::TokenFilter> token_filter); | ||
| 38 | 41 | ||
| 39 | void replaceDict(QPDFObjectHandle new_dict); | 42 | void replaceDict(QPDFObjectHandle new_dict); |
| 40 | 43 | ||
| @@ -72,6 +75,8 @@ class QPDF_Stream: public QPDFObject | @@ -72,6 +75,8 @@ class QPDF_Stream: public QPDFObject | ||
| 72 | size_t length; | 75 | size_t length; |
| 73 | PointerHolder<Buffer> stream_data; | 76 | PointerHolder<Buffer> stream_data; |
| 74 | PointerHolder<QPDFObjectHandle::StreamDataProvider> stream_provider; | 77 | PointerHolder<QPDFObjectHandle::StreamDataProvider> stream_provider; |
| 78 | + std::vector< | ||
| 79 | + PointerHolder<QPDFObjectHandle::TokenFilter> > token_filters; | ||
| 75 | }; | 80 | }; |
| 76 | 81 | ||
| 77 | #endif // __QPDF_STREAM_HH__ | 82 | #endif // __QPDF_STREAM_HH__ |
qpdf/qtest/qpdf.test
| @@ -758,6 +758,19 @@ $td->runtest("check output", | @@ -758,6 +758,19 @@ $td->runtest("check output", | ||
| 758 | 758 | ||
| 759 | show_ntests(); | 759 | show_ntests(); |
| 760 | # ---------- | 760 | # ---------- |
| 761 | +$td->notify("--- Token filters ---"); | ||
| 762 | +$n_tests += 2; | ||
| 763 | + | ||
| 764 | +$td->runtest("token filter", | ||
| 765 | + {$td->COMMAND => "test_driver 41 coalesce.pdf"}, | ||
| 766 | + {$td->STRING => "test 41 done\n", $td->EXIT_STATUS => 0}, | ||
| 767 | + $td->NORMALIZE_NEWLINES); | ||
| 768 | +$td->runtest("check output", | ||
| 769 | + {$td->FILE => "a.pdf"}, | ||
| 770 | + {$td->FILE => "token-filters-out.pdf"}); | ||
| 771 | + | ||
| 772 | +show_ntests(); | ||
| 773 | +# ---------- | ||
| 761 | $td->notify("--- Newline before endstream ---"); | 774 | $td->notify("--- Newline before endstream ---"); |
| 762 | $n_tests += 10; | 775 | $n_tests += 10; |
| 763 | 776 |
qpdf/qtest/qpdf/token-filters-out.pdf
0 → 100644
No preview for this file type
qpdf/test_driver.cc
| @@ -97,6 +97,36 @@ ParserCallbacks::handleEOF() | @@ -97,6 +97,36 @@ ParserCallbacks::handleEOF() | ||
| 97 | std::cout << "-EOF-" << std::endl; | 97 | std::cout << "-EOF-" << std::endl; |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | +class TokenFilter: public QPDFObjectHandle::TokenFilter | ||
| 101 | +{ | ||
| 102 | + public: | ||
| 103 | + TokenFilter() | ||
| 104 | + { | ||
| 105 | + } | ||
| 106 | + virtual ~TokenFilter() | ||
| 107 | + { | ||
| 108 | + } | ||
| 109 | + virtual void handleToken(QPDFTokenizer::Token const& t) | ||
| 110 | + { | ||
| 111 | + if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_string, "Potato")) | ||
| 112 | + { | ||
| 113 | + // Exercise unparsing of strings by token constructor | ||
| 114 | + writeToken( | ||
| 115 | + QPDFTokenizer::Token(QPDFTokenizer::tt_string, "Salad")); | ||
| 116 | + } | ||
| 117 | + else | ||
| 118 | + { | ||
| 119 | + writeToken(t); | ||
| 120 | + } | ||
| 121 | + } | ||
| 122 | + virtual void handleEOF() | ||
| 123 | + { | ||
| 124 | + writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/bye")); | ||
| 125 | + write("\n"); | ||
| 126 | + finish(); | ||
| 127 | + } | ||
| 128 | +}; | ||
| 129 | + | ||
| 100 | static std::string getPageContents(QPDFObjectHandle page) | 130 | static std::string getPageContents(QPDFObjectHandle page) |
| 101 | { | 131 | { |
| 102 | PointerHolder<Buffer> b1 = | 132 | PointerHolder<Buffer> b1 = |
| @@ -1345,6 +1375,22 @@ void runtest(int n, char const* filename1, char const* arg2) | @@ -1345,6 +1375,22 @@ void runtest(int n, char const* filename1, char const* arg2) | ||
| 1345 | w.setStaticID(true); | 1375 | w.setStaticID(true); |
| 1346 | w.write(); | 1376 | w.write(); |
| 1347 | } | 1377 | } |
| 1378 | + else if (n == 41) | ||
| 1379 | + { | ||
| 1380 | + // Apply a token filter. This test case is crafted to work | ||
| 1381 | + // with coalesce.pdf. | ||
| 1382 | + std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); | ||
| 1383 | + for (std::vector<QPDFObjectHandle>::iterator iter = | ||
| 1384 | + pages.begin(); | ||
| 1385 | + iter != pages.end(); ++iter) | ||
| 1386 | + { | ||
| 1387 | + (*iter).addContentTokenFilter(new TokenFilter); | ||
| 1388 | + } | ||
| 1389 | + QPDFWriter w(pdf, "a.pdf"); | ||
| 1390 | + w.setQDFMode(true); | ||
| 1391 | + w.setStaticID(true); | ||
| 1392 | + w.write(); | ||
| 1393 | + } | ||
| 1348 | else | 1394 | else |
| 1349 | { | 1395 | { |
| 1350 | throw std::runtime_error(std::string("invalid test ") + | 1396 | throw std::runtime_error(std::string("invalid test ") + |