Commit e410b0fe0d8c1da3e0b0e075b54f247b952389ef
1 parent: 1fdd86a0
Simplify TokenFilter interface
Expose Pl_QPDFTokenizer, and have it do more of the work of managing the token filter's pipeline.
Showing 10 changed files with 44 additions and 67 deletions.
examples/pdf-count-strings.cc
| @@ -62,8 +62,6 @@ StringCounter::handleEOF() | @@ -62,8 +62,6 @@ StringCounter::handleEOF() | ||
| 62 | // can enhance the output if we want. | 62 | // can enhance the output if we want. |
| 63 | write("\n% strings found: "); | 63 | write("\n% strings found: "); |
| 64 | write(QUtil::int_to_string(this->count)); | 64 | write(QUtil::int_to_string(this->count)); |
| 65 | - // If you override handleEOF, you must always remember to call finish(). | ||
| 66 | - finish(); | ||
| 67 | } | 65 | } |
| 68 | 66 | ||
| 69 | int | 67 | int |
examples/pdf-filter-tokens.cc
| @@ -184,12 +184,6 @@ ColorToGray::handleEOF() | @@ -184,12 +184,6 @@ ColorToGray::handleEOF() | ||
| 184 | writeToken(this->all_stack.at(0)); | 184 | writeToken(this->all_stack.at(0)); |
| 185 | this->all_stack.pop_front(); | 185 | this->all_stack.pop_front(); |
| 186 | } | 186 | } |
| 187 | - // Remember to call finish(). If you override handleEOF, it is | ||
| 188 | - // essential that you call finish() or else you are likely to lose | ||
| 189 | - // some data in buffers of downstream pipelines that are not | ||
| 190 | - // flushed out. This is also mentioned in comments in | ||
| 191 | - // QPDFObjectHandle.hh. | ||
| 192 | - finish(); | ||
| 193 | } | 187 | } |
| 194 | 188 | ||
| 195 | int main(int argc, char* argv[]) | 189 | int main(int argc, char* argv[]) |
include/qpdf/Pl_QPDFTokenizer.hh
| @@ -41,8 +41,12 @@ | @@ -41,8 +41,12 @@ | ||
| 41 | class Pl_QPDFTokenizer: public Pipeline | 41 | class Pl_QPDFTokenizer: public Pipeline |
| 42 | { | 42 | { |
| 43 | public: | 43 | public: |
| 44 | + // Whatever pipeline is provided as "next" will be set as the | ||
| 45 | + // pipeline that the token filter writes to. If next is not | ||
| 46 | + // provided, any output written by the filter will be discarded. | ||
| 44 | Pl_QPDFTokenizer(char const* identifier, | 47 | Pl_QPDFTokenizer(char const* identifier, |
| 45 | - QPDFObjectHandle::TokenFilter* filter); | 48 | + QPDFObjectHandle::TokenFilter* filter, |
| 49 | + Pipeline* next = 0); | ||
| 46 | virtual ~Pl_QPDFTokenizer(); | 50 | virtual ~Pl_QPDFTokenizer(); |
| 47 | virtual void write(unsigned char* buf, size_t len); | 51 | virtual void write(unsigned char* buf, size_t len); |
| 48 | virtual void finish(); | 52 | virtual void finish(); |
include/qpdf/QPDFObjectHandle.hh
| @@ -45,6 +45,7 @@ class QPDF_Dictionary; | @@ -45,6 +45,7 @@ class QPDF_Dictionary; | ||
| 45 | class QPDF_Array; | 45 | class QPDF_Array; |
| 46 | class QPDFTokenizer; | 46 | class QPDFTokenizer; |
| 47 | class QPDFExc; | 47 | class QPDFExc; |
| 48 | +class Pl_QPDFTokenizer; | ||
| 48 | 49 | ||
| 49 | class QPDFObjectHandle | 50 | class QPDFObjectHandle |
| 50 | { | 51 | { |
| @@ -81,18 +82,13 @@ class QPDFObjectHandle | @@ -81,18 +82,13 @@ class QPDFObjectHandle | ||
| 81 | // in a lexically aware fashion. TokenFilters can be attached to | 82 | // in a lexically aware fashion. TokenFilters can be attached to |
| 82 | // streams using the addTokenFilter or addContentTokenFilter | 83 | // streams using the addTokenFilter or addContentTokenFilter |
| 83 | // methods or can be applied on the spot by filterPageContents. | 84 | // methods or can be applied on the spot by filterPageContents. |
| 85 | + // You may also use Pl_QPDFTokenizer directly if you need full | ||
| 86 | + // control. | ||
| 87 | + // | ||
| 84 | // The handleToken method is called for each token, including the | 88 | // The handleToken method is called for each token, including the |
| 85 | // eof token, and then handleEOF is called at the very end. | 89 | // eof token, and then handleEOF is called at the very end. |
| 86 | // Handlers may call write (or writeToken) to pass data | 90 | // Handlers may call write (or writeToken) to pass data |
| 87 | - // downstream. The finish() method must be called exactly one time | ||
| 88 | - // to ensure that any written data is flushed out. The default | ||
| 89 | - // handleEOF calls finish. If you override handleEOF, you must | ||
| 90 | - // ensure that finish() is called either there or in response to | ||
| 91 | - // whatever event causes you to terminate creation of output. | ||
| 92 | - // Failure to call finish() may result in some of the data you | ||
| 93 | - // have written being lost. You should not rely on a destructor | ||
| 94 | - // for calling finish() since the destructor call may occur later | ||
| 95 | - // than you expect. Please see examples/pdf-filter-tokens.cc and | 91 | + // downstream. Please see examples/pdf-filter-tokens.cc and |
| 96 | // examples/pdf-count-strings.cc for examples of using | 92 | // examples/pdf-count-strings.cc for examples of using |
| 97 | // TokenFilters. | 93 | // TokenFilters. |
| 98 | // | 94 | // |
| @@ -115,15 +111,17 @@ class QPDFObjectHandle | @@ -115,15 +111,17 @@ class QPDFObjectHandle | ||
| 115 | { | 111 | { |
| 116 | } | 112 | } |
| 117 | virtual void handleToken(QPDFTokenizer::Token const&) = 0; | 113 | virtual void handleToken(QPDFTokenizer::Token const&) = 0; |
| 118 | - virtual void handleEOF() | ||
| 119 | - { | ||
| 120 | - // If you override handleEOF, you must be sure to call | ||
| 121 | - // finish(). | ||
| 122 | - finish(); | ||
| 123 | - } | 114 | + virtual void handleEOF(); |
| 124 | 115 | ||
| 125 | - // This is called internally by the qpdf library. | ||
| 126 | - void setPipeline(Pipeline*); | 116 | + class PipelineAccessor |
| 117 | + { | ||
| 118 | + friend class Pl_QPDFTokenizer; | ||
| 119 | + private: | ||
| 120 | + static void setPipeline(TokenFilter* f, Pipeline* p) | ||
| 121 | + { | ||
| 122 | + f->setPipeline(p); | ||
| 123 | + } | ||
| 124 | + }; | ||
| 127 | 125 | ||
| 128 | protected: | 126 | protected: |
| 129 | QPDF_DLL | 127 | QPDF_DLL |
| @@ -132,10 +130,10 @@ class QPDFObjectHandle | @@ -132,10 +130,10 @@ class QPDFObjectHandle | ||
| 132 | void write(std::string const& str); | 130 | void write(std::string const& str); |
| 133 | QPDF_DLL | 131 | QPDF_DLL |
| 134 | void writeToken(QPDFTokenizer::Token const&); | 132 | void writeToken(QPDFTokenizer::Token const&); |
| 135 | - QPDF_DLL | ||
| 136 | - void finish(); | ||
| 137 | 133 | ||
| 138 | private: | 134 | private: |
| 135 | + void setPipeline(Pipeline*); | ||
| 136 | + | ||
| 139 | Pipeline* pipeline; | 137 | Pipeline* pipeline; |
| 140 | }; | 138 | }; |
| 141 | 139 |
libqpdf/ContentNormalizer.cc
| @@ -82,12 +82,6 @@ ContentNormalizer::handleToken(QPDFTokenizer::Token const& token) | @@ -82,12 +82,6 @@ ContentNormalizer::handleToken(QPDFTokenizer::Token const& token) | ||
| 82 | } | 82 | } |
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | -void | ||
| 86 | -ContentNormalizer::handleEOF() | ||
| 87 | -{ | ||
| 88 | - finish(); | ||
| 89 | -} | ||
| 90 | - | ||
| 91 | bool | 85 | bool |
| 92 | ContentNormalizer::anyBadTokens() const | 86 | ContentNormalizer::anyBadTokens() const |
| 93 | { | 87 | { |
libqpdf/Pl_QPDFTokenizer.cc
| @@ -15,14 +15,15 @@ Pl_QPDFTokenizer::Members::~Members() | @@ -15,14 +15,15 @@ Pl_QPDFTokenizer::Members::~Members() | ||
| 15 | { | 15 | { |
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | -Pl_QPDFTokenizer::Pl_QPDFTokenizer( | ||
| 19 | - char const* identifier, | ||
| 20 | - QPDFObjectHandle::TokenFilter* filter) | ||
| 21 | - : | ||
| 22 | - Pipeline(identifier, 0), | 18 | +Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, |
| 19 | + QPDFObjectHandle::TokenFilter* filter, | ||
| 20 | + Pipeline* next) : | ||
| 21 | + Pipeline(identifier, next), | ||
| 23 | m(new Members) | 22 | m(new Members) |
| 24 | { | 23 | { |
| 25 | m->filter = filter; | 24 | m->filter = filter; |
| 25 | + QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline( | ||
| 26 | + m->filter, next); | ||
| 26 | m->tokenizer.allowEOF(); | 27 | m->tokenizer.allowEOF(); |
| 27 | m->tokenizer.includeIgnorable(); | 28 | m->tokenizer.includeIgnorable(); |
| 28 | } | 29 | } |
| @@ -88,4 +89,11 @@ Pl_QPDFTokenizer::finish() | @@ -88,4 +89,11 @@ Pl_QPDFTokenizer::finish() | ||
| 88 | } | 89 | } |
| 89 | 90 | ||
| 90 | this->m->filter->handleEOF(); | 91 | this->m->filter->handleEOF(); |
| 92 | + QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline( | ||
| 93 | + m->filter, 0); | ||
| 94 | + Pipeline* next = this->getNext(true); | ||
| 95 | + if (next) | ||
| 96 | + { | ||
| 97 | + next->finish(); | ||
| 98 | + } | ||
| 91 | } | 99 | } |
libqpdf/QPDFObjectHandle.cc
| @@ -16,7 +16,6 @@ | @@ -16,7 +16,6 @@ | ||
| 16 | #include <qpdf/Pl_Buffer.hh> | 16 | #include <qpdf/Pl_Buffer.hh> |
| 17 | #include <qpdf/Pl_Concatenate.hh> | 17 | #include <qpdf/Pl_Concatenate.hh> |
| 18 | #include <qpdf/Pl_QPDFTokenizer.hh> | 18 | #include <qpdf/Pl_QPDFTokenizer.hh> |
| 19 | -#include <qpdf/Pl_Discard.hh> | ||
| 20 | #include <qpdf/BufferInputSource.hh> | 19 | #include <qpdf/BufferInputSource.hh> |
| 21 | #include <qpdf/QPDFExc.hh> | 20 | #include <qpdf/QPDFExc.hh> |
| 22 | 21 | ||
| @@ -65,6 +64,11 @@ CoalesceProvider::provideStreamData(int, int, Pipeline* p) | @@ -65,6 +64,11 @@ CoalesceProvider::provideStreamData(int, int, Pipeline* p) | ||
| 65 | } | 64 | } |
| 66 | 65 | ||
| 67 | void | 66 | void |
| 67 | +QPDFObjectHandle::TokenFilter::handleEOF() | ||
| 68 | +{ | ||
| 69 | +} | ||
| 70 | + | ||
| 71 | +void | ||
| 68 | QPDFObjectHandle::TokenFilter::setPipeline(Pipeline* p) | 72 | QPDFObjectHandle::TokenFilter::setPipeline(Pipeline* p) |
| 69 | { | 73 | { |
| 70 | this->pipeline = p; | 74 | this->pipeline = p; |
| @@ -75,8 +79,7 @@ QPDFObjectHandle::TokenFilter::write(char const* data, size_t len) | @@ -75,8 +79,7 @@ QPDFObjectHandle::TokenFilter::write(char const* data, size_t len) | ||
| 75 | { | 79 | { |
| 76 | if (! this->pipeline) | 80 | if (! this->pipeline) |
| 77 | { | 81 | { |
| 78 | - throw std::logic_error( | ||
| 79 | - "TokenFilter::write called before setPipeline"); | 82 | + return; |
| 80 | } | 83 | } |
| 81 | if (len) | 84 | if (len) |
| 82 | { | 85 | { |
| @@ -98,17 +101,6 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token) | @@ -98,17 +101,6 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token) | ||
| 98 | } | 101 | } |
| 99 | 102 | ||
| 100 | void | 103 | void |
| 101 | -QPDFObjectHandle::TokenFilter::finish() | ||
| 102 | -{ | ||
| 103 | - if (! this->pipeline) | ||
| 104 | - { | ||
| 105 | - throw std::logic_error( | ||
| 106 | - "TokenFilter::finish called before setPipeline"); | ||
| 107 | - } | ||
| 108 | - this->pipeline->finish(); | ||
| 109 | -} | ||
| 110 | - | ||
| 111 | -void | ||
| 112 | QPDFObjectHandle::ParserCallbacks::terminateParsing() | 104 | QPDFObjectHandle::ParserCallbacks::terminateParsing() |
| 113 | { | 105 | { |
| 114 | throw TerminateParsing(); | 106 | throw TerminateParsing(); |
| @@ -1007,14 +999,7 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) | @@ -1007,14 +999,7 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) | ||
| 1007 | std::string description = "token filter for page object " + | 999 | std::string description = "token filter for page object " + |
| 1008 | QUtil::int_to_string(this->objid) + " " + | 1000 | QUtil::int_to_string(this->objid) + " " + |
| 1009 | QUtil::int_to_string(this->generation); | 1001 | QUtil::int_to_string(this->generation); |
| 1010 | - Pl_QPDFTokenizer token_pipeline(description.c_str(), filter); | ||
| 1011 | - PointerHolder<Pipeline> next_p; | ||
| 1012 | - if (next == 0) | ||
| 1013 | - { | ||
| 1014 | - next_p = new Pl_Discard(); | ||
| 1015 | - next = next_p.getPointer(); | ||
| 1016 | - } | ||
| 1017 | - filter->setPipeline(next); | 1002 | + Pl_QPDFTokenizer token_pipeline(description.c_str(), filter, next); |
| 1018 | this->pipePageContents(&token_pipeline); | 1003 | this->pipePageContents(&token_pipeline); |
| 1019 | } | 1004 | } |
| 1020 | 1005 |
libqpdf/QPDF_Stream.cc
| @@ -459,9 +459,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, | @@ -459,9 +459,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, | ||
| 459 | if (encode_flags & qpdf_ef_normalize) | 459 | if (encode_flags & qpdf_ef_normalize) |
| 460 | { | 460 | { |
| 461 | normalizer = new ContentNormalizer(); | 461 | normalizer = new ContentNormalizer(); |
| 462 | - normalizer->setPipeline(pipeline); | ||
| 463 | pipeline = new Pl_QPDFTokenizer( | 462 | pipeline = new Pl_QPDFTokenizer( |
| 464 | - "normalizer", normalizer.getPointer()); | 463 | + "normalizer", normalizer.getPointer(), pipeline); |
| 465 | to_delete.push_back(pipeline); | 464 | to_delete.push_back(pipeline); |
| 466 | } | 465 | } |
| 467 | 466 | ||
| @@ -470,9 +469,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, | @@ -470,9 +469,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, | ||
| 470 | this->token_filters.rbegin(); | 469 | this->token_filters.rbegin(); |
| 471 | iter != this->token_filters.rend(); ++iter) | 470 | iter != this->token_filters.rend(); ++iter) |
| 472 | { | 471 | { |
| 473 | - (*iter)->setPipeline(pipeline); | ||
| 474 | pipeline = new Pl_QPDFTokenizer( | 472 | pipeline = new Pl_QPDFTokenizer( |
| 475 | - "token filter", (*iter).getPointer()); | 473 | + "token filter", (*iter).getPointer(), pipeline); |
| 476 | to_delete.push_back(pipeline); | 474 | to_delete.push_back(pipeline); |
| 477 | } | 475 | } |
| 478 | 476 |
libqpdf/qpdf/ContentNormalizer.hh
| @@ -9,7 +9,6 @@ class ContentNormalizer: public QPDFObjectHandle::TokenFilter | @@ -9,7 +9,6 @@ class ContentNormalizer: public QPDFObjectHandle::TokenFilter | ||
| 9 | ContentNormalizer(); | 9 | ContentNormalizer(); |
| 10 | virtual ~ContentNormalizer(); | 10 | virtual ~ContentNormalizer(); |
| 11 | virtual void handleToken(QPDFTokenizer::Token const&); | 11 | virtual void handleToken(QPDFTokenizer::Token const&); |
| 12 | - virtual void handleEOF(); | ||
| 13 | 12 | ||
| 14 | bool anyBadTokens() const; | 13 | bool anyBadTokens() const; |
| 15 | bool lastTokenWasBad() const; | 14 | bool lastTokenWasBad() const; |
qpdf/test_driver.cc
| @@ -123,7 +123,6 @@ class TokenFilter: public QPDFObjectHandle::TokenFilter | @@ -123,7 +123,6 @@ class TokenFilter: public QPDFObjectHandle::TokenFilter | ||
| 123 | { | 123 | { |
| 124 | writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/bye")); | 124 | writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/bye")); |
| 125 | write("\n"); | 125 | write("\n"); |
| 126 | - finish(); | ||
| 127 | } | 126 | } |
| 128 | }; | 127 | }; |
| 129 | 128 |