Commit e410b0fe0d8c1da3e0b0e075b54f247b952389ef
1 parent: 1fdd86a0
Simplify TokenFilter interface
Expose Pl_QPDFTokenizer, and have it do more of the work of managing the token filter's pipeline.
Showing 10 changed files with 44 additions and 67 deletions
examples/pdf-count-strings.cc
| ... | ... | @@ -62,8 +62,6 @@ StringCounter::handleEOF() |
| 62 | 62 | // can enhance the output if we want. |
| 63 | 63 | write("\n% strings found: "); |
| 64 | 64 | write(QUtil::int_to_string(this->count)); |
| 65 | - // If you override handleEOF, you must always remember to call finish(). | |
| 66 | - finish(); | |
| 67 | 65 | } |
| 68 | 66 | |
| 69 | 67 | int | ... | ... |
examples/pdf-filter-tokens.cc
| ... | ... | @@ -184,12 +184,6 @@ ColorToGray::handleEOF() |
| 184 | 184 | writeToken(this->all_stack.at(0)); |
| 185 | 185 | this->all_stack.pop_front(); |
| 186 | 186 | } |
| 187 | - // Remember to call finish(). If you override handleEOF, it is | |
| 188 | - // essential that you call finish() or else you are likely to lose | |
| 189 | - // some data in buffers of downstream pipelines that are not | |
| 190 | - // flushed out. This is also mentioned in comments in | |
| 191 | - // QPDFObjectHandle.hh. | |
| 192 | - finish(); | |
| 193 | 187 | } |
| 194 | 188 | |
| 195 | 189 | int main(int argc, char* argv[]) | ... | ... |
include/qpdf/Pl_QPDFTokenizer.hh
| ... | ... | @@ -41,8 +41,12 @@ |
| 41 | 41 | class Pl_QPDFTokenizer: public Pipeline |
| 42 | 42 | { |
| 43 | 43 | public: |
| 44 | + // Whatever pipeline is provided as "next" will be set as the | |
| 45 | + // pipeline that the token filter writes to. If next is not | |
| 46 | + // provided, any output written by the filter will be discarded. | |
| 44 | 47 | Pl_QPDFTokenizer(char const* identifier, |
| 45 | - QPDFObjectHandle::TokenFilter* filter); | |
| 48 | + QPDFObjectHandle::TokenFilter* filter, | |
| 49 | + Pipeline* next = 0); | |
| 46 | 50 | virtual ~Pl_QPDFTokenizer(); |
| 47 | 51 | virtual void write(unsigned char* buf, size_t len); |
| 48 | 52 | virtual void finish(); | ... | ... |
include/qpdf/QPDFObjectHandle.hh
| ... | ... | @@ -45,6 +45,7 @@ class QPDF_Dictionary; |
| 45 | 45 | class QPDF_Array; |
| 46 | 46 | class QPDFTokenizer; |
| 47 | 47 | class QPDFExc; |
| 48 | +class Pl_QPDFTokenizer; | |
| 48 | 49 | |
| 49 | 50 | class QPDFObjectHandle |
| 50 | 51 | { |
| ... | ... | @@ -81,18 +82,13 @@ class QPDFObjectHandle |
| 81 | 82 | // in a lexically aware fashion. TokenFilters can be attached to |
| 82 | 83 | // streams using the addTokenFilter or addContentTokenFilter |
| 83 | 84 | // methods or can be applied on the spot by filterPageContents. |
| 85 | + // You may also use Pl_QPDFTokenizer directly if you need full | |
| 86 | + // control. | |
| 87 | + // | |
| 84 | 88 | // The handleToken method is called for each token, including the |
| 85 | 89 | // eof token, and then handleEOF is called at the very end. |
| 86 | 90 | // Handlers may call write (or writeToken) to pass data |
| 87 | - // downstream. The finish() method must be called exactly one time | |
| 88 | - // to ensure that any written data is flushed out. The default | |
| 89 | - // handleEOF calls finish. If you override handleEOF, you must | |
| 90 | - // ensure that finish() is called either there or in response to | |
| 91 | - // whatever event causes you to terminate creation of output. | |
| 92 | - // Failure to call finish() may result in some of the data you | |
| 93 | - // have written being lost. You should not rely on a destructor | |
| 94 | - // for calling finish() since the destructor call may occur later | |
| 95 | - // than you expect. Please see examples/pdf-filter-tokens.cc and | |
| 91 | + // downstream. Please see examples/pdf-filter-tokens.cc and | |
| 96 | 92 | // examples/pdf-count-strings.cc for examples of using |
| 97 | 93 | // TokenFilters. |
| 98 | 94 | // |
| ... | ... | @@ -115,15 +111,17 @@ class QPDFObjectHandle |
| 115 | 111 | { |
| 116 | 112 | } |
| 117 | 113 | virtual void handleToken(QPDFTokenizer::Token const&) = 0; |
| 118 | - virtual void handleEOF() | |
| 119 | - { | |
| 120 | - // If you override handleEOF, you must be sure to call | |
| 121 | - // finish(). | |
| 122 | - finish(); | |
| 123 | - } | |
| 114 | + virtual void handleEOF(); | |
| 124 | 115 | |
| 125 | - // This is called internally by the qpdf library. | |
| 126 | - void setPipeline(Pipeline*); | |
| 116 | + class PipelineAccessor | |
| 117 | + { | |
| 118 | + friend class Pl_QPDFTokenizer; | |
| 119 | + private: | |
| 120 | + static void setPipeline(TokenFilter* f, Pipeline* p) | |
| 121 | + { | |
| 122 | + f->setPipeline(p); | |
| 123 | + } | |
| 124 | + }; | |
| 127 | 125 | |
| 128 | 126 | protected: |
| 129 | 127 | QPDF_DLL |
| ... | ... | @@ -132,10 +130,10 @@ class QPDFObjectHandle |
| 132 | 130 | void write(std::string const& str); |
| 133 | 131 | QPDF_DLL |
| 134 | 132 | void writeToken(QPDFTokenizer::Token const&); |
| 135 | - QPDF_DLL | |
| 136 | - void finish(); | |
| 137 | 133 | |
| 138 | 134 | private: |
| 135 | + void setPipeline(Pipeline*); | |
| 136 | + | |
| 139 | 137 | Pipeline* pipeline; |
| 140 | 138 | }; |
| 141 | 139 | ... | ... |
libqpdf/ContentNormalizer.cc
libqpdf/Pl_QPDFTokenizer.cc
| ... | ... | @@ -15,14 +15,15 @@ Pl_QPDFTokenizer::Members::~Members() |
| 15 | 15 | { |
| 16 | 16 | } |
| 17 | 17 | |
| 18 | -Pl_QPDFTokenizer::Pl_QPDFTokenizer( | |
| 19 | - char const* identifier, | |
| 20 | - QPDFObjectHandle::TokenFilter* filter) | |
| 21 | - : | |
| 22 | - Pipeline(identifier, 0), | |
| 18 | +Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, | |
| 19 | + QPDFObjectHandle::TokenFilter* filter, | |
| 20 | + Pipeline* next) : | |
| 21 | + Pipeline(identifier, next), | |
| 23 | 22 | m(new Members) |
| 24 | 23 | { |
| 25 | 24 | m->filter = filter; |
| 25 | + QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline( | |
| 26 | + m->filter, next); | |
| 26 | 27 | m->tokenizer.allowEOF(); |
| 27 | 28 | m->tokenizer.includeIgnorable(); |
| 28 | 29 | } |
| ... | ... | @@ -88,4 +89,11 @@ Pl_QPDFTokenizer::finish() |
| 88 | 89 | } |
| 89 | 90 | |
| 90 | 91 | this->m->filter->handleEOF(); |
| 92 | + QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline( | |
| 93 | + m->filter, 0); | |
| 94 | + Pipeline* next = this->getNext(true); | |
| 95 | + if (next) | |
| 96 | + { | |
| 97 | + next->finish(); | |
| 98 | + } | |
| 91 | 99 | } | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -16,7 +16,6 @@ |
| 16 | 16 | #include <qpdf/Pl_Buffer.hh> |
| 17 | 17 | #include <qpdf/Pl_Concatenate.hh> |
| 18 | 18 | #include <qpdf/Pl_QPDFTokenizer.hh> |
| 19 | -#include <qpdf/Pl_Discard.hh> | |
| 20 | 19 | #include <qpdf/BufferInputSource.hh> |
| 21 | 20 | #include <qpdf/QPDFExc.hh> |
| 22 | 21 | |
| ... | ... | @@ -65,6 +64,11 @@ CoalesceProvider::provideStreamData(int, int, Pipeline* p) |
| 65 | 64 | } |
| 66 | 65 | |
| 67 | 66 | void |
| 67 | +QPDFObjectHandle::TokenFilter::handleEOF() | |
| 68 | +{ | |
| 69 | +} | |
| 70 | + | |
| 71 | +void | |
| 68 | 72 | QPDFObjectHandle::TokenFilter::setPipeline(Pipeline* p) |
| 69 | 73 | { |
| 70 | 74 | this->pipeline = p; |
| ... | ... | @@ -75,8 +79,7 @@ QPDFObjectHandle::TokenFilter::write(char const* data, size_t len) |
| 75 | 79 | { |
| 76 | 80 | if (! this->pipeline) |
| 77 | 81 | { |
| 78 | - throw std::logic_error( | |
| 79 | - "TokenFilter::write called before setPipeline"); | |
| 82 | + return; | |
| 80 | 83 | } |
| 81 | 84 | if (len) |
| 82 | 85 | { |
| ... | ... | @@ -98,17 +101,6 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token) |
| 98 | 101 | } |
| 99 | 102 | |
| 100 | 103 | void |
| 101 | -QPDFObjectHandle::TokenFilter::finish() | |
| 102 | -{ | |
| 103 | - if (! this->pipeline) | |
| 104 | - { | |
| 105 | - throw std::logic_error( | |
| 106 | - "TokenFilter::finish called before setPipeline"); | |
| 107 | - } | |
| 108 | - this->pipeline->finish(); | |
| 109 | -} | |
| 110 | - | |
| 111 | -void | |
| 112 | 104 | QPDFObjectHandle::ParserCallbacks::terminateParsing() |
| 113 | 105 | { |
| 114 | 106 | throw TerminateParsing(); |
| ... | ... | @@ -1007,14 +999,7 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) |
| 1007 | 999 | std::string description = "token filter for page object " + |
| 1008 | 1000 | QUtil::int_to_string(this->objid) + " " + |
| 1009 | 1001 | QUtil::int_to_string(this->generation); |
| 1010 | - Pl_QPDFTokenizer token_pipeline(description.c_str(), filter); | |
| 1011 | - PointerHolder<Pipeline> next_p; | |
| 1012 | - if (next == 0) | |
| 1013 | - { | |
| 1014 | - next_p = new Pl_Discard(); | |
| 1015 | - next = next_p.getPointer(); | |
| 1016 | - } | |
| 1017 | - filter->setPipeline(next); | |
| 1002 | + Pl_QPDFTokenizer token_pipeline(description.c_str(), filter, next); | |
| 1018 | 1003 | this->pipePageContents(&token_pipeline); |
| 1019 | 1004 | } |
| 1020 | 1005 | ... | ... |
libqpdf/QPDF_Stream.cc
| ... | ... | @@ -459,9 +459,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, |
| 459 | 459 | if (encode_flags & qpdf_ef_normalize) |
| 460 | 460 | { |
| 461 | 461 | normalizer = new ContentNormalizer(); |
| 462 | - normalizer->setPipeline(pipeline); | |
| 463 | 462 | pipeline = new Pl_QPDFTokenizer( |
| 464 | - "normalizer", normalizer.getPointer()); | |
| 463 | + "normalizer", normalizer.getPointer(), pipeline); | |
| 465 | 464 | to_delete.push_back(pipeline); |
| 466 | 465 | } |
| 467 | 466 | |
| ... | ... | @@ -470,9 +469,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, |
| 470 | 469 | this->token_filters.rbegin(); |
| 471 | 470 | iter != this->token_filters.rend(); ++iter) |
| 472 | 471 | { |
| 473 | - (*iter)->setPipeline(pipeline); | |
| 474 | 472 | pipeline = new Pl_QPDFTokenizer( |
| 475 | - "token filter", (*iter).getPointer()); | |
| 473 | + "token filter", (*iter).getPointer(), pipeline); | |
| 476 | 474 | to_delete.push_back(pipeline); |
| 477 | 475 | } |
| 478 | 476 | ... | ... |
libqpdf/qpdf/ContentNormalizer.hh
| ... | ... | @@ -9,7 +9,6 @@ class ContentNormalizer: public QPDFObjectHandle::TokenFilter |
| 9 | 9 | ContentNormalizer(); |
| 10 | 10 | virtual ~ContentNormalizer(); |
| 11 | 11 | virtual void handleToken(QPDFTokenizer::Token const&); |
| 12 | - virtual void handleEOF(); | |
| 13 | 12 | |
| 14 | 13 | bool anyBadTokens() const; |
| 15 | 14 | bool lastTokenWasBad() const; | ... | ... |