Commit e410b0fe0d8c1da3e0b0e075b54f247b952389ef

Authored by Jay Berkenbilt
1 parent 1fdd86a0

Simplify TokenFilter interface

Expose Pl_QPDFTokenizer, and have it do more of the work of managing
the token filter's pipeline.
examples/pdf-count-strings.cc
@@ -62,8 +62,6 @@ StringCounter::handleEOF() @@ -62,8 +62,6 @@ StringCounter::handleEOF()
62 // can enhance the output if we want. 62 // can enhance the output if we want.
63 write("\n% strings found: "); 63 write("\n% strings found: ");
64 write(QUtil::int_to_string(this->count)); 64 write(QUtil::int_to_string(this->count));
65 - // If you override handleEOF, you must always remember to call finish().  
66 - finish();  
67 } 65 }
68 66
69 int 67 int
examples/pdf-filter-tokens.cc
@@ -184,12 +184,6 @@ ColorToGray::handleEOF() @@ -184,12 +184,6 @@ ColorToGray::handleEOF()
184 writeToken(this->all_stack.at(0)); 184 writeToken(this->all_stack.at(0));
185 this->all_stack.pop_front(); 185 this->all_stack.pop_front();
186 } 186 }
187 - // Remember to call finish(). If you override handleEOF, it is  
188 - // essential that you call finish() or else you are likely to lose  
189 - // some data in buffers of downstream pipelines that are not  
190 - // flushed out. This is also mentioned in comments in  
191 - // QPDFObjectHandle.hh.  
192 - finish();  
193 } 187 }
194 188
195 int main(int argc, char* argv[]) 189 int main(int argc, char* argv[])
include/qpdf/Pl_QPDFTokenizer.hh
@@ -41,8 +41,12 @@ @@ -41,8 +41,12 @@
41 class Pl_QPDFTokenizer: public Pipeline 41 class Pl_QPDFTokenizer: public Pipeline
42 { 42 {
43 public: 43 public:
  44 + // Whatever pipeline is provided as "next" will be set as the
  45 + // pipeline that the token filter writes to. If next is not
  46 + // provided, any output written by the filter will be discarded.
44 Pl_QPDFTokenizer(char const* identifier, 47 Pl_QPDFTokenizer(char const* identifier,
45 - QPDFObjectHandle::TokenFilter* filter); 48 + QPDFObjectHandle::TokenFilter* filter,
  49 + Pipeline* next = 0);
46 virtual ~Pl_QPDFTokenizer(); 50 virtual ~Pl_QPDFTokenizer();
47 virtual void write(unsigned char* buf, size_t len); 51 virtual void write(unsigned char* buf, size_t len);
48 virtual void finish(); 52 virtual void finish();
include/qpdf/QPDFObjectHandle.hh
@@ -45,6 +45,7 @@ class QPDF_Dictionary; @@ -45,6 +45,7 @@ class QPDF_Dictionary;
45 class QPDF_Array; 45 class QPDF_Array;
46 class QPDFTokenizer; 46 class QPDFTokenizer;
47 class QPDFExc; 47 class QPDFExc;
  48 +class Pl_QPDFTokenizer;
48 49
49 class QPDFObjectHandle 50 class QPDFObjectHandle
50 { 51 {
@@ -81,18 +82,13 @@ class QPDFObjectHandle @@ -81,18 +82,13 @@ class QPDFObjectHandle
81 // in a lexically aware fashion. TokenFilters can be attached to 82 // in a lexically aware fashion. TokenFilters can be attached to
82 // streams using the addTokenFilter or addContentTokenFilter 83 // streams using the addTokenFilter or addContentTokenFilter
83 // methods or can be applied on the spot by filterPageContents. 84 // methods or can be applied on the spot by filterPageContents.
  85 + // You may also use Pl_QPDFTokenizer directly if you need full
  86 + // control.
  87 + //
84 // The handleToken method is called for each token, including the 88 // The handleToken method is called for each token, including the
85 // eof token, and then handleEOF is called at the very end. 89 // eof token, and then handleEOF is called at the very end.
86 // Handlers may call write (or writeToken) to pass data 90 // Handlers may call write (or writeToken) to pass data
87 - // downstream. The finish() method must be called exactly one time  
88 - // to ensure that any written data is flushed out. The default  
89 - // handleEOF calls finish. If you override handleEOF, you must  
90 - // ensure that finish() is called either there or in response to  
91 - // whatever event causes you to terminate creation of output.  
92 - // Failure to call finish() may result in some of the data you  
93 - // have written being lost. You should not rely on a destructor  
94 - // for calling finish() since the destructor call may occur later  
95 - // than you expect. Please see examples/pdf-filter-tokens.cc and 91 + // downstream. Please see examples/pdf-filter-tokens.cc and
96 // examples/pdf-count-strings.cc for examples of using 92 // examples/pdf-count-strings.cc for examples of using
97 // TokenFilters. 93 // TokenFilters.
98 // 94 //
@@ -115,15 +111,17 @@ class QPDFObjectHandle @@ -115,15 +111,17 @@ class QPDFObjectHandle
115 { 111 {
116 } 112 }
117 virtual void handleToken(QPDFTokenizer::Token const&) = 0; 113 virtual void handleToken(QPDFTokenizer::Token const&) = 0;
118 - virtual void handleEOF()  
119 - {  
120 - // If you override handleEOF, you must be sure to call  
121 - // finish().  
122 - finish();  
123 - } 114 + virtual void handleEOF();
124 115
125 - // This is called internally by the qpdf library.  
126 - void setPipeline(Pipeline*); 116 + class PipelineAccessor
  117 + {
  118 + friend class Pl_QPDFTokenizer;
  119 + private:
  120 + static void setPipeline(TokenFilter* f, Pipeline* p)
  121 + {
  122 + f->setPipeline(p);
  123 + }
  124 + };
127 125
128 protected: 126 protected:
129 QPDF_DLL 127 QPDF_DLL
@@ -132,10 +130,10 @@ class QPDFObjectHandle @@ -132,10 +130,10 @@ class QPDFObjectHandle
132 void write(std::string const& str); 130 void write(std::string const& str);
133 QPDF_DLL 131 QPDF_DLL
134 void writeToken(QPDFTokenizer::Token const&); 132 void writeToken(QPDFTokenizer::Token const&);
135 - QPDF_DLL  
136 - void finish();  
137 133
138 private: 134 private:
  135 + void setPipeline(Pipeline*);
  136 +
139 Pipeline* pipeline; 137 Pipeline* pipeline;
140 }; 138 };
141 139
libqpdf/ContentNormalizer.cc
@@ -82,12 +82,6 @@ ContentNormalizer::handleToken(QPDFTokenizer::Token const& token) @@ -82,12 +82,6 @@ ContentNormalizer::handleToken(QPDFTokenizer::Token const& token)
82 } 82 }
83 } 83 }
84 84
85 -void  
86 -ContentNormalizer::handleEOF()  
87 -{  
88 - finish();  
89 -}  
90 -  
91 bool 85 bool
92 ContentNormalizer::anyBadTokens() const 86 ContentNormalizer::anyBadTokens() const
93 { 87 {
libqpdf/Pl_QPDFTokenizer.cc
@@ -15,14 +15,15 @@ Pl_QPDFTokenizer::Members::~Members() @@ -15,14 +15,15 @@ Pl_QPDFTokenizer::Members::~Members()
15 { 15 {
16 } 16 }
17 17
18 -Pl_QPDFTokenizer::Pl_QPDFTokenizer(  
19 - char const* identifier,  
20 - QPDFObjectHandle::TokenFilter* filter)  
21 - :  
22 - Pipeline(identifier, 0), 18 +Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier,
  19 + QPDFObjectHandle::TokenFilter* filter,
  20 + Pipeline* next) :
  21 + Pipeline(identifier, next),
23 m(new Members) 22 m(new Members)
24 { 23 {
25 m->filter = filter; 24 m->filter = filter;
  25 + QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline(
  26 + m->filter, next);
26 m->tokenizer.allowEOF(); 27 m->tokenizer.allowEOF();
27 m->tokenizer.includeIgnorable(); 28 m->tokenizer.includeIgnorable();
28 } 29 }
@@ -88,4 +89,11 @@ Pl_QPDFTokenizer::finish() @@ -88,4 +89,11 @@ Pl_QPDFTokenizer::finish()
88 } 89 }
89 90
90 this->m->filter->handleEOF(); 91 this->m->filter->handleEOF();
  92 + QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline(
  93 + m->filter, 0);
  94 + Pipeline* next = this->getNext(true);
  95 + if (next)
  96 + {
  97 + next->finish();
  98 + }
91 } 99 }
libqpdf/QPDFObjectHandle.cc
@@ -16,7 +16,6 @@ @@ -16,7 +16,6 @@
16 #include <qpdf/Pl_Buffer.hh> 16 #include <qpdf/Pl_Buffer.hh>
17 #include <qpdf/Pl_Concatenate.hh> 17 #include <qpdf/Pl_Concatenate.hh>
18 #include <qpdf/Pl_QPDFTokenizer.hh> 18 #include <qpdf/Pl_QPDFTokenizer.hh>
19 -#include <qpdf/Pl_Discard.hh>  
20 #include <qpdf/BufferInputSource.hh> 19 #include <qpdf/BufferInputSource.hh>
21 #include <qpdf/QPDFExc.hh> 20 #include <qpdf/QPDFExc.hh>
22 21
@@ -65,6 +64,11 @@ CoalesceProvider::provideStreamData(int, int, Pipeline* p) @@ -65,6 +64,11 @@ CoalesceProvider::provideStreamData(int, int, Pipeline* p)
65 } 64 }
66 65
67 void 66 void
  67 +QPDFObjectHandle::TokenFilter::handleEOF()
  68 +{
  69 +}
  70 +
  71 +void
68 QPDFObjectHandle::TokenFilter::setPipeline(Pipeline* p) 72 QPDFObjectHandle::TokenFilter::setPipeline(Pipeline* p)
69 { 73 {
70 this->pipeline = p; 74 this->pipeline = p;
@@ -75,8 +79,7 @@ QPDFObjectHandle::TokenFilter::write(char const* data, size_t len) @@ -75,8 +79,7 @@ QPDFObjectHandle::TokenFilter::write(char const* data, size_t len)
75 { 79 {
76 if (! this->pipeline) 80 if (! this->pipeline)
77 { 81 {
78 - throw std::logic_error(  
79 - "TokenFilter::write called before setPipeline"); 82 + return;
80 } 83 }
81 if (len) 84 if (len)
82 { 85 {
@@ -98,17 +101,6 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token) @@ -98,17 +101,6 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token)
98 } 101 }
99 102
100 void 103 void
101 -QPDFObjectHandle::TokenFilter::finish()  
102 -{  
103 - if (! this->pipeline)  
104 - {  
105 - throw std::logic_error(  
106 - "TokenFilter::finish called before setPipeline");  
107 - }  
108 - this->pipeline->finish();  
109 -}  
110 -  
111 -void  
112 QPDFObjectHandle::ParserCallbacks::terminateParsing() 104 QPDFObjectHandle::ParserCallbacks::terminateParsing()
113 { 105 {
114 throw TerminateParsing(); 106 throw TerminateParsing();
@@ -1007,14 +999,7 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) @@ -1007,14 +999,7 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
1007 std::string description = "token filter for page object " + 999 std::string description = "token filter for page object " +
1008 QUtil::int_to_string(this->objid) + " " + 1000 QUtil::int_to_string(this->objid) + " " +
1009 QUtil::int_to_string(this->generation); 1001 QUtil::int_to_string(this->generation);
1010 - Pl_QPDFTokenizer token_pipeline(description.c_str(), filter);  
1011 - PointerHolder<Pipeline> next_p;  
1012 - if (next == 0)  
1013 - {  
1014 - next_p = new Pl_Discard();  
1015 - next = next_p.getPointer();  
1016 - }  
1017 - filter->setPipeline(next); 1002 + Pl_QPDFTokenizer token_pipeline(description.c_str(), filter, next);
1018 this->pipePageContents(&token_pipeline); 1003 this->pipePageContents(&token_pipeline);
1019 } 1004 }
1020 1005
libqpdf/QPDF_Stream.cc
@@ -459,9 +459,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, @@ -459,9 +459,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline,
459 if (encode_flags & qpdf_ef_normalize) 459 if (encode_flags & qpdf_ef_normalize)
460 { 460 {
461 normalizer = new ContentNormalizer(); 461 normalizer = new ContentNormalizer();
462 - normalizer->setPipeline(pipeline);  
463 pipeline = new Pl_QPDFTokenizer( 462 pipeline = new Pl_QPDFTokenizer(
464 - "normalizer", normalizer.getPointer()); 463 + "normalizer", normalizer.getPointer(), pipeline);
465 to_delete.push_back(pipeline); 464 to_delete.push_back(pipeline);
466 } 465 }
467 466
@@ -470,9 +469,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, @@ -470,9 +469,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline,
470 this->token_filters.rbegin(); 469 this->token_filters.rbegin();
471 iter != this->token_filters.rend(); ++iter) 470 iter != this->token_filters.rend(); ++iter)
472 { 471 {
473 - (*iter)->setPipeline(pipeline);  
474 pipeline = new Pl_QPDFTokenizer( 472 pipeline = new Pl_QPDFTokenizer(
475 - "token filter", (*iter).getPointer()); 473 + "token filter", (*iter).getPointer(), pipeline);
476 to_delete.push_back(pipeline); 474 to_delete.push_back(pipeline);
477 } 475 }
478 476
libqpdf/qpdf/ContentNormalizer.hh
@@ -9,7 +9,6 @@ class ContentNormalizer: public QPDFObjectHandle::TokenFilter @@ -9,7 +9,6 @@ class ContentNormalizer: public QPDFObjectHandle::TokenFilter
9 ContentNormalizer(); 9 ContentNormalizer();
10 virtual ~ContentNormalizer(); 10 virtual ~ContentNormalizer();
11 virtual void handleToken(QPDFTokenizer::Token const&); 11 virtual void handleToken(QPDFTokenizer::Token const&);
12 - virtual void handleEOF();  
13 12
14 bool anyBadTokens() const; 13 bool anyBadTokens() const;
15 bool lastTokenWasBad() const; 14 bool lastTokenWasBad() const;
qpdf/test_driver.cc
@@ -123,7 +123,6 @@ class TokenFilter: public QPDFObjectHandle::TokenFilter @@ -123,7 +123,6 @@ class TokenFilter: public QPDFObjectHandle::TokenFilter
123 { 123 {
124 writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/bye")); 124 writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/bye"));
125 write("\n"); 125 write("\n");
126 - finish();  
127 } 126 }
128 }; 127 };
129 128