Commit ec538792fac039daa9636f9c94000b7bc1f3a669

Authored by Jay Berkenbilt
1 parent 5b5f45e9

Use inline image token type in tokenizer filter

libqpdf/Pl_QPDFTokenizer.cc
@@ -12,10 +12,8 @@ Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) : @@ -12,10 +12,8 @@ Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
12 just_wrote_nl(false), 12 just_wrote_nl(false),
13 last_char_was_cr(false), 13 last_char_was_cr(false),
14 unread_char(false), 14 unread_char(false),
15 - char_to_unread('\0'),  
16 - in_inline_image(false) 15 + char_to_unread('\0')
17 { 16 {
18 - memset(this->image_buf, 0, IMAGE_BUF_SIZE);  
19 } 17 }
20 18
21 Pl_QPDFTokenizer::~Pl_QPDFTokenizer() 19 Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
@@ -56,37 +54,6 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token) @@ -56,37 +54,6 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
56 void 54 void
57 Pl_QPDFTokenizer::processChar(char ch) 55 Pl_QPDFTokenizer::processChar(char ch)
58 { 56 {
59 - if (this->in_inline_image)  
60 - {  
61 - // Scan through the input looking for EI surrounded by  
62 - // whitespace. If that pattern appears in the inline image's  
63 - // representation, we're hosed, but this situation seems  
64 - // excessively unlikely, and this code path is only followed  
65 - // during content stream normalization, which is pretty much  
66 - // used for debugging and human inspection of PDF files.  
67 - memmove(this->image_buf,  
68 - this->image_buf + 1,  
69 - IMAGE_BUF_SIZE - 1);  
70 - this->image_buf[IMAGE_BUF_SIZE - 1] = ch;  
71 - if (strchr(" \t\n\v\f\r", this->image_buf[0]) &&  
72 - (this->image_buf[1] == 'E') &&  
73 - (this->image_buf[2] == 'I') &&  
74 - strchr(" \t\n\v\f\r", this->image_buf[3]))  
75 - {  
76 - // We've found an EI operator. We've already written the  
77 - // EI operator to output; terminate with a newline  
78 - // character and resume normal processing.  
79 - writeNext("\n", 1);  
80 - this->in_inline_image = false;  
81 - QTC::TC("qpdf", "Pl_QPDFTokenizer found EI");  
82 - }  
83 - else  
84 - {  
85 - writeNext(&ch, 1);  
86 - }  
87 - return;  
88 - }  
89 -  
90 tokenizer.presentCharacter(ch); 57 tokenizer.presentCharacter(ch);
91 QPDFTokenizer::Token token; 58 QPDFTokenizer::Token token;
92 if (tokenizer.getToken(token, this->unread_char, this->char_to_unread)) 59 if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
@@ -100,13 +67,8 @@ Pl_QPDFTokenizer::processChar(char ch) @@ -100,13 +67,8 @@ Pl_QPDFTokenizer::processChar(char ch)
100 if ((token.getType() == QPDFTokenizer::tt_word) && 67 if ((token.getType() == QPDFTokenizer::tt_word) &&
101 (token.getValue() == "ID")) 68 (token.getValue() == "ID"))
102 { 69 {
103 - // Suspend normal scanning until we find an EI token.  
104 - this->in_inline_image = true;  
105 - if (this->unread_char)  
106 - {  
107 - writeNext(&this->char_to_unread, 1);  
108 - this->unread_char = false;  
109 - } 70 + QTC::TC("qpdf", "Pl_QPDFTokenizer found ID");
  71 + tokenizer.expectInlineImage();
110 } 72 }
111 } 73 }
112 else 74 else
@@ -171,21 +133,18 @@ void @@ -171,21 +133,18 @@ void
171 Pl_QPDFTokenizer::finish() 133 Pl_QPDFTokenizer::finish()
172 { 134 {
173 this->tokenizer.presentEOF(); 135 this->tokenizer.presentEOF();
174 - if (! this->in_inline_image) 136 + QPDFTokenizer::Token token;
  137 + if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
175 { 138 {
176 - QPDFTokenizer::Token token;  
177 - if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))  
178 - {  
179 - writeToken(token);  
180 - if (unread_char)  
181 - {  
182 - if (this->char_to_unread == '\r')  
183 - {  
184 - this->char_to_unread = '\n';  
185 - }  
186 - writeNext(&this->char_to_unread, 1);  
187 - }  
188 - } 139 + writeToken(token);
  140 + if (unread_char)
  141 + {
  142 + if (this->char_to_unread == '\r')
  143 + {
  144 + this->char_to_unread = '\n';
  145 + }
  146 + writeNext(&this->char_to_unread, 1);
  147 + }
189 } 148 }
190 if (! this->just_wrote_nl) 149 if (! this->just_wrote_nl)
191 { 150 {
libqpdf/qpdf/Pl_QPDFTokenizer.hh
@@ -33,9 +33,6 @@ class Pl_QPDFTokenizer: public Pipeline @@ -33,9 +33,6 @@ class Pl_QPDFTokenizer: public Pipeline
33 bool last_char_was_cr; 33 bool last_char_was_cr;
34 bool unread_char; 34 bool unread_char;
35 char char_to_unread; 35 char char_to_unread;
36 - bool in_inline_image;  
37 - static int const IMAGE_BUF_SIZE = 4; // must be >= 4  
38 - char image_buf[IMAGE_BUF_SIZE];  
39 }; 36 };
40 37
41 #endif // __PL_QPDFTOKENIZER_HH__ 38 #endif // __PL_QPDFTOKENIZER_HH__
qpdf/qpdf.testcov
@@ -182,7 +182,6 @@ QPDF_Stream getRawStreamData 0 @@ -182,7 +182,6 @@ QPDF_Stream getRawStreamData 0
182 QPDF_Stream getStreamData 0 182 QPDF_Stream getStreamData 0
183 QPDF_Stream expand filter abbreviation 0 183 QPDF_Stream expand filter abbreviation 0
184 qpdf-c called qpdf_read_memory 0 184 qpdf-c called qpdf_read_memory 0
185 -Pl_QPDFTokenizer found EI 0  
186 QPDF stream without newline 0 185 QPDF stream without newline 0
187 QPDF stream with CR only 0 186 QPDF stream with CR only 0
188 QPDF stream with CRNL 0 187 QPDF stream with CRNL 0
@@ -304,3 +303,4 @@ qpdf-c called qpdf_set_newline_before_endstream 0 @@ -304,3 +303,4 @@ qpdf-c called qpdf_set_newline_before_endstream 0
304 QPDF_Stream TIFF predictor 0 303 QPDF_Stream TIFF predictor 0
305 QPDFTokenizer EOF when not allowed 0 304 QPDFTokenizer EOF when not allowed 0
306 QPDFTokenizer inline image at EOF 0 305 QPDFTokenizer inline image at EOF 0
  306 +Pl_QPDFTokenizer found ID 0