Commit 706106dabbebf90542bc1ba04648609baabaca68

Authored by m-holger
1 parent 6371b90a

Refactor QPDFTokenizer::betweenTokens()

include/qpdf/QPDFTokenizer.hh
@@ -216,13 +216,14 @@ class QPDFTokenizer @@ -216,13 +216,14 @@ class QPDFTokenizer
216 st_number, 216 st_number,
217 st_real, 217 st_real,
218 st_decimal, 218 st_decimal,
219 -  
220 st_name_hex1, 219 st_name_hex1,
221 st_name_hex2, 220 st_name_hex2,
  221 + st_before_token,
222 st_token_ready 222 st_token_ready
223 }; 223 };
224 224
225 void handleCharacter(char); 225 void handleCharacter(char);
  226 + void inBeforeToken(char);
226 void inTop(char); 227 void inTop(char);
227 void inSpace(char); 228 void inSpace(char);
228 void inComment(char); 229 void inComment(char);
@@ -257,6 +258,8 @@ class QPDFTokenizer @@ -257,6 +258,8 @@ class QPDFTokenizer
257 std::string val; 258 std::string val;
258 std::string raw_val; 259 std::string raw_val;
259 std::string error_message; 260 std::string error_message;
  261 + bool before_token;
  262 + bool in_token;
260 bool unread_char; 263 bool unread_char;
261 char char_to_unread; 264 char char_to_unread;
262 size_t inline_image_bytes; 265 size_t inline_image_bytes;
libqpdf/QPDFTokenizer.cc
@@ -76,11 +76,13 @@ QPDFWordTokenFinder::check() @@ -76,11 +76,13 @@ QPDFWordTokenFinder::check()
76 void 76 void
77 QPDFTokenizer::reset() 77 QPDFTokenizer::reset()
78 { 78 {
79 - state = st_top; 79 + state = st_before_token;
80 type = tt_bad; 80 type = tt_bad;
81 val.clear(); 81 val.clear();
82 raw_val.clear(); 82 raw_val.clear();
83 error_message = ""; 83 error_message = "";
  84 + before_token = true;
  85 + in_token = false;
84 unread_char = false; 86 unread_char = false;
85 char_to_unread = '\0'; 87 char_to_unread = '\0';
86 inline_image_bytes = 0; 88 inline_image_bytes = 0;
@@ -136,8 +138,7 @@ QPDFTokenizer::presentCharacter(char ch) @@ -136,8 +138,7 @@ QPDFTokenizer::presentCharacter(char ch)
136 { 138 {
137 handleCharacter(ch); 139 handleCharacter(ch);
138 140
139 - if (!(betweenTokens() ||  
140 - ((this->state == st_token_ready) && this->unread_char))) { 141 + if (this->in_token && !this->unread_char) {
141 this->raw_val += ch; 142 this->raw_val += ch;
142 } 143 }
143 } 144 }
@@ -230,6 +231,10 @@ QPDFTokenizer::handleCharacter(char ch) @@ -230,6 +231,10 @@ QPDFTokenizer::handleCharacter(char ch)
230 inDecimal(ch); 231 inDecimal(ch);
231 return; 232 return;
232 233
  234 + case (st_before_token):
  235 + inBeforeToken(ch);
  236 + return;
  237 +
233 case (st_token_ready): 238 case (st_token_ready):
234 inTokenReady(ch); 239 inTokenReady(ch);
235 return; 240 return;
@@ -248,26 +253,35 @@ QPDFTokenizer::inTokenReady(char ch) @@ -248,26 +253,35 @@ QPDFTokenizer::inTokenReady(char ch)
248 } 253 }
249 254
250 void 255 void
251 -QPDFTokenizer::inTop(char ch) 256 +QPDFTokenizer::inBeforeToken(char ch)
252 { 257 {
253 // Note: we specifically do not use ctype here. It is 258 // Note: we specifically do not use ctype here. It is
254 // locale-dependent. 259 // locale-dependent.
255 if (isSpace(ch)) { 260 if (isSpace(ch)) {
  261 + this->before_token = !this->include_ignorable;
  262 + this->in_token = this->include_ignorable;
256 if (this->include_ignorable) { 263 if (this->include_ignorable) {
257 this->state = st_in_space; 264 this->state = st_in_space;
258 this->val += ch; 265 this->val += ch;
259 - return;  
260 } 266 }
261 - return;  
262 - }  
263 - switch (ch) {  
264 - case '%': 267 + } else if (ch == '%') {
  268 + this->before_token = !this->include_ignorable;
  269 + this->in_token = this->include_ignorable;
265 this->state = st_in_comment; 270 this->state = st_in_comment;
266 if (this->include_ignorable) { 271 if (this->include_ignorable) {
267 this->val += ch; 272 this->val += ch;
268 } 273 }
269 - return; 274 + } else {
  275 + this->before_token = false;
  276 + this->in_token = true;
  277 + inTop(ch);
  278 + }
  279 +}
270 280
  281 +void
  282 +QPDFTokenizer::inTop(char ch)
  283 +{
  284 + switch (ch) {
271 case '(': 285 case '(':
272 this->string_depth = 1; 286 this->string_depth = 1;
273 this->state = st_in_string; 287 this->state = st_in_string;
@@ -376,7 +390,7 @@ QPDFTokenizer::inComment(char ch) @@ -376,7 +390,7 @@ QPDFTokenizer::inComment(char ch)
376 this->char_to_unread = ch; 390 this->char_to_unread = ch;
377 this->state = st_token_ready; 391 this->state = st_token_ready;
378 } else { 392 } else {
379 - this->state = st_top; 393 + this->state = st_before_token;
380 } 394 }
381 } else if (this->include_ignorable) { 395 } else if (this->include_ignorable) {
382 this->val += ch; 396 this->val += ch;
@@ -799,6 +813,7 @@ QPDFTokenizer::presentEOF() @@ -799,6 +813,7 @@ QPDFTokenizer::presentEOF()
799 break; 813 break;
800 814
801 case st_top: 815 case st_top:
  816 + case st_before_token:
802 this->type = tt_eof; 817 this->type = tt_eof;
803 break; 818 break;
804 819
@@ -824,11 +839,13 @@ QPDFTokenizer::presentEOF() @@ -824,11 +839,13 @@ QPDFTokenizer::presentEOF()
824 void 839 void
825 QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) 840 QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input)
826 { 841 {
827 - if (this->state != st_top) { 842 + if (this->state != st_before_token) {
828 throw std::logic_error("QPDFTokenizer::expectInlineImage called" 843 throw std::logic_error("QPDFTokenizer::expectInlineImage called"
829 " when tokenizer is in improper state"); 844 " when tokenizer is in improper state");
830 } 845 }
831 findEI(input); 846 findEI(input);
  847 + this->before_token = false;
  848 + this->in_token = true;
832 this->state = st_inline_image; 849 this->state = st_inline_image;
833 } 850 }
834 851
@@ -949,10 +966,7 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) @@ -949,10 +966,7 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch)
949 bool 966 bool
950 QPDFTokenizer::betweenTokens() 967 QPDFTokenizer::betweenTokens()
951 { 968 {
952 - return (  
953 - (this->state == st_top) ||  
954 - ((!this->include_ignorable) &&  
955 - ((this->state == st_in_comment) || (this->state == st_in_space)))); 969 + return this->before_token;
956 } 970 }
957 971
958 QPDFTokenizer::Token 972 QPDFTokenizer::Token
@@ -987,7 +1001,7 @@ QPDFTokenizer::readToken( @@ -987,7 +1001,7 @@ QPDFTokenizer::readToken(
987 } 1001 }
988 } else { 1002 } else {
989 presentCharacter(ch); 1003 presentCharacter(ch);
990 - if (betweenTokens() && (input->getLastOffset() == offset)) { 1004 + if (this->before_token && (input->getLastOffset() == offset)) {
991 ++offset; 1005 ++offset;
992 } 1006 }
993 if (max_len && (this->raw_val.length() >= max_len) && 1007 if (max_len && (this->raw_val.length() >= max_len) &&