Commit 706106dabbebf90542bc1ba04648609baabaca68

Authored by m-holger
1 parent 6371b90a

Refactor QPDFTokenizer::betweenTokens()

include/qpdf/QPDFTokenizer.hh
... ... @@ -216,13 +216,14 @@ class QPDFTokenizer
216 216 st_number,
217 217 st_real,
218 218 st_decimal,
219   -
220 219 st_name_hex1,
221 220 st_name_hex2,
  221 + st_before_token,
222 222 st_token_ready
223 223 };
224 224  
225 225 void handleCharacter(char);
  226 + void inBeforeToken(char);
226 227 void inTop(char);
227 228 void inSpace(char);
228 229 void inComment(char);
... ... @@ -257,6 +258,8 @@ class QPDFTokenizer
257 258 std::string val;
258 259 std::string raw_val;
259 260 std::string error_message;
  261 + bool before_token;
  262 + bool in_token;
260 263 bool unread_char;
261 264 char char_to_unread;
262 265 size_t inline_image_bytes;
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -76,11 +76,13 @@ QPDFWordTokenFinder::check()
76 76 void
77 77 QPDFTokenizer::reset()
78 78 {
79   - state = st_top;
  79 + state = st_before_token;
80 80 type = tt_bad;
81 81 val.clear();
82 82 raw_val.clear();
83 83 error_message = "";
  84 + before_token = true;
  85 + in_token = false;
84 86 unread_char = false;
85 87 char_to_unread = '\0';
86 88 inline_image_bytes = 0;
... ... @@ -136,8 +138,7 @@ QPDFTokenizer::presentCharacter(char ch)
136 138 {
137 139 handleCharacter(ch);
138 140  
139   - if (!(betweenTokens() ||
140   - ((this->state == st_token_ready) && this->unread_char))) {
  141 + if (this->in_token && !this->unread_char) {
141 142 this->raw_val += ch;
142 143 }
143 144 }
... ... @@ -230,6 +231,10 @@ QPDFTokenizer::handleCharacter(char ch)
230 231 inDecimal(ch);
231 232 return;
232 233  
  234 + case (st_before_token):
  235 + inBeforeToken(ch);
  236 + return;
  237 +
233 238 case (st_token_ready):
234 239 inTokenReady(ch);
235 240 return;
... ... @@ -248,26 +253,35 @@ QPDFTokenizer::inTokenReady(char ch)
248 253 }
249 254  
250 255 void
251   -QPDFTokenizer::inTop(char ch)
  256 +QPDFTokenizer::inBeforeToken(char ch)
252 257 {
253 258 // Note: we specifically do not use ctype here. It is
254 259 // locale-dependent.
255 260 if (isSpace(ch)) {
  261 + this->before_token = !this->include_ignorable;
  262 + this->in_token = this->include_ignorable;
256 263 if (this->include_ignorable) {
257 264 this->state = st_in_space;
258 265 this->val += ch;
259   - return;
260 266 }
261   - return;
262   - }
263   - switch (ch) {
264   - case '%':
  267 + } else if (ch == '%') {
  268 + this->before_token = !this->include_ignorable;
  269 + this->in_token = this->include_ignorable;
265 270 this->state = st_in_comment;
266 271 if (this->include_ignorable) {
267 272 this->val += ch;
268 273 }
269   - return;
  274 + } else {
  275 + this->before_token = false;
  276 + this->in_token = true;
  277 + inTop(ch);
  278 + }
  279 +}
270 280  
  281 +void
  282 +QPDFTokenizer::inTop(char ch)
  283 +{
  284 + switch (ch) {
271 285 case '(':
272 286 this->string_depth = 1;
273 287 this->state = st_in_string;
... ... @@ -376,7 +390,7 @@ QPDFTokenizer::inComment(char ch)
376 390 this->char_to_unread = ch;
377 391 this->state = st_token_ready;
378 392 } else {
379   - this->state = st_top;
  393 + this->state = st_before_token;
380 394 }
381 395 } else if (this->include_ignorable) {
382 396 this->val += ch;
... ... @@ -799,6 +813,7 @@ QPDFTokenizer::presentEOF()
799 813 break;
800 814  
801 815 case st_top:
  816 + case st_before_token:
802 817 this->type = tt_eof;
803 818 break;
804 819  
... ... @@ -824,11 +839,13 @@ QPDFTokenizer::presentEOF()
824 839 void
825 840 QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input)
826 841 {
827   - if (this->state != st_top) {
  842 + if (this->state != st_before_token) {
828 843 throw std::logic_error("QPDFTokenizer::expectInlineImage called"
829 844 " when tokenizer is in improper state");
830 845 }
831 846 findEI(input);
  847 + this->before_token = false;
  848 + this->in_token = true;
832 849 this->state = st_inline_image;
833 850 }
834 851  
... ... @@ -949,10 +966,7 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch)
949 966 bool
950 967 QPDFTokenizer::betweenTokens()
951 968 {
952   - return (
953   - (this->state == st_top) ||
954   - ((!this->include_ignorable) &&
955   - ((this->state == st_in_comment) || (this->state == st_in_space))));
  969 + return this->before_token;
956 970 }
957 971  
958 972 QPDFTokenizer::Token
... ... @@ -987,7 +1001,7 @@ QPDFTokenizer::readToken(
987 1001 }
988 1002 } else {
989 1003 presentCharacter(ch);
990   - if (betweenTokens() && (input->getLastOffset() == offset)) {
  1004 + if (this->before_token && (input->getLastOffset() == offset)) {
991 1005 ++offset;
992 1006 }
993 1007 if (max_len && (this->raw_val.length() >= max_len) &&
... ...