Commit 706106dabbebf90542bc1ba04648609baabaca68
1 parent: 6371b90a
Refactor QPDFTokenizer::betweenTokens()
Showing 2 changed files with 35 additions and 18 deletions
include/qpdf/QPDFTokenizer.hh
| @@ -216,13 +216,14 @@ class QPDFTokenizer | @@ -216,13 +216,14 @@ class QPDFTokenizer | ||
| 216 | st_number, | 216 | st_number, |
| 217 | st_real, | 217 | st_real, |
| 218 | st_decimal, | 218 | st_decimal, |
| 219 | - | ||
| 220 | st_name_hex1, | 219 | st_name_hex1, |
| 221 | st_name_hex2, | 220 | st_name_hex2, |
| 221 | + st_before_token, | ||
| 222 | st_token_ready | 222 | st_token_ready |
| 223 | }; | 223 | }; |
| 224 | 224 | ||
| 225 | void handleCharacter(char); | 225 | void handleCharacter(char); |
| 226 | + void inBeforeToken(char); | ||
| 226 | void inTop(char); | 227 | void inTop(char); |
| 227 | void inSpace(char); | 228 | void inSpace(char); |
| 228 | void inComment(char); | 229 | void inComment(char); |
| @@ -257,6 +258,8 @@ class QPDFTokenizer | @@ -257,6 +258,8 @@ class QPDFTokenizer | ||
| 257 | std::string val; | 258 | std::string val; |
| 258 | std::string raw_val; | 259 | std::string raw_val; |
| 259 | std::string error_message; | 260 | std::string error_message; |
| 261 | + bool before_token; | ||
| 262 | + bool in_token; | ||
| 260 | bool unread_char; | 263 | bool unread_char; |
| 261 | char char_to_unread; | 264 | char char_to_unread; |
| 262 | size_t inline_image_bytes; | 265 | size_t inline_image_bytes; |
libqpdf/QPDFTokenizer.cc
| @@ -76,11 +76,13 @@ QPDFWordTokenFinder::check() | @@ -76,11 +76,13 @@ QPDFWordTokenFinder::check() | ||
| 76 | void | 76 | void |
| 77 | QPDFTokenizer::reset() | 77 | QPDFTokenizer::reset() |
| 78 | { | 78 | { |
| 79 | - state = st_top; | 79 | + state = st_before_token; |
| 80 | type = tt_bad; | 80 | type = tt_bad; |
| 81 | val.clear(); | 81 | val.clear(); |
| 82 | raw_val.clear(); | 82 | raw_val.clear(); |
| 83 | error_message = ""; | 83 | error_message = ""; |
| 84 | + before_token = true; | ||
| 85 | + in_token = false; | ||
| 84 | unread_char = false; | 86 | unread_char = false; |
| 85 | char_to_unread = '\0'; | 87 | char_to_unread = '\0'; |
| 86 | inline_image_bytes = 0; | 88 | inline_image_bytes = 0; |
| @@ -136,8 +138,7 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -136,8 +138,7 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 136 | { | 138 | { |
| 137 | handleCharacter(ch); | 139 | handleCharacter(ch); |
| 138 | 140 | ||
| 139 | - if (!(betweenTokens() || | ||
| 140 | - ((this->state == st_token_ready) && this->unread_char))) { | 141 | + if (this->in_token && !this->unread_char) { |
| 141 | this->raw_val += ch; | 142 | this->raw_val += ch; |
| 142 | } | 143 | } |
| 143 | } | 144 | } |
| @@ -230,6 +231,10 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -230,6 +231,10 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 230 | inDecimal(ch); | 231 | inDecimal(ch); |
| 231 | return; | 232 | return; |
| 232 | 233 | ||
| 234 | + case (st_before_token): | ||
| 235 | + inBeforeToken(ch); | ||
| 236 | + return; | ||
| 237 | + | ||
| 233 | case (st_token_ready): | 238 | case (st_token_ready): |
| 234 | inTokenReady(ch); | 239 | inTokenReady(ch); |
| 235 | return; | 240 | return; |
| @@ -248,26 +253,35 @@ QPDFTokenizer::inTokenReady(char ch) | @@ -248,26 +253,35 @@ QPDFTokenizer::inTokenReady(char ch) | ||
| 248 | } | 253 | } |
| 249 | 254 | ||
| 250 | void | 255 | void |
| 251 | -QPDFTokenizer::inTop(char ch) | 256 | +QPDFTokenizer::inBeforeToken(char ch) |
| 252 | { | 257 | { |
| 253 | // Note: we specifically do not use ctype here. It is | 258 | // Note: we specifically do not use ctype here. It is |
| 254 | // locale-dependent. | 259 | // locale-dependent. |
| 255 | if (isSpace(ch)) { | 260 | if (isSpace(ch)) { |
| 261 | + this->before_token = !this->include_ignorable; | ||
| 262 | + this->in_token = this->include_ignorable; | ||
| 256 | if (this->include_ignorable) { | 263 | if (this->include_ignorable) { |
| 257 | this->state = st_in_space; | 264 | this->state = st_in_space; |
| 258 | this->val += ch; | 265 | this->val += ch; |
| 259 | - return; | ||
| 260 | } | 266 | } |
| 261 | - return; | ||
| 262 | - } | ||
| 263 | - switch (ch) { | ||
| 264 | - case '%': | 267 | + } else if (ch == '%') { |
| 268 | + this->before_token = !this->include_ignorable; | ||
| 269 | + this->in_token = this->include_ignorable; | ||
| 265 | this->state = st_in_comment; | 270 | this->state = st_in_comment; |
| 266 | if (this->include_ignorable) { | 271 | if (this->include_ignorable) { |
| 267 | this->val += ch; | 272 | this->val += ch; |
| 268 | } | 273 | } |
| 269 | - return; | 274 | + } else { |
| 275 | + this->before_token = false; | ||
| 276 | + this->in_token = true; | ||
| 277 | + inTop(ch); | ||
| 278 | + } | ||
| 279 | +} | ||
| 270 | 280 | ||
| 281 | +void | ||
| 282 | +QPDFTokenizer::inTop(char ch) | ||
| 283 | +{ | ||
| 284 | + switch (ch) { | ||
| 271 | case '(': | 285 | case '(': |
| 272 | this->string_depth = 1; | 286 | this->string_depth = 1; |
| 273 | this->state = st_in_string; | 287 | this->state = st_in_string; |
| @@ -376,7 +390,7 @@ QPDFTokenizer::inComment(char ch) | @@ -376,7 +390,7 @@ QPDFTokenizer::inComment(char ch) | ||
| 376 | this->char_to_unread = ch; | 390 | this->char_to_unread = ch; |
| 377 | this->state = st_token_ready; | 391 | this->state = st_token_ready; |
| 378 | } else { | 392 | } else { |
| 379 | - this->state = st_top; | 393 | + this->state = st_before_token; |
| 380 | } | 394 | } |
| 381 | } else if (this->include_ignorable) { | 395 | } else if (this->include_ignorable) { |
| 382 | this->val += ch; | 396 | this->val += ch; |
| @@ -799,6 +813,7 @@ QPDFTokenizer::presentEOF() | @@ -799,6 +813,7 @@ QPDFTokenizer::presentEOF() | ||
| 799 | break; | 813 | break; |
| 800 | 814 | ||
| 801 | case st_top: | 815 | case st_top: |
| 816 | + case st_before_token: | ||
| 802 | this->type = tt_eof; | 817 | this->type = tt_eof; |
| 803 | break; | 818 | break; |
| 804 | 819 | ||
| @@ -824,11 +839,13 @@ QPDFTokenizer::presentEOF() | @@ -824,11 +839,13 @@ QPDFTokenizer::presentEOF() | ||
| 824 | void | 839 | void |
| 825 | QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) | 840 | QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) |
| 826 | { | 841 | { |
| 827 | - if (this->state != st_top) { | 842 | + if (this->state != st_before_token) { |
| 828 | throw std::logic_error("QPDFTokenizer::expectInlineImage called" | 843 | throw std::logic_error("QPDFTokenizer::expectInlineImage called" |
| 829 | " when tokenizer is in improper state"); | 844 | " when tokenizer is in improper state"); |
| 830 | } | 845 | } |
| 831 | findEI(input); | 846 | findEI(input); |
| 847 | + this->before_token = false; | ||
| 848 | + this->in_token = true; | ||
| 832 | this->state = st_inline_image; | 849 | this->state = st_inline_image; |
| 833 | } | 850 | } |
| 834 | 851 | ||
| @@ -949,10 +966,7 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) | @@ -949,10 +966,7 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) | ||
| 949 | bool | 966 | bool |
| 950 | QPDFTokenizer::betweenTokens() | 967 | QPDFTokenizer::betweenTokens() |
| 951 | { | 968 | { |
| 952 | - return ( | ||
| 953 | - (this->state == st_top) || | ||
| 954 | - ((!this->include_ignorable) && | ||
| 955 | - ((this->state == st_in_comment) || (this->state == st_in_space)))); | 969 | + return this->before_token; |
| 956 | } | 970 | } |
| 957 | 971 | ||
| 958 | QPDFTokenizer::Token | 972 | QPDFTokenizer::Token |
| @@ -987,7 +1001,7 @@ QPDFTokenizer::readToken( | @@ -987,7 +1001,7 @@ QPDFTokenizer::readToken( | ||
| 987 | } | 1001 | } |
| 988 | } else { | 1002 | } else { |
| 989 | presentCharacter(ch); | 1003 | presentCharacter(ch); |
| 990 | - if (betweenTokens() && (input->getLastOffset() == offset)) { | 1004 | + if (this->before_token && (input->getLastOffset() == offset)) { |
| 991 | ++offset; | 1005 | ++offset; |
| 992 | } | 1006 | } |
| 993 | if (max_len && (this->raw_val.length() >= max_len) && | 1007 | if (max_len && (this->raw_val.length() >= max_len) && |