Commit 706106dabbebf90542bc1ba04648609baabaca68
1 parent: 6371b90a
Refactor QPDFTokenizer::betweenTokens()
Showing 2 changed files with 35 additions and 18 deletions
include/qpdf/QPDFTokenizer.hh
| ... | ... | @@ -216,13 +216,14 @@ class QPDFTokenizer |
| 216 | 216 | st_number, |
| 217 | 217 | st_real, |
| 218 | 218 | st_decimal, |
| 219 | - | |
| 220 | 219 | st_name_hex1, |
| 221 | 220 | st_name_hex2, |
| 221 | + st_before_token, | |
| 222 | 222 | st_token_ready |
| 223 | 223 | }; |
| 224 | 224 | |
| 225 | 225 | void handleCharacter(char); |
| 226 | + void inBeforeToken(char); | |
| 226 | 227 | void inTop(char); |
| 227 | 228 | void inSpace(char); |
| 228 | 229 | void inComment(char); |
| ... | ... | @@ -257,6 +258,8 @@ class QPDFTokenizer |
| 257 | 258 | std::string val; |
| 258 | 259 | std::string raw_val; |
| 259 | 260 | std::string error_message; |
| 261 | + bool before_token; | |
| 262 | + bool in_token; | |
| 260 | 263 | bool unread_char; |
| 261 | 264 | char char_to_unread; |
| 262 | 265 | size_t inline_image_bytes; | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -76,11 +76,13 @@ QPDFWordTokenFinder::check() |
| 76 | 76 | void |
| 77 | 77 | QPDFTokenizer::reset() |
| 78 | 78 | { |
| 79 | - state = st_top; | |
| 79 | + state = st_before_token; | |
| 80 | 80 | type = tt_bad; |
| 81 | 81 | val.clear(); |
| 82 | 82 | raw_val.clear(); |
| 83 | 83 | error_message = ""; |
| 84 | + before_token = true; | |
| 85 | + in_token = false; | |
| 84 | 86 | unread_char = false; |
| 85 | 87 | char_to_unread = '\0'; |
| 86 | 88 | inline_image_bytes = 0; |
| ... | ... | @@ -136,8 +138,7 @@ QPDFTokenizer::presentCharacter(char ch) |
| 136 | 138 | { |
| 137 | 139 | handleCharacter(ch); |
| 138 | 140 | |
| 139 | - if (!(betweenTokens() || | |
| 140 | - ((this->state == st_token_ready) && this->unread_char))) { | |
| 141 | + if (this->in_token && !this->unread_char) { | |
| 141 | 142 | this->raw_val += ch; |
| 142 | 143 | } |
| 143 | 144 | } |
| ... | ... | @@ -230,6 +231,10 @@ QPDFTokenizer::handleCharacter(char ch) |
| 230 | 231 | inDecimal(ch); |
| 231 | 232 | return; |
| 232 | 233 | |
| 234 | + case (st_before_token): | |
| 235 | + inBeforeToken(ch); | |
| 236 | + return; | |
| 237 | + | |
| 233 | 238 | case (st_token_ready): |
| 234 | 239 | inTokenReady(ch); |
| 235 | 240 | return; |
| ... | ... | @@ -248,26 +253,35 @@ QPDFTokenizer::inTokenReady(char ch) |
| 248 | 253 | } |
| 249 | 254 | |
| 250 | 255 | void |
| 251 | -QPDFTokenizer::inTop(char ch) | |
| 256 | +QPDFTokenizer::inBeforeToken(char ch) | |
| 252 | 257 | { |
| 253 | 258 | // Note: we specifically do not use ctype here. It is |
| 254 | 259 | // locale-dependent. |
| 255 | 260 | if (isSpace(ch)) { |
| 261 | + this->before_token = !this->include_ignorable; | |
| 262 | + this->in_token = this->include_ignorable; | |
| 256 | 263 | if (this->include_ignorable) { |
| 257 | 264 | this->state = st_in_space; |
| 258 | 265 | this->val += ch; |
| 259 | - return; | |
| 260 | 266 | } |
| 261 | - return; | |
| 262 | - } | |
| 263 | - switch (ch) { | |
| 264 | - case '%': | |
| 267 | + } else if (ch == '%') { | |
| 268 | + this->before_token = !this->include_ignorable; | |
| 269 | + this->in_token = this->include_ignorable; | |
| 265 | 270 | this->state = st_in_comment; |
| 266 | 271 | if (this->include_ignorable) { |
| 267 | 272 | this->val += ch; |
| 268 | 273 | } |
| 269 | - return; | |
| 274 | + } else { | |
| 275 | + this->before_token = false; | |
| 276 | + this->in_token = true; | |
| 277 | + inTop(ch); | |
| 278 | + } | |
| 279 | +} | |
| 270 | 280 | |
| 281 | +void | |
| 282 | +QPDFTokenizer::inTop(char ch) | |
| 283 | +{ | |
| 284 | + switch (ch) { | |
| 271 | 285 | case '(': |
| 272 | 286 | this->string_depth = 1; |
| 273 | 287 | this->state = st_in_string; |
| ... | ... | @@ -376,7 +390,7 @@ QPDFTokenizer::inComment(char ch) |
| 376 | 390 | this->char_to_unread = ch; |
| 377 | 391 | this->state = st_token_ready; |
| 378 | 392 | } else { |
| 379 | - this->state = st_top; | |
| 393 | + this->state = st_before_token; | |
| 380 | 394 | } |
| 381 | 395 | } else if (this->include_ignorable) { |
| 382 | 396 | this->val += ch; |
| ... | ... | @@ -799,6 +813,7 @@ QPDFTokenizer::presentEOF() |
| 799 | 813 | break; |
| 800 | 814 | |
| 801 | 815 | case st_top: |
| 816 | + case st_before_token: | |
| 802 | 817 | this->type = tt_eof; |
| 803 | 818 | break; |
| 804 | 819 | |
| ... | ... | @@ -824,11 +839,13 @@ QPDFTokenizer::presentEOF() |
| 824 | 839 | void |
| 825 | 840 | QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) |
| 826 | 841 | { |
| 827 | - if (this->state != st_top) { | |
| 842 | + if (this->state != st_before_token) { | |
| 828 | 843 | throw std::logic_error("QPDFTokenizer::expectInlineImage called" |
| 829 | 844 | " when tokenizer is in improper state"); |
| 830 | 845 | } |
| 831 | 846 | findEI(input); |
| 847 | + this->before_token = false; | |
| 848 | + this->in_token = true; | |
| 832 | 849 | this->state = st_inline_image; |
| 833 | 850 | } |
| 834 | 851 | |
| ... | ... | @@ -949,10 +966,7 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) |
| 949 | 966 | bool |
| 950 | 967 | QPDFTokenizer::betweenTokens() |
| 951 | 968 | { |
| 952 | - return ( | |
| 953 | - (this->state == st_top) || | |
| 954 | - ((!this->include_ignorable) && | |
| 955 | - ((this->state == st_in_comment) || (this->state == st_in_space)))); | |
| 969 | + return this->before_token; | |
| 956 | 970 | } |
| 957 | 971 | |
| 958 | 972 | QPDFTokenizer::Token |
| ... | ... | @@ -987,7 +1001,7 @@ QPDFTokenizer::readToken( |
| 987 | 1001 | } |
| 988 | 1002 | } else { |
| 989 | 1003 | presentCharacter(ch); |
| 990 | - if (betweenTokens() && (input->getLastOffset() == offset)) { | |
| 1004 | + if (this->before_token && (input->getLastOffset() == offset)) { | |
| 991 | 1005 | ++offset; |
| 992 | 1006 | } |
| 993 | 1007 | if (max_len && (this->raw_val.length() >= max_len) && | ... | ... |