Commit fe33b7ca18ced0654313ea5abba461ac59e887b3
1 parent: 931fbb61
Integrate numbers into state machine in QPDFTokenizer
Showing 2 changed files with 115 additions and 10 deletions
include/qpdf/QPDFTokenizer.hh
| ... | ... | @@ -213,6 +213,11 @@ class QPDFTokenizer |
| 213 | 213 | st_lt, |
| 214 | 214 | st_gt, |
| 215 | 215 | st_inline_image, |
| 216 | + st_sign, | |
| 217 | + st_number, | |
| 218 | + st_real, | |
| 219 | + st_decimal, | |
| 220 | + | |
| 216 | 221 | st_name_hex1, |
| 217 | 222 | st_name_hex2, |
| 218 | 223 | st_token_ready |
| ... | ... | @@ -236,6 +241,10 @@ class QPDFTokenizer |
| 236 | 241 | void inTokenReady(char); |
| 237 | 242 | void inNameHex1(char); |
| 238 | 243 | void inNameHex2(char); |
| 244 | + void inSign(char); | |
| 245 | + void inDecimal(char); | |
| 246 | + void inNumber(char); | |
| 247 | + void inReal(char); | |
| 239 | 248 | void reset(); |
| 240 | 249 | |
| 241 | 250 | // Lexer state | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -134,13 +134,7 @@ QPDFTokenizer::isDelimiter(char ch) |
| 134 | 134 | void |
| 135 | 135 | QPDFTokenizer::resolveLiteral() |
| 136 | 136 | { |
| 137 | - if (QUtil::is_number(this->val.c_str())) { | |
| 138 | - if (this->val.find('.') != std::string::npos) { | |
| 139 | - this->type = tt_real; | |
| 140 | - } else { | |
| 141 | - this->type = tt_integer; | |
| 142 | - } | |
| 143 | - } else if ((this->val == "true") || (this->val == "false")) { | |
| 137 | + if ((this->val == "true") || (this->val == "false")) { | |
| 144 | 138 | this->type = tt_bool; |
| 145 | 139 | } else if (this->val == "null") { |
| 146 | 140 | this->type = tt_null; |
| ... | ... | @@ -205,6 +199,14 @@ QPDFTokenizer::handleCharacter(char ch) |
| 205 | 199 | inName(ch); |
| 206 | 200 | return; |
| 207 | 201 | |
| 202 | + case st_number: | |
| 203 | + inNumber(ch); | |
| 204 | + return; | |
| 205 | + | |
| 206 | + case st_real: | |
| 207 | + inReal(ch); | |
| 208 | + return; | |
| 209 | + | |
| 208 | 210 | case st_string_after_cr: |
| 209 | 211 | inStringAfterCR(ch); |
| 210 | 212 | return; |
| ... | ... | @@ -224,7 +226,6 @@ QPDFTokenizer::handleCharacter(char ch) |
| 224 | 226 | case st_inline_image: |
| 225 | 227 | inInlineImage(ch); |
| 226 | 228 | return; |
| 227 | - this->val += ch; | |
| 228 | 229 | |
| 229 | 230 | case st_in_hexstring: |
| 230 | 231 | inHexstring(ch); |
| ... | ... | @@ -242,6 +243,14 @@ QPDFTokenizer::handleCharacter(char ch) |
| 242 | 243 | inNameHex2(ch); |
| 243 | 244 | return; |
| 244 | 245 | |
| 246 | + case st_sign: | |
| 247 | + inSign(ch); | |
| 248 | + return; | |
| 249 | + | |
| 250 | + case st_decimal: | |
| 251 | + inDecimal(ch); | |
| 252 | + return; | |
| 253 | + | |
| 245 | 254 | case (st_token_ready): |
| 246 | 255 | inTokenReady(ch); |
| 247 | 256 | return; |
| ... | ... | @@ -330,6 +339,31 @@ QPDFTokenizer::inTop(char ch) |
| 330 | 339 | this->val += ch; |
| 331 | 340 | return; |
| 332 | 341 | |
| 342 | + case '0': | |
| 343 | + case '1': | |
| 344 | + case '2': | |
| 345 | + case '3': | |
| 346 | + case '4': | |
| 347 | + case '5': | |
| 348 | + case '6': | |
| 349 | + case '7': | |
| 350 | + case '8': | |
| 351 | + case '9': | |
| 352 | + this->state = st_number; | |
| 353 | + this->val += ch; | |
| 354 | + return; | |
| 355 | + | |
| 356 | + case '+': | |
| 357 | + case '-': | |
| 358 | + this->state = st_sign; | |
| 359 | + this->val += ch; | |
| 360 | + return; | |
| 361 | + | |
| 362 | + case '.': | |
| 363 | + this->state = st_decimal; | |
| 364 | + this->val += ch; | |
| 365 | + return; | |
| 366 | + | |
| 333 | 367 | default: |
| 334 | 368 | this->state = st_literal; |
| 335 | 369 | this->val += ch; |
| ... | ... | @@ -497,6 +531,67 @@ QPDFTokenizer::inNameHex2(char ch) |
| 497 | 531 | } |
| 498 | 532 | |
// inSign: handles the character that follows a leading '+' or '-'.
// inTop enters st_sign after appending the sign to val, so no digit has
// been seen yet when this runs.
//   - digit : append it and continue as an integer (st_number)
//   - '.'   : append it and wait for a fractional digit (st_decimal)
//   - other : the sign does not start a number; switch to st_literal and
//             pass ch to inLiteral instead of appending it here.
| 499 | 533 | void |
| 534 | +QPDFTokenizer::inSign(char ch) | |
| 535 | +{ | |
| 536 | + if (QUtil::is_digit(ch)) { | |
| 537 | + this->state = st_number; | |
| 538 | + this->val += ch; | |
| 539 | + } else if (ch == '.') { | |
| 540 | + this->state = st_decimal; | |
| 541 | + this->val += ch; | |
| 542 | + } else { | |
// ch is handed to inLiteral (defined outside this view) rather than
// appended directly, unlike the fallback branches of inNumber/inReal.
| 543 | + this->state = st_literal; | |
| 544 | + inLiteral(ch); | |
| 545 | + } | |
| 546 | +} | |
| 547 | + | |
// inDecimal: handles the character that follows a '.' seen before any
// digit. st_decimal is entered from inTop (token starts with '.') or from
// inSign (sign followed by '.').
//   - digit : append it; the token is now a real number (st_real)
//   - other : not a number; switch to st_literal and pass ch to inLiteral
//             instead of appending it here.
| 548 | +void | |
| 549 | +QPDFTokenizer::inDecimal(char ch) | |
| 550 | +{ | |
| 551 | + if (QUtil::is_digit(ch)) { | |
| 552 | + this->state = st_real; | |
| 553 | + this->val += ch; | |
| 554 | + } else { | |
| 555 | + this->state = st_literal; | |
| 556 | + inLiteral(ch); | |
| 557 | + } | |
| 558 | +} | |
| 559 | + | |
// inNumber: handles a character while val holds an optional sign followed
// by one or more digits. st_number is entered from inTop or inSign on a
// digit.
//   - digit     : append it and stay in st_number
//   - '.'       : append it and continue as a real number (st_real)
//   - delimiter : finish the token as tt_integer; ch is not appended but
//                 recorded in char_to_unread with unread_char set
//   - other     : the token is not a number; append ch and continue in
//                 st_literal
| 560 | +void | |
| 561 | +QPDFTokenizer::inNumber(char ch) | |
| 562 | +{ | |
| 563 | + if (QUtil::is_digit(ch)) { | |
| 564 | + this->val += ch; | |
| 565 | + } else if (ch == '.') { | |
| 566 | + this->state = st_real; | |
| 567 | + this->val += ch; | |
| 568 | + } else if (isDelimiter(ch)) { | |
// The delimiter ends the integer but is not part of it, so it is stored
// for re-reading instead of being added to val.
| 569 | + this->type = tt_integer; | |
| 570 | + this->state = st_token_ready; | |
| 571 | + this->unread_char = true; | |
| 572 | + this->char_to_unread = ch; | |
| 573 | + } else { | |
| 574 | + this->state = st_literal; | |
| 575 | + this->val += ch; | |
| 576 | + } | |
| 577 | +} | |
| 578 | + | |
// inReal: handles a character once val contains a '.' and at least one
// digit. st_real is entered from inNumber on '.' or from inDecimal on a
// digit.
//   - digit     : append it and stay in st_real
//   - delimiter : finish the token as tt_real; ch is not appended but
//                 recorded in char_to_unread with unread_char set
//   - other     : the token is not a number; append ch and continue in
//                 st_literal
// A second '.' has no branch of its own here; it takes the delimiter or
// the fallback branch depending on what isDelimiter returns for it.
| 579 | +void | |
| 580 | +QPDFTokenizer::inReal(char ch) | |
| 581 | +{ | |
| 582 | + if (QUtil::is_digit(ch)) { | |
| 583 | + this->val += ch; | |
| 584 | + } else if (isDelimiter(ch)) { | |
// The delimiter ends the real number but is not part of it, so it is
// stored for re-reading instead of being added to val.
| 585 | + this->type = tt_real; | |
| 586 | + this->state = st_token_ready; | |
| 587 | + this->unread_char = true; | |
| 588 | + this->char_to_unread = ch; | |
| 589 | + } else { | |
| 590 | + this->state = st_literal; | |
| 591 | + this->val += ch; | |
| 592 | + } | |
| 593 | +} | |
| 594 | +void | |
| 500 | 595 | QPDFTokenizer::inStringEscape(char ch) |
| 501 | 596 | { |
| 502 | 597 | this->state = st_in_string; |
| ... | ... | @@ -707,7 +802,9 @@ void |
| 707 | 802 | QPDFTokenizer::presentEOF() |
| 708 | 803 | { |
| 709 | 804 | if (this->state == st_name || this->state == st_name_hex1 || |
| 710 | - this->state == st_name_hex2) { | |
| 805 | + this->state == st_name_hex2 || this->state == st_number || | |
| 806 | + this->state == st_real || this->state == st_sign || | |
| 807 | + this->state == st_decimal) { | |
| 711 | 808 | // Push any delimiter to the state machine to finish off the final |
| 712 | 809 | // token. |
| 713 | 810 | presentCharacter('\f'); |
| ... | ... | @@ -715,7 +812,6 @@ QPDFTokenizer::presentEOF() |
| 715 | 812 | } else if (this->state == st_literal) { |
| 716 | 813 | QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); |
| 717 | 814 | resolveLiteral(); |
| 718 | - | |
| 719 | 815 | } else if ((this->include_ignorable) && (this->state == st_in_space)) { |
| 720 | 816 | this->type = tt_space; |
| 721 | 817 | } else if ((this->include_ignorable) && (this->state == st_in_comment)) { | ... | ... |