Commit fe33b7ca18ced0654313ea5abba461ac59e887b3
1 parent: 931fbb61
Integrate numbers into state machine in QPDFTokenizer
Showing 2 changed files with 115 additions and 10 deletions
include/qpdf/QPDFTokenizer.hh
| @@ -213,6 +213,11 @@ class QPDFTokenizer | @@ -213,6 +213,11 @@ class QPDFTokenizer | ||
| 213 | st_lt, | 213 | st_lt, |
| 214 | st_gt, | 214 | st_gt, |
| 215 | st_inline_image, | 215 | st_inline_image, |
| 216 | + st_sign, | ||
| 217 | + st_number, | ||
| 218 | + st_real, | ||
| 219 | + st_decimal, | ||
| 220 | + | ||
| 216 | st_name_hex1, | 221 | st_name_hex1, |
| 217 | st_name_hex2, | 222 | st_name_hex2, |
| 218 | st_token_ready | 223 | st_token_ready |
| @@ -236,6 +241,10 @@ class QPDFTokenizer | @@ -236,6 +241,10 @@ class QPDFTokenizer | ||
| 236 | void inTokenReady(char); | 241 | void inTokenReady(char); |
| 237 | void inNameHex1(char); | 242 | void inNameHex1(char); |
| 238 | void inNameHex2(char); | 243 | void inNameHex2(char); |
| 244 | + void inSign(char); | ||
| 245 | + void inDecimal(char); | ||
| 246 | + void inNumber(char); | ||
| 247 | + void inReal(char); | ||
| 239 | void reset(); | 248 | void reset(); |
| 240 | 249 | ||
| 241 | // Lexer state | 250 | // Lexer state |
libqpdf/QPDFTokenizer.cc
| @@ -134,13 +134,7 @@ QPDFTokenizer::isDelimiter(char ch) | @@ -134,13 +134,7 @@ QPDFTokenizer::isDelimiter(char ch) | ||
| 134 | void | 134 | void |
| 135 | QPDFTokenizer::resolveLiteral() | 135 | QPDFTokenizer::resolveLiteral() |
| 136 | { | 136 | { |
| 137 | - if (QUtil::is_number(this->val.c_str())) { | ||
| 138 | - if (this->val.find('.') != std::string::npos) { | ||
| 139 | - this->type = tt_real; | ||
| 140 | - } else { | ||
| 141 | - this->type = tt_integer; | ||
| 142 | - } | ||
| 143 | - } else if ((this->val == "true") || (this->val == "false")) { | 137 | + if ((this->val == "true") || (this->val == "false")) { |
| 144 | this->type = tt_bool; | 138 | this->type = tt_bool; |
| 145 | } else if (this->val == "null") { | 139 | } else if (this->val == "null") { |
| 146 | this->type = tt_null; | 140 | this->type = tt_null; |
| @@ -205,6 +199,14 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -205,6 +199,14 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 205 | inName(ch); | 199 | inName(ch); |
| 206 | return; | 200 | return; |
| 207 | 201 | ||
| 202 | + case st_number: | ||
| 203 | + inNumber(ch); | ||
| 204 | + return; | ||
| 205 | + | ||
| 206 | + case st_real: | ||
| 207 | + inReal(ch); | ||
| 208 | + return; | ||
| 209 | + | ||
| 208 | case st_string_after_cr: | 210 | case st_string_after_cr: |
| 209 | inStringAfterCR(ch); | 211 | inStringAfterCR(ch); |
| 210 | return; | 212 | return; |
| @@ -224,7 +226,6 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -224,7 +226,6 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 224 | case st_inline_image: | 226 | case st_inline_image: |
| 225 | inInlineImage(ch); | 227 | inInlineImage(ch); |
| 226 | return; | 228 | return; |
| 227 | - this->val += ch; | ||
| 228 | 229 | ||
| 229 | case st_in_hexstring: | 230 | case st_in_hexstring: |
| 230 | inHexstring(ch); | 231 | inHexstring(ch); |
| @@ -242,6 +243,14 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -242,6 +243,14 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 242 | inNameHex2(ch); | 243 | inNameHex2(ch); |
| 243 | return; | 244 | return; |
| 244 | 245 | ||
| 246 | + case st_sign: | ||
| 247 | + inSign(ch); | ||
| 248 | + return; | ||
| 249 | + | ||
| 250 | + case st_decimal: | ||
| 251 | + inDecimal(ch); | ||
| 252 | + return; | ||
| 253 | + | ||
| 245 | case (st_token_ready): | 254 | case (st_token_ready): |
| 246 | inTokenReady(ch); | 255 | inTokenReady(ch); |
| 247 | return; | 256 | return; |
| @@ -330,6 +339,31 @@ QPDFTokenizer::inTop(char ch) | @@ -330,6 +339,31 @@ QPDFTokenizer::inTop(char ch) | ||
| 330 | this->val += ch; | 339 | this->val += ch; |
| 331 | return; | 340 | return; |
| 332 | 341 | ||
| 342 | + case '0': | ||
| 343 | + case '1': | ||
| 344 | + case '2': | ||
| 345 | + case '3': | ||
| 346 | + case '4': | ||
| 347 | + case '5': | ||
| 348 | + case '6': | ||
| 349 | + case '7': | ||
| 350 | + case '8': | ||
| 351 | + case '9': | ||
| 352 | + this->state = st_number; | ||
| 353 | + this->val += ch; | ||
| 354 | + return; | ||
| 355 | + | ||
| 356 | + case '+': | ||
| 357 | + case '-': | ||
| 358 | + this->state = st_sign; | ||
| 359 | + this->val += ch; | ||
| 360 | + return; | ||
| 361 | + | ||
| 362 | + case '.': | ||
| 363 | + this->state = st_decimal; | ||
| 364 | + this->val += ch; | ||
| 365 | + return; | ||
| 366 | + | ||
| 333 | default: | 367 | default: |
| 334 | this->state = st_literal; | 368 | this->state = st_literal; |
| 335 | this->val += ch; | 369 | this->val += ch; |
| @@ -497,6 +531,67 @@ QPDFTokenizer::inNameHex2(char ch) | @@ -497,6 +531,67 @@ QPDFTokenizer::inNameHex2(char ch) | ||
| 497 | } | 531 | } |
| 498 | 532 | ||
| 499 | void | 533 | void |
| 534 | +QPDFTokenizer::inSign(char ch) | ||
| 535 | +{ | ||
| 536 | + if (QUtil::is_digit(ch)) { | ||
| 537 | + this->state = st_number; | ||
| 538 | + this->val += ch; | ||
| 539 | + } else if (ch == '.') { | ||
| 540 | + this->state = st_decimal; | ||
| 541 | + this->val += ch; | ||
| 542 | + } else { | ||
| 543 | + this->state = st_literal; | ||
| 544 | + inLiteral(ch); | ||
| 545 | + } | ||
| 546 | +} | ||
| 547 | + | ||
| 548 | +void | ||
| 549 | +QPDFTokenizer::inDecimal(char ch) | ||
| 550 | +{ | ||
| 551 | + if (QUtil::is_digit(ch)) { | ||
| 552 | + this->state = st_real; | ||
| 553 | + this->val += ch; | ||
| 554 | + } else { | ||
| 555 | + this->state = st_literal; | ||
| 556 | + inLiteral(ch); | ||
| 557 | + } | ||
| 558 | +} | ||
| 559 | + | ||
| 560 | +void | ||
| 561 | +QPDFTokenizer::inNumber(char ch) | ||
| 562 | +{ | ||
| 563 | + if (QUtil::is_digit(ch)) { | ||
| 564 | + this->val += ch; | ||
| 565 | + } else if (ch == '.') { | ||
| 566 | + this->state = st_real; | ||
| 567 | + this->val += ch; | ||
| 568 | + } else if (isDelimiter(ch)) { | ||
| 569 | + this->type = tt_integer; | ||
| 570 | + this->state = st_token_ready; | ||
| 571 | + this->unread_char = true; | ||
| 572 | + this->char_to_unread = ch; | ||
| 573 | + } else { | ||
| 574 | + this->state = st_literal; | ||
| 575 | + this->val += ch; | ||
| 576 | + } | ||
| 577 | +} | ||
| 578 | + | ||
| 579 | +void | ||
| 580 | +QPDFTokenizer::inReal(char ch) | ||
| 581 | +{ | ||
| 582 | + if (QUtil::is_digit(ch)) { | ||
| 583 | + this->val += ch; | ||
| 584 | + } else if (isDelimiter(ch)) { | ||
| 585 | + this->type = tt_real; | ||
| 586 | + this->state = st_token_ready; | ||
| 587 | + this->unread_char = true; | ||
| 588 | + this->char_to_unread = ch; | ||
| 589 | + } else { | ||
| 590 | + this->state = st_literal; | ||
| 591 | + this->val += ch; | ||
| 592 | + } | ||
| 593 | +} | ||
| 594 | +void | ||
| 500 | QPDFTokenizer::inStringEscape(char ch) | 595 | QPDFTokenizer::inStringEscape(char ch) |
| 501 | { | 596 | { |
| 502 | this->state = st_in_string; | 597 | this->state = st_in_string; |
| @@ -707,7 +802,9 @@ void | @@ -707,7 +802,9 @@ void | ||
| 707 | QPDFTokenizer::presentEOF() | 802 | QPDFTokenizer::presentEOF() |
| 708 | { | 803 | { |
| 709 | if (this->state == st_name || this->state == st_name_hex1 || | 804 | if (this->state == st_name || this->state == st_name_hex1 || |
| 710 | - this->state == st_name_hex2) { | 805 | + this->state == st_name_hex2 || this->state == st_number || |
| 806 | + this->state == st_real || this->state == st_sign || | ||
| 807 | + this->state == st_decimal) { | ||
| 711 | // Push any delimiter to the state machine to finish off the final | 808 | // Push any delimiter to the state machine to finish off the final |
| 712 | // token. | 809 | // token. |
| 713 | presentCharacter('\f'); | 810 | presentCharacter('\f'); |
| @@ -715,7 +812,6 @@ QPDFTokenizer::presentEOF() | @@ -715,7 +812,6 @@ QPDFTokenizer::presentEOF() | ||
| 715 | } else if (this->state == st_literal) { | 812 | } else if (this->state == st_literal) { |
| 716 | QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); | 813 | QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); |
| 717 | resolveLiteral(); | 814 | resolveLiteral(); |
| 718 | - | ||
| 719 | } else if ((this->include_ignorable) && (this->state == st_in_space)) { | 815 | } else if ((this->include_ignorable) && (this->state == st_in_space)) { |
| 720 | this->type = tt_space; | 816 | this->type = tt_space; |
| 721 | } else if ((this->include_ignorable) && (this->state == st_in_comment)) { | 817 | } else if ((this->include_ignorable) && (this->state == st_in_comment)) { |