Commit fe33b7ca18ced0654313ea5abba461ac59e887b3

Authored by m-holger
1 parent 931fbb61

Integrate numbers into state machine in QPDFTokenizer

include/qpdf/QPDFTokenizer.hh
... ... @@ -213,6 +213,11 @@ class QPDFTokenizer
213 213 st_lt,
214 214 st_gt,
215 215 st_inline_image,
  216 + st_sign,
  217 + st_number,
  218 + st_real,
  219 + st_decimal,
  220 +
216 221 st_name_hex1,
217 222 st_name_hex2,
218 223 st_token_ready
... ... @@ -236,6 +241,10 @@ class QPDFTokenizer
236 241 void inTokenReady(char);
237 242 void inNameHex1(char);
238 243 void inNameHex2(char);
  244 + void inSign(char);
  245 + void inDecimal(char);
  246 + void inNumber(char);
  247 + void inReal(char);
239 248 void reset();
240 249  
241 250 // Lexer state
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -134,13 +134,7 @@ QPDFTokenizer::isDelimiter(char ch)
134 134 void
135 135 QPDFTokenizer::resolveLiteral()
136 136 {
137   - if (QUtil::is_number(this->val.c_str())) {
138   - if (this->val.find('.') != std::string::npos) {
139   - this->type = tt_real;
140   - } else {
141   - this->type = tt_integer;
142   - }
143   - } else if ((this->val == "true") || (this->val == "false")) {
  137 + if ((this->val == "true") || (this->val == "false")) {
144 138 this->type = tt_bool;
145 139 } else if (this->val == "null") {
146 140 this->type = tt_null;
... ... @@ -205,6 +199,14 @@ QPDFTokenizer::handleCharacter(char ch)
205 199 inName(ch);
206 200 return;
207 201  
  202 + case st_number:
  203 + inNumber(ch);
  204 + return;
  205 +
  206 + case st_real:
  207 + inReal(ch);
  208 + return;
  209 +
208 210 case st_string_after_cr:
209 211 inStringAfterCR(ch);
210 212 return;
... ... @@ -224,7 +226,6 @@ QPDFTokenizer::handleCharacter(char ch)
224 226 case st_inline_image:
225 227 inInlineImage(ch);
226 228 return;
227   - this->val += ch;
228 229  
229 230 case st_in_hexstring:
230 231 inHexstring(ch);
... ... @@ -242,6 +243,14 @@ QPDFTokenizer::handleCharacter(char ch)
242 243 inNameHex2(ch);
243 244 return;
244 245  
  246 + case st_sign:
  247 + inSign(ch);
  248 + return;
  249 +
  250 + case st_decimal:
  251 + inDecimal(ch);
  252 + return;
  253 +
245 254 case (st_token_ready):
246 255 inTokenReady(ch);
247 256 return;
... ... @@ -330,6 +339,31 @@ QPDFTokenizer::inTop(char ch)
330 339 this->val += ch;
331 340 return;
332 341  
  342 + case '0':
  343 + case '1':
  344 + case '2':
  345 + case '3':
  346 + case '4':
  347 + case '5':
  348 + case '6':
  349 + case '7':
  350 + case '8':
  351 + case '9':
  352 + this->state = st_number;
  353 + this->val += ch;
  354 + return;
  355 +
  356 + case '+':
  357 + case '-':
  358 + this->state = st_sign;
  359 + this->val += ch;
  360 + return;
  361 +
  362 + case '.':
  363 + this->state = st_decimal;
  364 + this->val += ch;
  365 + return;
  366 +
333 367 default:
334 368 this->state = st_literal;
335 369 this->val += ch;
... ... @@ -497,6 +531,67 @@ QPDFTokenizer::inNameHex2(char ch)
497 531 }
498 532  
499 533 void
  534 +QPDFTokenizer::inSign(char ch)
  535 +{
  536 + if (QUtil::is_digit(ch)) {
  537 + this->state = st_number;
  538 + this->val += ch;
  539 + } else if (ch == '.') {
  540 + this->state = st_decimal;
  541 + this->val += ch;
  542 + } else {
  543 + this->state = st_literal;
  544 + inLiteral(ch);
  545 + }
  546 +}
  547 +
  548 +void
  549 +QPDFTokenizer::inDecimal(char ch)
  550 +{
  551 + if (QUtil::is_digit(ch)) {
  552 + this->state = st_real;
  553 + this->val += ch;
  554 + } else {
  555 + this->state = st_literal;
  556 + inLiteral(ch);
  557 + }
  558 +}
  559 +
  560 +void
  561 +QPDFTokenizer::inNumber(char ch)
  562 +{
  563 + if (QUtil::is_digit(ch)) {
  564 + this->val += ch;
  565 + } else if (ch == '.') {
  566 + this->state = st_real;
  567 + this->val += ch;
  568 + } else if (isDelimiter(ch)) {
  569 + this->type = tt_integer;
  570 + this->state = st_token_ready;
  571 + this->unread_char = true;
  572 + this->char_to_unread = ch;
  573 + } else {
  574 + this->state = st_literal;
  575 + this->val += ch;
  576 + }
  577 +}
  578 +
  579 +void
  580 +QPDFTokenizer::inReal(char ch)
  581 +{
  582 + if (QUtil::is_digit(ch)) {
  583 + this->val += ch;
  584 + } else if (isDelimiter(ch)) {
  585 + this->type = tt_real;
  586 + this->state = st_token_ready;
  587 + this->unread_char = true;
  588 + this->char_to_unread = ch;
  589 + } else {
  590 + this->state = st_literal;
  591 + this->val += ch;
  592 + }
  593 +}
  594 +void
500 595 QPDFTokenizer::inStringEscape(char ch)
501 596 {
502 597 this->state = st_in_string;
... ... @@ -707,7 +802,9 @@ void
707 802 QPDFTokenizer::presentEOF()
708 803 {
709 804 if (this->state == st_name || this->state == st_name_hex1 ||
710   - this->state == st_name_hex2) {
  805 + this->state == st_name_hex2 || this->state == st_number ||
  806 + this->state == st_real || this->state == st_sign ||
  807 + this->state == st_decimal) {
711 808 // Push any delimiter to the state machine to finish off the final
712 809 // token.
713 810 presentCharacter('\f');
... ... @@ -715,7 +812,6 @@ QPDFTokenizer::presentEOF()
715 812 } else if (this->state == st_literal) {
716 813 QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
717 814 resolveLiteral();
718   -
719 815 } else if ((this->include_ignorable) && (this->state == st_in_space)) {
720 816 this->type = tt_space;
721 817 } else if ((this->include_ignorable) && (this->state == st_in_comment)) {
... ...