Commit fe33b7ca18ced0654313ea5abba461ac59e887b3

Authored by m-holger
1 parent 931fbb61

Integrate numbers into state machine in QPDFTokenizer

include/qpdf/QPDFTokenizer.hh
@@ -213,6 +213,11 @@ class QPDFTokenizer @@ -213,6 +213,11 @@ class QPDFTokenizer
213 st_lt, 213 st_lt,
214 st_gt, 214 st_gt,
215 st_inline_image, 215 st_inline_image,
  216 + st_sign,
  217 + st_number,
  218 + st_real,
  219 + st_decimal,
  220 +
216 st_name_hex1, 221 st_name_hex1,
217 st_name_hex2, 222 st_name_hex2,
218 st_token_ready 223 st_token_ready
@@ -236,6 +241,10 @@ class QPDFTokenizer @@ -236,6 +241,10 @@ class QPDFTokenizer
236 void inTokenReady(char); 241 void inTokenReady(char);
237 void inNameHex1(char); 242 void inNameHex1(char);
238 void inNameHex2(char); 243 void inNameHex2(char);
  244 + void inSign(char);
  245 + void inDecimal(char);
  246 + void inNumber(char);
  247 + void inReal(char);
239 void reset(); 248 void reset();
240 249
241 // Lexer state 250 // Lexer state
libqpdf/QPDFTokenizer.cc
@@ -134,13 +134,7 @@ QPDFTokenizer::isDelimiter(char ch) @@ -134,13 +134,7 @@ QPDFTokenizer::isDelimiter(char ch)
134 void 134 void
135 QPDFTokenizer::resolveLiteral() 135 QPDFTokenizer::resolveLiteral()
136 { 136 {
137 - if (QUtil::is_number(this->val.c_str())) {  
138 - if (this->val.find('.') != std::string::npos) {  
139 - this->type = tt_real;  
140 - } else {  
141 - this->type = tt_integer;  
142 - }  
143 - } else if ((this->val == "true") || (this->val == "false")) { 137 + if ((this->val == "true") || (this->val == "false")) {
144 this->type = tt_bool; 138 this->type = tt_bool;
145 } else if (this->val == "null") { 139 } else if (this->val == "null") {
146 this->type = tt_null; 140 this->type = tt_null;
@@ -205,6 +199,14 @@ QPDFTokenizer::handleCharacter(char ch) @@ -205,6 +199,14 @@ QPDFTokenizer::handleCharacter(char ch)
205 inName(ch); 199 inName(ch);
206 return; 200 return;
207 201
  202 + case st_number:
  203 + inNumber(ch);
  204 + return;
  205 +
  206 + case st_real:
  207 + inReal(ch);
  208 + return;
  209 +
208 case st_string_after_cr: 210 case st_string_after_cr:
209 inStringAfterCR(ch); 211 inStringAfterCR(ch);
210 return; 212 return;
@@ -224,7 +226,6 @@ QPDFTokenizer::handleCharacter(char ch) @@ -224,7 +226,6 @@ QPDFTokenizer::handleCharacter(char ch)
224 case st_inline_image: 226 case st_inline_image:
225 inInlineImage(ch); 227 inInlineImage(ch);
226 return; 228 return;
227 - this->val += ch;  
228 229
229 case st_in_hexstring: 230 case st_in_hexstring:
230 inHexstring(ch); 231 inHexstring(ch);
@@ -242,6 +243,14 @@ QPDFTokenizer::handleCharacter(char ch) @@ -242,6 +243,14 @@ QPDFTokenizer::handleCharacter(char ch)
242 inNameHex2(ch); 243 inNameHex2(ch);
243 return; 244 return;
244 245
  246 + case st_sign:
  247 + inSign(ch);
  248 + return;
  249 +
  250 + case st_decimal:
  251 + inDecimal(ch);
  252 + return;
  253 +
245 case (st_token_ready): 254 case (st_token_ready):
246 inTokenReady(ch); 255 inTokenReady(ch);
247 return; 256 return;
@@ -330,6 +339,31 @@ QPDFTokenizer::inTop(char ch) @@ -330,6 +339,31 @@ QPDFTokenizer::inTop(char ch)
330 this->val += ch; 339 this->val += ch;
331 return; 340 return;
332 341
  342 + case '0':
  343 + case '1':
  344 + case '2':
  345 + case '3':
  346 + case '4':
  347 + case '5':
  348 + case '6':
  349 + case '7':
  350 + case '8':
  351 + case '9':
  352 + this->state = st_number;
  353 + this->val += ch;
  354 + return;
  355 +
  356 + case '+':
  357 + case '-':
  358 + this->state = st_sign;
  359 + this->val += ch;
  360 + return;
  361 +
  362 + case '.':
  363 + this->state = st_decimal;
  364 + this->val += ch;
  365 + return;
  366 +
333 default: 367 default:
334 this->state = st_literal; 368 this->state = st_literal;
335 this->val += ch; 369 this->val += ch;
@@ -497,6 +531,67 @@ QPDFTokenizer::inNameHex2(char ch) @@ -497,6 +531,67 @@ QPDFTokenizer::inNameHex2(char ch)
497 } 531 }
498 532
499 void 533 void
  534 +QPDFTokenizer::inSign(char ch)
  535 +{
  536 + if (QUtil::is_digit(ch)) {
  537 + this->state = st_number;
  538 + this->val += ch;
  539 + } else if (ch == '.') {
  540 + this->state = st_decimal;
  541 + this->val += ch;
  542 + } else {
  543 + this->state = st_literal;
  544 + inLiteral(ch);
  545 + }
  546 +}
  547 +
  548 +void
  549 +QPDFTokenizer::inDecimal(char ch)
  550 +{
  551 + if (QUtil::is_digit(ch)) {
  552 + this->state = st_real;
  553 + this->val += ch;
  554 + } else {
  555 + this->state = st_literal;
  556 + inLiteral(ch);
  557 + }
  558 +}
  559 +
  560 +void
  561 +QPDFTokenizer::inNumber(char ch)
  562 +{
  563 + if (QUtil::is_digit(ch)) {
  564 + this->val += ch;
  565 + } else if (ch == '.') {
  566 + this->state = st_real;
  567 + this->val += ch;
  568 + } else if (isDelimiter(ch)) {
  569 + this->type = tt_integer;
  570 + this->state = st_token_ready;
  571 + this->unread_char = true;
  572 + this->char_to_unread = ch;
  573 + } else {
  574 + this->state = st_literal;
  575 + this->val += ch;
  576 + }
  577 +}
  578 +
  579 +void
  580 +QPDFTokenizer::inReal(char ch)
  581 +{
  582 + if (QUtil::is_digit(ch)) {
  583 + this->val += ch;
  584 + } else if (isDelimiter(ch)) {
  585 + this->type = tt_real;
  586 + this->state = st_token_ready;
  587 + this->unread_char = true;
  588 + this->char_to_unread = ch;
  589 + } else {
  590 + this->state = st_literal;
  591 + this->val += ch;
  592 + }
  593 +}
  594 +void
500 QPDFTokenizer::inStringEscape(char ch) 595 QPDFTokenizer::inStringEscape(char ch)
501 { 596 {
502 this->state = st_in_string; 597 this->state = st_in_string;
@@ -707,7 +802,9 @@ void @@ -707,7 +802,9 @@ void
707 QPDFTokenizer::presentEOF() 802 QPDFTokenizer::presentEOF()
708 { 803 {
709 if (this->state == st_name || this->state == st_name_hex1 || 804 if (this->state == st_name || this->state == st_name_hex1 ||
710 - this->state == st_name_hex2) { 805 + this->state == st_name_hex2 || this->state == st_number ||
  806 + this->state == st_real || this->state == st_sign ||
  807 + this->state == st_decimal) {
711 // Push any delimiter to the state machine to finish off the final 808 // Push any delimiter to the state machine to finish off the final
712 // token. 809 // token.
713 presentCharacter('\f'); 810 presentCharacter('\f');
@@ -715,7 +812,6 @@ QPDFTokenizer::presentEOF() @@ -715,7 +812,6 @@ QPDFTokenizer::presentEOF()
715 } else if (this->state == st_literal) { 812 } else if (this->state == st_literal) {
716 QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); 813 QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
717 resolveLiteral(); 814 resolveLiteral();
718 -  
719 } else if ((this->include_ignorable) && (this->state == st_in_space)) { 815 } else if ((this->include_ignorable) && (this->state == st_in_space)) {
720 this->type = tt_space; 816 this->type = tt_space;
721 } else if ((this->include_ignorable) && (this->state == st_in_comment)) { 817 } else if ((this->include_ignorable) && (this->state == st_in_comment)) {