Commit dd8dad74f47b6068281dd605a04bc2d0b6283423
1 parent
0a745021
Move lexer helper functions to QUtil
Showing
3 changed files
with
80 additions
and
60 deletions
include/qpdf/QUtil.hh
| @@ -157,6 +157,21 @@ namespace QUtil | @@ -157,6 +157,21 @@ namespace QUtil | ||
| 157 | // exception will be thrown. | 157 | // exception will be thrown. |
| 158 | QPDF_DLL | 158 | QPDF_DLL |
| 159 | RandomDataProvider* getRandomDataProvider(); | 159 | RandomDataProvider* getRandomDataProvider(); |
| 160 | + | ||
| 161 | + // These routines help the tokenizer recognize certain character | ||
| 162 | + // classes without using ctype, which we avoid because of locale | ||
| 163 | + // considerations. | ||
| 164 | + QPDF_DLL | ||
| 165 | + bool is_hex_digit(char); | ||
| 166 | + | ||
| 167 | + QPDF_DLL | ||
| 168 | + bool is_space(char); | ||
| 169 | + | ||
| 170 | + QPDF_DLL | ||
| 171 | + bool is_digit(char); | ||
| 172 | + | ||
| 173 | + QPDF_DLL | ||
| 174 | + bool is_number(char const*); | ||
| 160 | }; | 175 | }; |
| 161 | 176 | ||
| 162 | #endif // __QUTIL_HH__ | 177 | #endif // __QUTIL_HH__ |
libqpdf/QPDFTokenizer.cc
| @@ -6,66 +6,11 @@ | @@ -6,66 +6,11 @@ | ||
| 6 | 6 | ||
| 7 | #include <qpdf/QTC.hh> | 7 | #include <qpdf/QTC.hh> |
| 8 | #include <qpdf/QPDFExc.hh> | 8 | #include <qpdf/QPDFExc.hh> |
| 9 | +#include <qpdf/QUtil.hh> | ||
| 9 | 10 | ||
| 10 | #include <stdexcept> | 11 | #include <stdexcept> |
| 11 | #include <string.h> | 12 | #include <string.h> |
| 12 | 13 | ||
| 13 | -// See note above about ctype. | ||
| 14 | -static bool is_hex_digit(char ch) | ||
| 15 | -{ | ||
| 16 | - return (strchr("0123456789abcdefABCDEF", ch) != 0); | ||
| 17 | -} | ||
| 18 | -static bool is_space(char ch) | ||
| 19 | -{ | ||
| 20 | - return (strchr(" \f\n\r\t\v", ch) != 0); | ||
| 21 | -} | ||
| 22 | -static bool is_digit(char ch) | ||
| 23 | -{ | ||
| 24 | - return ((ch >= '0') && (ch <= '9')); | ||
| 25 | -} | ||
| 26 | -static bool | ||
| 27 | -is_number(std::string const& str) | ||
| 28 | -{ | ||
| 29 | - // ^[\+\-]?(\.\d+|\d+(\.\d+)?)$ | ||
| 30 | - char const* p = str.c_str(); | ||
| 31 | - if (! *p) | ||
| 32 | - { | ||
| 33 | - return false; | ||
| 34 | - } | ||
| 35 | - if ((*p == '-') || (*p == '+')) | ||
| 36 | - { | ||
| 37 | - ++p; | ||
| 38 | - } | ||
| 39 | - bool found_dot = false; | ||
| 40 | - bool found_digit = false; | ||
| 41 | - for (; *p; ++p) | ||
| 42 | - { | ||
| 43 | - if (*p == '.') | ||
| 44 | - { | ||
| 45 | - if (found_dot) | ||
| 46 | - { | ||
| 47 | - // only one dot | ||
| 48 | - return false; | ||
| 49 | - } | ||
| 50 | - if (! *(p+1)) | ||
| 51 | - { | ||
| 52 | - // dot can't be last | ||
| 53 | - return false; | ||
| 54 | - } | ||
| 55 | - found_dot = true; | ||
| 56 | - } | ||
| 57 | - else if (is_digit(*p)) | ||
| 58 | - { | ||
| 59 | - found_digit = true; | ||
| 60 | - } | ||
| 61 | - else | ||
| 62 | - { | ||
| 63 | - return false; | ||
| 64 | - } | ||
| 65 | - } | ||
| 66 | - return found_digit; | ||
| 67 | -} | ||
| 68 | - | ||
| 69 | QPDFTokenizer::QPDFTokenizer() : | 14 | QPDFTokenizer::QPDFTokenizer() : |
| 70 | pound_special_in_name(true), | 15 | pound_special_in_name(true), |
| 71 | allow_eof(false) | 16 | allow_eof(false) |
| @@ -117,7 +62,7 @@ QPDFTokenizer::resolveLiteral() | @@ -117,7 +62,7 @@ QPDFTokenizer::resolveLiteral() | ||
| 117 | if ((*p == '#') && this->pound_special_in_name) | 62 | if ((*p == '#') && this->pound_special_in_name) |
| 118 | { | 63 | { |
| 119 | if (p[1] && p[2] && | 64 | if (p[1] && p[2] && |
| 120 | - is_hex_digit(p[1]) && is_hex_digit(p[2])) | 65 | + QUtil::is_hex_digit(p[1]) && QUtil::is_hex_digit(p[2])) |
| 121 | { | 66 | { |
| 122 | char num[3]; | 67 | char num[3]; |
| 123 | num[0] = p[1]; | 68 | num[0] = p[1]; |
| @@ -153,7 +98,7 @@ QPDFTokenizer::resolveLiteral() | @@ -153,7 +98,7 @@ QPDFTokenizer::resolveLiteral() | ||
| 153 | } | 98 | } |
| 154 | val = nval; | 99 | val = nval; |
| 155 | } | 100 | } |
| 156 | - else if (is_number(val)) | 101 | + else if (QUtil::is_number(val.c_str())) |
| 157 | { | 102 | { |
| 158 | if (val.find('.') != std::string::npos) | 103 | if (val.find('.') != std::string::npos) |
| 159 | { | 104 | { |
| @@ -447,7 +392,7 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -447,7 +392,7 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 447 | } | 392 | } |
| 448 | val = nval; | 393 | val = nval; |
| 449 | } | 394 | } |
| 450 | - else if (is_hex_digit(ch)) | 395 | + else if (QUtil::is_hex_digit(ch)) |
| 451 | { | 396 | { |
| 452 | val += ch; | 397 | val += ch; |
| 453 | } | 398 | } |
| @@ -554,7 +499,7 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, | @@ -554,7 +499,7 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, | ||
| 554 | } | 499 | } |
| 555 | else | 500 | else |
| 556 | { | 501 | { |
| 557 | - if (is_space(static_cast<unsigned char>(ch)) && | 502 | + if (QUtil::is_space(static_cast<unsigned char>(ch)) && |
| 558 | (input->getLastOffset() == offset)) | 503 | (input->getLastOffset() == offset)) |
| 559 | { | 504 | { |
| 560 | ++offset; | 505 | ++offset; |
libqpdf/QUtil.cc
| @@ -456,3 +456,63 @@ QUtil::srandom(unsigned int seed) | @@ -456,3 +456,63 @@ QUtil::srandom(unsigned int seed) | ||
| 456 | srand(seed); | 456 | srand(seed); |
| 457 | #endif | 457 | #endif |
| 458 | } | 458 | } |
| 459 | + | ||
| 460 | +bool | ||
| 461 | +QUtil::is_hex_digit(char ch) | ||
| 462 | +{ | ||
| 463 | + return (strchr("0123456789abcdefABCDEF", ch) != 0); | ||
| 464 | +} | ||
| 465 | + | ||
| 466 | +bool | ||
| 467 | +QUtil::is_space(char ch) | ||
| 468 | +{ | ||
| 469 | + return (strchr(" \f\n\r\t\v", ch) != 0); | ||
| 470 | +} | ||
| 471 | + | ||
| 472 | +bool | ||
| 473 | +QUtil::is_digit(char ch) | ||
| 474 | +{ | ||
| 475 | + return ((ch >= '0') && (ch <= '9')); | ||
| 476 | +} | ||
| 477 | + | ||
| 478 | +bool | ||
| 479 | +QUtil::is_number(char const* p) | ||
| 480 | +{ | ||
| 481 | + // ^[\+\-]?(\.\d+|\d+(\.\d+)?)$ | ||
| 482 | + if (! *p) | ||
| 483 | + { | ||
| 484 | + return false; | ||
| 485 | + } | ||
| 486 | + if ((*p == '-') || (*p == '+')) | ||
| 487 | + { | ||
| 488 | + ++p; | ||
| 489 | + } | ||
| 490 | + bool found_dot = false; | ||
| 491 | + bool found_digit = false; | ||
| 492 | + for (; *p; ++p) | ||
| 493 | + { | ||
| 494 | + if (*p == '.') | ||
| 495 | + { | ||
| 496 | + if (found_dot) | ||
| 497 | + { | ||
| 498 | + // only one dot | ||
| 499 | + return false; | ||
| 500 | + } | ||
| 501 | + if (! *(p+1)) | ||
| 502 | + { | ||
| 503 | + // dot can't be last | ||
| 504 | + return false; | ||
| 505 | + } | ||
| 506 | + found_dot = true; | ||
| 507 | + } | ||
| 508 | + else if (QUtil::is_digit(*p)) | ||
| 509 | + { | ||
| 510 | + found_digit = true; | ||
| 511 | + } | ||
| 512 | + else | ||
| 513 | + { | ||
| 514 | + return false; | ||
| 515 | + } | ||
| 516 | + } | ||
| 517 | + return found_digit; | ||
| 518 | +} |