Commit dd8dad74f47b6068281dd605a04bc2d0b6283423
1 parent
0a745021
Move lexer helper functions to QUtil
Showing
3 changed files
with
80 additions
and
60 deletions
include/qpdf/QUtil.hh
| ... | ... | @@ -157,6 +157,21 @@ namespace QUtil |
| 157 | 157 | // exception will be thrown. |
| 158 | 158 | QPDF_DLL |
| 159 | 159 | RandomDataProvider* getRandomDataProvider(); |
| 160 | + | |
| 161 | + // These routines help the tokenizer recognize certain character | |
| 162 | + // classes without using ctype, which we avoid because of locale | |
| 163 | + // considerations. | |
| 164 | + QPDF_DLL | |
| 165 | + bool is_hex_digit(char); | |
| 166 | + | |
| 167 | + QPDF_DLL | |
| 168 | + bool is_space(char); | |
| 169 | + | |
| 170 | + QPDF_DLL | |
| 171 | + bool is_digit(char); | |
| 172 | + | |
| 173 | + QPDF_DLL | |
| 174 | + bool is_number(char const*); | |
| 160 | 175 | }; |
| 161 | 176 | |
| 162 | 177 | #endif // __QUTIL_HH__ | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -6,66 +6,11 @@ |
| 6 | 6 | |
| 7 | 7 | #include <qpdf/QTC.hh> |
| 8 | 8 | #include <qpdf/QPDFExc.hh> |
| 9 | +#include <qpdf/QUtil.hh> | |
| 9 | 10 | |
| 10 | 11 | #include <stdexcept> |
| 11 | 12 | #include <string.h> |
| 12 | 13 | |
| 13 | -// See note above about ctype. | |
| 14 | -static bool is_hex_digit(char ch) | |
| 15 | -{ | |
| 16 | - return (strchr("0123456789abcdefABCDEF", ch) != 0); | |
| 17 | -} | |
| 18 | -static bool is_space(char ch) | |
| 19 | -{ | |
| 20 | - return (strchr(" \f\n\r\t\v", ch) != 0); | |
| 21 | -} | |
| 22 | -static bool is_digit(char ch) | |
| 23 | -{ | |
| 24 | - return ((ch >= '0') && (ch <= '9')); | |
| 25 | -} | |
| 26 | -static bool | |
| 27 | -is_number(std::string const& str) | |
| 28 | -{ | |
| 29 | - // ^[\+\-]?(\.\d+|\d+(\.\d+)?)$ | |
| 30 | - char const* p = str.c_str(); | |
| 31 | - if (! *p) | |
| 32 | - { | |
| 33 | - return false; | |
| 34 | - } | |
| 35 | - if ((*p == '-') || (*p == '+')) | |
| 36 | - { | |
| 37 | - ++p; | |
| 38 | - } | |
| 39 | - bool found_dot = false; | |
| 40 | - bool found_digit = false; | |
| 41 | - for (; *p; ++p) | |
| 42 | - { | |
| 43 | - if (*p == '.') | |
| 44 | - { | |
| 45 | - if (found_dot) | |
| 46 | - { | |
| 47 | - // only one dot | |
| 48 | - return false; | |
| 49 | - } | |
| 50 | - if (! *(p+1)) | |
| 51 | - { | |
| 52 | - // dot can't be last | |
| 53 | - return false; | |
| 54 | - } | |
| 55 | - found_dot = true; | |
| 56 | - } | |
| 57 | - else if (is_digit(*p)) | |
| 58 | - { | |
| 59 | - found_digit = true; | |
| 60 | - } | |
| 61 | - else | |
| 62 | - { | |
| 63 | - return false; | |
| 64 | - } | |
| 65 | - } | |
| 66 | - return found_digit; | |
| 67 | -} | |
| 68 | - | |
| 69 | 14 | QPDFTokenizer::QPDFTokenizer() : |
| 70 | 15 | pound_special_in_name(true), |
| 71 | 16 | allow_eof(false) |
| ... | ... | @@ -117,7 +62,7 @@ QPDFTokenizer::resolveLiteral() |
| 117 | 62 | if ((*p == '#') && this->pound_special_in_name) |
| 118 | 63 | { |
| 119 | 64 | if (p[1] && p[2] && |
| 120 | - is_hex_digit(p[1]) && is_hex_digit(p[2])) | |
| 65 | + QUtil::is_hex_digit(p[1]) && QUtil::is_hex_digit(p[2])) | |
| 121 | 66 | { |
| 122 | 67 | char num[3]; |
| 123 | 68 | num[0] = p[1]; |
| ... | ... | @@ -153,7 +98,7 @@ QPDFTokenizer::resolveLiteral() |
| 153 | 98 | } |
| 154 | 99 | val = nval; |
| 155 | 100 | } |
| 156 | - else if (is_number(val)) | |
| 101 | + else if (QUtil::is_number(val.c_str())) | |
| 157 | 102 | { |
| 158 | 103 | if (val.find('.') != std::string::npos) |
| 159 | 104 | { |
| ... | ... | @@ -447,7 +392,7 @@ QPDFTokenizer::presentCharacter(char ch) |
| 447 | 392 | } |
| 448 | 393 | val = nval; |
| 449 | 394 | } |
| 450 | - else if (is_hex_digit(ch)) | |
| 395 | + else if (QUtil::is_hex_digit(ch)) | |
| 451 | 396 | { |
| 452 | 397 | val += ch; |
| 453 | 398 | } |
| ... | ... | @@ -554,7 +499,7 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, |
| 554 | 499 | } |
| 555 | 500 | else |
| 556 | 501 | { |
| 557 | - if (is_space(static_cast<unsigned char>(ch)) && | |
| 502 | + if (QUtil::is_space(static_cast<unsigned char>(ch)) && | |
| 558 | 503 | (input->getLastOffset() == offset)) |
| 559 | 504 | { |
| 560 | 505 | ++offset; | ... | ... |
libqpdf/QUtil.cc
| ... | ... | @@ -456,3 +456,63 @@ QUtil::srandom(unsigned int seed) |
| 456 | 456 | srand(seed); |
| 457 | 457 | #endif |
| 458 | 458 | } |
| 459 | +/* Report whether ch is one of the characters 0-9, a-f or A-F. strchr() also matches the terminating NUL of the literal it searches, so ch == 0 is rejected explicitly before the lookup. */ | |
| 460 | +bool | |
| 461 | +QUtil::is_hex_digit(char ch) | |
| 462 | +{ | |
| 463 | + return (ch && (strchr("0123456789abcdefABCDEF", ch) != 0)); | |
| 464 | +} | |
| 465 | +/* Report whether ch is one of space, form feed, newline, carriage return, tab or vertical tab. strchr() also matches the terminating NUL of the literal it searches, so ch == 0 is rejected explicitly and is not reported as whitespace. */ | |
| 466 | +bool | |
| 467 | +QUtil::is_space(char ch) | |
| 468 | +{ | |
| 469 | + return (ch && (strchr(" \f\n\r\t\v", ch) != 0)); | |
| 470 | +} | |
| 471 | +/* Report whether ch lies in the range '0' through '9', using a direct comparison of character values. */ | |
| 472 | +bool | |
| 473 | +QUtil::is_digit(char ch) | |
| 474 | +{ | |
| 475 | + return (('0' <= ch) && ('9' >= ch)); | |
| 476 | +} | |
| 477 | +/* Report whether the NUL-terminated string p is an optionally signed decimal number: at most one '.', at least one digit, and no '.' as the final character. p is dereferenced unconditionally, so it must not be null. */ | |
| 478 | +bool | |
| 479 | +QUtil::is_number(char const* p) | |
| 480 | +{ | |
| 481 | + // ^[\+\-]?(\.\d+|\d+(\.\d+)?)$ | |
| 482 | + if (! *p) /* an empty string is not a number */ | |
| 483 | + { | |
| 484 | + return false; | |
| 485 | + } | |
| 486 | + if ((*p == '-') || (*p == '+')) /* step over a single optional leading sign */ | |
| 487 | + { | |
| 488 | + ++p; | |
| 489 | + } | |
| 490 | + bool found_dot = false; | |
| 491 | + bool found_digit = false; | |
| 492 | + for (; *p; ++p) /* examine every remaining character up to the terminator */ | |
| 493 | + { | |
| 494 | + if (*p == '.') | |
| 495 | + { | |
| 496 | + if (found_dot) | |
| 497 | + { | |
| 498 | + // only one dot | |
| 499 | + return false; | |
| 500 | + } | |
| 501 | + if (! *(p+1)) /* look ahead one character: the terminator right after the dot means the dot is last */ | |
| 502 | + { | |
| 503 | + // dot can't be last | |
| 504 | + return false; | |
| 505 | + } | |
| 506 | + found_dot = true; | |
| 507 | + } | |
| 508 | + else if (QUtil::is_digit(*p)) | |
| 509 | + { | |
| 510 | + found_digit = true; | |
| 511 | + } | |
| 512 | + else /* any character other than a dot or a digit disqualifies the string */ | |
| 513 | + { | |
| 514 | + return false; | |
| 515 | + } | |
| 516 | + } | |
| 517 | + return found_digit; /* false when no digit was seen, e.g. for a lone sign */ | |
| 518 | +} | ... | ... |