Commit e4fe0d5cf57d1d5b7d34492ffb616746ecd6ae35
1 parent a5d2e887
Refactor QPDFTokenizer::inHexstring
Showing 3 changed files with 57 additions and 20 deletions
include/qpdf/QPDFTokenizer.hh
| @@ -210,6 +210,7 @@ class QPDFTokenizer | @@ -210,6 +210,7 @@ class QPDFTokenizer | ||
| 210 | st_gt, | 210 | st_gt, |
| 211 | st_literal, | 211 | st_literal, |
| 212 | st_in_hexstring, | 212 | st_in_hexstring, |
| 213 | + st_in_hexstring_2nd, | ||
| 213 | st_inline_image, | 214 | st_inline_image, |
| 214 | st_token_ready | 215 | st_token_ready |
| 215 | }; | 216 | }; |
| @@ -217,6 +218,7 @@ class QPDFTokenizer | @@ -217,6 +218,7 @@ class QPDFTokenizer | ||
| 217 | void handleCharacter(char); | 218 | void handleCharacter(char); |
| 218 | void inCharCode(char); | 219 | void inCharCode(char); |
| 219 | void inHexstring(char); | 220 | void inHexstring(char); |
| 221 | + void inHexstring2nd(char); | ||
| 220 | void inString(char); | 222 | void inString(char); |
| 221 | 223 | ||
| 222 | void reset(); | 224 | void reset(); |
| @@ -238,6 +240,7 @@ class QPDFTokenizer | @@ -238,6 +240,7 @@ class QPDFTokenizer | ||
| 238 | 240 | ||
| 239 | // State for strings | 241 | // State for strings |
| 240 | int string_depth; | 242 | int string_depth; |
| 243 | + int char_code; | ||
| 241 | char bs_num_register[4]; | 244 | char bs_num_register[4]; |
| 242 | }; | 245 | }; |
| 243 | 246 |
libqpdf/QPDFTokenizer.cc
| @@ -442,6 +442,10 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -442,6 +442,10 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 442 | inHexstring(ch); | 442 | inHexstring(ch); |
| 443 | return; | 443 | return; |
| 444 | 444 | ||
| 445 | + case (st_in_hexstring_2nd): | ||
| 446 | + inHexstring2nd(ch); | ||
| 447 | + return; | ||
| 448 | + | ||
| 445 | default: | 449 | default: |
| 446 | throw std::logic_error( | 450 | throw std::logic_error( |
| 447 | "INTERNAL ERROR: invalid state while reading token"); | 451 | "INTERNAL ERROR: invalid state while reading token"); |
| @@ -451,29 +455,25 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -451,29 +455,25 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 451 | void | 455 | void |
| 452 | QPDFTokenizer::inHexstring(char ch) | 456 | QPDFTokenizer::inHexstring(char ch) |
| 453 | { | 457 | { |
| 454 | - if (ch == '>') { | 458 | + if ('0' <= ch && ch <= '9') { |
| 459 | + this->char_code = 16 * (int(ch) - int('0')); | ||
| 460 | + this->state = st_in_hexstring_2nd; | ||
| 461 | + | ||
| 462 | + } else if ('A' <= ch && ch <= 'F') { | ||
| 463 | + this->char_code = 16 * (10 + int(ch) - int('A')); | ||
| 464 | + this->state = st_in_hexstring_2nd; | ||
| 465 | + | ||
| 466 | + } else if ('a' <= ch && ch <= 'f') { | ||
| 467 | + this->char_code = 16 * (10 + int(ch) - int('a')); | ||
| 468 | + this->state = st_in_hexstring_2nd; | ||
| 469 | + | ||
| 470 | + } else if (ch == '>') { | ||
| 455 | this->type = tt_string; | 471 | this->type = tt_string; |
| 456 | this->state = st_token_ready; | 472 | this->state = st_token_ready; |
| 457 | - if (this->val.length() % 2) { | ||
| 458 | - // PDF spec says odd hexstrings have implicit | ||
| 459 | - // trailing 0. | ||
| 460 | - this->val += '0'; | ||
| 461 | - } | ||
| 462 | - char num[3]; | ||
| 463 | - num[2] = '\0'; | ||
| 464 | - std::string nval; | ||
| 465 | - for (unsigned int i = 0; i < this->val.length(); i += 2) { | ||
| 466 | - num[0] = this->val.at(i); | ||
| 467 | - num[1] = this->val.at(i + 1); | ||
| 468 | - char nch = static_cast<char>(strtol(num, nullptr, 16)); | ||
| 469 | - nval += nch; | ||
| 470 | - } | ||
| 471 | - this->val.clear(); | ||
| 472 | - this->val += nval; | ||
| 473 | - } else if (QUtil::is_hex_digit(ch)) { | ||
| 474 | - this->val += ch; | 473 | + |
| 475 | } else if (isSpace(ch)) { | 474 | } else if (isSpace(ch)) { |
| 476 | // ignore | 475 | // ignore |
| 476 | + | ||
| 477 | } else { | 477 | } else { |
| 478 | this->type = tt_bad; | 478 | this->type = tt_bad; |
| 479 | QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); | 479 | QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); |
| @@ -484,6 +484,39 @@ QPDFTokenizer::inHexstring(char ch) | @@ -484,6 +484,39 @@ QPDFTokenizer::inHexstring(char ch) | ||
| 484 | } | 484 | } |
| 485 | 485 | ||
| 486 | void | 486 | void |
| 487 | +QPDFTokenizer::inHexstring2nd(char ch) | ||
| 488 | +{ | ||
| 489 | + if ('0' <= ch && ch <= '9') { | ||
| 490 | + this->val += char(this->char_code + int(ch) - int('0')); | ||
| 491 | + this->state = st_in_hexstring; | ||
| 492 | + | ||
| 493 | + } else if ('A' <= ch && ch <= 'F') { | ||
| 494 | + this->val += char(this->char_code + 10 + int(ch) - int('A')); | ||
| 495 | + this->state = st_in_hexstring; | ||
| 496 | + | ||
| 497 | + } else if ('a' <= ch && ch <= 'f') { | ||
| 498 | + this->val += char(this->char_code + 10 + int(ch) - int('a')); | ||
| 499 | + this->state = st_in_hexstring; | ||
| 500 | + | ||
| 501 | + } else if (ch == '>') { | ||
| 502 | + // PDF spec says odd hexstrings have implicit trailing 0. | ||
| 503 | + this->val += char(this->char_code); | ||
| 504 | + this->type = tt_string; | ||
| 505 | + this->state = st_token_ready; | ||
| 506 | + | ||
| 507 | + } else if (isSpace(ch)) { | ||
| 508 | + // ignore | ||
| 509 | + | ||
| 510 | + } else { | ||
| 511 | + this->type = tt_bad; | ||
| 512 | + QTC::TC("qpdf", "QPDFTokenizer bad hexstring 2nd character"); | ||
| 513 | + this->error_message = | ||
| 514 | + std::string("invalid character (") + ch + ") in hexstring"; | ||
| 515 | + this->state = st_token_ready; | ||
| 516 | + } | ||
| 517 | +} | ||
| 518 | + | ||
| 519 | +void | ||
| 487 | QPDFTokenizer::inString(char ch) | 520 | QPDFTokenizer::inString(char ch) |
| 488 | { | 521 | { |
| 489 | switch (ch) { | 522 | switch (ch) { |
| @@ -526,7 +559,7 @@ void | @@ -526,7 +559,7 @@ void | ||
| 526 | QPDFTokenizer::inCharCode(char ch) | 559 | QPDFTokenizer::inCharCode(char ch) |
| 527 | { | 560 | { |
| 528 | size_t bs_num_count = strlen(this->bs_num_register); | 561 | size_t bs_num_count = strlen(this->bs_num_register); |
| 529 | - bool ch_is_octal = ((ch >= '0') && (ch <= '7')); | 562 | + bool ch_is_octal = ('0' <= ch && ch <= '7'); |
| 530 | if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { | 563 | if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { |
| 531 | // We've accumulated \ddd. PDF Spec says to ignore | 564 | // We've accumulated \ddd. PDF Spec says to ignore |
| 532 | // high-order overflow. | 565 | // high-order overflow. |
qpdf/qpdf.testcov
| @@ -66,6 +66,7 @@ QPDF can't find xref 0 | @@ -66,6 +66,7 @@ QPDF can't find xref 0 | ||
| 66 | QPDFTokenizer bad ) 0 | 66 | QPDFTokenizer bad ) 0 |
| 67 | QPDFTokenizer bad > 0 | 67 | QPDFTokenizer bad > 0 |
| 68 | QPDFTokenizer bad hexstring character 0 | 68 | QPDFTokenizer bad hexstring character 0 |
| 69 | +QPDFTokenizer bad hexstring 2nd character 0 | ||
| 69 | QPDFTokenizer null in name 0 | 70 | QPDFTokenizer null in name 0 |
| 70 | QPDFTokenizer bad name 0 | 71 | QPDFTokenizer bad name 0 |
| 71 | QPDF_Stream invalid filter 0 | 72 | QPDF_Stream invalid filter 0 |