Commit e4fe0d5cf57d1d5b7d34492ffb616746ecd6ae35
1 parent: a5d2e887
Refactor QPDFTokenizer::inHexstring
Showing 3 changed files with 57 additions and 20 deletions
include/qpdf/QPDFTokenizer.hh
| ... | ... | @@ -210,6 +210,7 @@ class QPDFTokenizer |
| 210 | 210 | st_gt, |
| 211 | 211 | st_literal, |
| 212 | 212 | st_in_hexstring, |
| 213 | + st_in_hexstring_2nd, | |
| 213 | 214 | st_inline_image, |
| 214 | 215 | st_token_ready |
| 215 | 216 | }; |
| ... | ... | @@ -217,6 +218,7 @@ class QPDFTokenizer |
| 217 | 218 | void handleCharacter(char); |
| 218 | 219 | void inCharCode(char); |
| 219 | 220 | void inHexstring(char); |
| 221 | + void inHexstring2nd(char); | |
| 220 | 222 | void inString(char); |
| 221 | 223 | |
| 222 | 224 | void reset(); |
| ... | ... | @@ -238,6 +240,7 @@ class QPDFTokenizer |
| 238 | 240 | |
| 239 | 241 | // State for strings |
| 240 | 242 | int string_depth; |
| 243 | + int char_code; | |
| 241 | 244 | char bs_num_register[4]; |
| 242 | 245 | }; |
| 243 | 246 | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -442,6 +442,10 @@ QPDFTokenizer::handleCharacter(char ch) |
| 442 | 442 | inHexstring(ch); |
| 443 | 443 | return; |
| 444 | 444 | |
| 445 | + case (st_in_hexstring_2nd): | |
| 446 | + inHexstring2nd(ch); | |
| 447 | + return; | |
| 448 | + | |
| 445 | 449 | default: |
| 446 | 450 | throw std::logic_error( |
| 447 | 451 | "INTERNAL ERROR: invalid state while reading token"); |
| ... | ... | @@ -451,29 +455,25 @@ QPDFTokenizer::handleCharacter(char ch) |
| 451 | 455 | void |
| 452 | 456 | QPDFTokenizer::inHexstring(char ch) |
| 453 | 457 | { |
| 454 | - if (ch == '>') { | |
| 458 | + if ('0' <= ch && ch <= '9') { | |
| 459 | + this->char_code = 16 * (int(ch) - int('0')); | |
| 460 | + this->state = st_in_hexstring_2nd; | |
| 461 | + | |
| 462 | + } else if ('A' <= ch && ch <= 'F') { | |
| 463 | + this->char_code = 16 * (10 + int(ch) - int('A')); | |
| 464 | + this->state = st_in_hexstring_2nd; | |
| 465 | + | |
| 466 | + } else if ('a' <= ch && ch <= 'f') { | |
| 467 | + this->char_code = 16 * (10 + int(ch) - int('a')); | |
| 468 | + this->state = st_in_hexstring_2nd; | |
| 469 | + | |
| 470 | + } else if (ch == '>') { | |
| 455 | 471 | this->type = tt_string; |
| 456 | 472 | this->state = st_token_ready; |
| 457 | - if (this->val.length() % 2) { | |
| 458 | - // PDF spec says odd hexstrings have implicit | |
| 459 | - // trailing 0. | |
| 460 | - this->val += '0'; | |
| 461 | - } | |
| 462 | - char num[3]; | |
| 463 | - num[2] = '\0'; | |
| 464 | - std::string nval; | |
| 465 | - for (unsigned int i = 0; i < this->val.length(); i += 2) { | |
| 466 | - num[0] = this->val.at(i); | |
| 467 | - num[1] = this->val.at(i + 1); | |
| 468 | - char nch = static_cast<char>(strtol(num, nullptr, 16)); | |
| 469 | - nval += nch; | |
| 470 | - } | |
| 471 | - this->val.clear(); | |
| 472 | - this->val += nval; | |
| 473 | - } else if (QUtil::is_hex_digit(ch)) { | |
| 474 | - this->val += ch; | |
| 473 | + | |
| 475 | 474 | } else if (isSpace(ch)) { |
| 476 | 475 | // ignore |
| 476 | + | |
| 477 | 477 | } else { |
| 478 | 478 | this->type = tt_bad; |
| 479 | 479 | QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); |
| ... | ... | @@ -484,6 +484,39 @@ QPDFTokenizer::inHexstring(char ch) |
| 484 | 484 | } |
| 485 | 485 | |
| 486 | 486 | void |
| 487 | +QPDFTokenizer::inHexstring2nd(char ch) | |
| 488 | +{ | |
| 489 | + if ('0' <= ch && ch <= '9') { | |
| 490 | + this->val += char(this->char_code + int(ch) - int('0')); | |
| 491 | + this->state = st_in_hexstring; | |
| 492 | + | |
| 493 | + } else if ('A' <= ch && ch <= 'F') { | |
| 494 | + this->val += char(this->char_code + 10 + int(ch) - int('A')); | |
| 495 | + this->state = st_in_hexstring; | |
| 496 | + | |
| 497 | + } else if ('a' <= ch && ch <= 'f') { | |
| 498 | + this->val += char(this->char_code + 10 + int(ch) - int('a')); | |
| 499 | + this->state = st_in_hexstring; | |
| 500 | + | |
| 501 | + } else if (ch == '>') { | |
| 502 | + // PDF spec says odd hexstrings have implicit trailing 0. | |
| 503 | + this->val += char(this->char_code); | |
| 504 | + this->type = tt_string; | |
| 505 | + this->state = st_token_ready; | |
| 506 | + | |
| 507 | + } else if (isSpace(ch)) { | |
| 508 | + // ignore | |
| 509 | + | |
| 510 | + } else { | |
| 511 | + this->type = tt_bad; | |
| 512 | + QTC::TC("qpdf", "QPDFTokenizer bad hexstring 2nd character"); | |
| 513 | + this->error_message = | |
| 514 | + std::string("invalid character (") + ch + ") in hexstring"; | |
| 515 | + this->state = st_token_ready; | |
| 516 | + } | |
| 517 | +} | |
| 518 | + | |
| 519 | +void | |
| 487 | 520 | QPDFTokenizer::inString(char ch) |
| 488 | 521 | { |
| 489 | 522 | switch (ch) { |
| ... | ... | @@ -526,7 +559,7 @@ void |
| 526 | 559 | QPDFTokenizer::inCharCode(char ch) |
| 527 | 560 | { |
| 528 | 561 | size_t bs_num_count = strlen(this->bs_num_register); |
| 529 | - bool ch_is_octal = ((ch >= '0') && (ch <= '7')); | |
| 562 | + bool ch_is_octal = ('0' <= ch && ch <= '7'); | |
| 530 | 563 | if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { |
| 531 | 564 | // We've accumulated \ddd. PDF Spec says to ignore |
| 532 | 565 | // high-order overflow. | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -66,6 +66,7 @@ QPDF can't find xref 0 |
| 66 | 66 | QPDFTokenizer bad ) 0 |
| 67 | 67 | QPDFTokenizer bad > 0 |
| 68 | 68 | QPDFTokenizer bad hexstring character 0 |
| 69 | +QPDFTokenizer bad hexstring 2nd character 0 | |
| 69 | 70 | QPDFTokenizer null in name 0 |
| 70 | 71 | QPDFTokenizer bad name 0 |
| 71 | 72 | QPDF_Stream invalid filter 0 | ... | ... |