Commit ef6f5a97f42a6d59abc9ca81ae4e9ab3b7168f55
Committed by GitHub
Merge pull request #936 from m-holger/hex
Refactor QUtil::hex_decode
Showing 4 changed files with 39 additions and 65 deletions
include/qpdf/QUtil.hh
| ... | ... | @@ -223,6 +223,11 @@ namespace QUtil |
| 223 | 223 | QPDF_DLL |
| 224 | 224 | std::string hex_decode(std::string const&); |
| 225 | 225 | |
| 226 | + // Decode a single hex digit into a char in the range 0 <= char < 16. Return | |
| 227 | + // a char >= 16 if digit is not a valid hex digit. | |
| 228 | + QPDF_DLL | |
| 229 | + inline constexpr char hex_decode_char(char digit) noexcept; | |
| 230 | + | |
| 226 | 231 | // Set stdin, stdout to binary mode |
| 227 | 232 | QPDF_DLL |
| 228 | 233 | void binary_stdout(); |
| ... | ... | @@ -550,8 +555,7 @@ namespace QUtil |
| 550 | 555 | inline bool |
| 551 | 556 | QUtil::is_hex_digit(char ch) |
| 552 | 557 | { |
| 553 | - return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || | |
| 554 | - ('A' <= ch && ch <= 'F'); | |
| 558 | + return hex_decode_char(ch) < '\20'; | |
| 555 | 559 | } |
| 556 | 560 | |
| 557 | 561 | inline bool |
| ... | ... | @@ -603,4 +607,13 @@ QUtil::hex_encode_char(char c) |
| 603 | 607 | '#', hexchars[static_cast<unsigned char>(c) >> 4], hexchars[c & 0x0f]}; |
| 604 | 608 | } |
| 605 | 609 | |
| 610 | +inline constexpr char | |
| 611 | +QUtil::hex_decode_char(char digit) noexcept | |
| 612 | +{ | |
| 613 | + return digit <= '9' && digit >= '0' | |
| 614 | + ? char(digit - '0') | |
| 615 | + : (digit >= 'a' ? char(digit - 'a' + 10) | |
| 616 | + : (digit >= 'A' ? char(digit - 'A' + 10) : '\20')); | |
| 617 | +} | |
| 618 | + | |
| 606 | 619 | #endif // QUTIL_HH | ... | ... |
libqpdf/JSON.cc
| ... | ... | @@ -1121,12 +1121,8 @@ JSONParser::getToken() |
| 1121 | 1121 | |
| 1122 | 1122 | case ls_u4: |
| 1123 | 1123 | using ui = unsigned int; |
| 1124 | - if ('0' <= *p && *p <= '9') { | |
| 1125 | - u_value = 16 * u_value + (ui(*p) - ui('0')); | |
| 1126 | - } else if ('a' <= *p && *p <= 'f') { | |
| 1127 | - u_value = 16 * u_value + (10 + ui(*p) - ui('a')); | |
| 1128 | - } else if ('A' <= *p && *p <= 'F') { | |
| 1129 | - u_value = 16 * u_value + (10 + ui(*p) - ui('A')); | |
| 1124 | + if (ui val = ui(QUtil::hex_decode_char(*p)); val < 16) { | |
| 1125 | + u_value = 16 * u_value + val; | |
| 1130 | 1126 | } else { |
| 1131 | 1127 | tokenError(); |
| 1132 | 1128 | } | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -449,18 +449,9 @@ QPDFTokenizer::inNameHex1(char ch) |
| 449 | 449 | { |
| 450 | 450 | this->hex_char = ch; |
| 451 | 451 | |
| 452 | - if ('0' <= ch && ch <= '9') { | |
| 453 | - this->char_code = 16 * (int(ch) - int('0')); | |
| 452 | + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { | |
| 453 | + this->char_code = int(hval) << 4; | |
| 454 | 454 | this->state = st_name_hex2; |
| 455 | - | |
| 456 | - } else if ('A' <= ch && ch <= 'F') { | |
| 457 | - this->char_code = 16 * (10 + int(ch) - int('A')); | |
| 458 | - this->state = st_name_hex2; | |
| 459 | - | |
| 460 | - } else if ('a' <= ch && ch <= 'f') { | |
| 461 | - this->char_code = 16 * (10 + int(ch) - int('a')); | |
| 462 | - this->state = st_name_hex2; | |
| 463 | - | |
| 464 | 455 | } else { |
| 465 | 456 | QTC::TC("qpdf", "QPDFTokenizer bad name 1"); |
| 466 | 457 | this->error_message = "name with stray # will not work with PDF >= 1.2"; |
| ... | ... | @@ -475,15 +466,8 @@ QPDFTokenizer::inNameHex1(char ch) |
| 475 | 466 | void |
| 476 | 467 | QPDFTokenizer::inNameHex2(char ch) |
| 477 | 468 | { |
| 478 | - if ('0' <= ch && ch <= '9') { | |
| 479 | - this->char_code += int(ch) - int('0'); | |
| 480 | - | |
| 481 | - } else if ('A' <= ch && ch <= 'F') { | |
| 482 | - this->char_code += 10 + int(ch) - int('A'); | |
| 483 | - | |
| 484 | - } else if ('a' <= ch && ch <= 'f') { | |
| 485 | - this->char_code += 10 + int(ch) - int('a'); | |
| 486 | - | |
| 469 | + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { | |
| 470 | + this->char_code |= int(hval); | |
| 487 | 471 | } else { |
| 488 | 472 | QTC::TC("qpdf", "QPDFTokenizer bad name 2"); |
| 489 | 473 | this->error_message = "name with stray # will not work with PDF >= 1.2"; |
| ... | ... | @@ -675,16 +659,8 @@ QPDFTokenizer::inLiteral(char ch) |
| 675 | 659 | void |
| 676 | 660 | QPDFTokenizer::inHexstring(char ch) |
| 677 | 661 | { |
| 678 | - if ('0' <= ch && ch <= '9') { | |
| 679 | - this->char_code = 16 * (int(ch) - int('0')); | |
| 680 | - this->state = st_in_hexstring_2nd; | |
| 681 | - | |
| 682 | - } else if ('A' <= ch && ch <= 'F') { | |
| 683 | - this->char_code = 16 * (10 + int(ch) - int('A')); | |
| 684 | - this->state = st_in_hexstring_2nd; | |
| 685 | - | |
| 686 | - } else if ('a' <= ch && ch <= 'f') { | |
| 687 | - this->char_code = 16 * (10 + int(ch) - int('a')); | |
| 662 | + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { | |
| 663 | + this->char_code = int(hval) << 4; | |
| 688 | 664 | this->state = st_in_hexstring_2nd; |
| 689 | 665 | |
| 690 | 666 | } else if (ch == '>') { |
| ... | ... | @@ -706,16 +682,8 @@ QPDFTokenizer::inHexstring(char ch) |
| 706 | 682 | void |
| 707 | 683 | QPDFTokenizer::inHexstring2nd(char ch) |
| 708 | 684 | { |
| 709 | - if ('0' <= ch && ch <= '9') { | |
| 710 | - this->val += char(this->char_code + int(ch) - int('0')); | |
| 711 | - this->state = st_in_hexstring; | |
| 712 | - | |
| 713 | - } else if ('A' <= ch && ch <= 'F') { | |
| 714 | - this->val += char(this->char_code + 10 + int(ch) - int('A')); | |
| 715 | - this->state = st_in_hexstring; | |
| 716 | - | |
| 717 | - } else if ('a' <= ch && ch <= 'f') { | |
| 718 | - this->val += char(this->char_code + 10 + int(ch) - int('a')); | |
| 685 | + if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { | |
| 686 | + this->val += char(this->char_code) | hval; | |
| 719 | 687 | this->state = st_in_hexstring; |
| 720 | 688 | |
| 721 | 689 | } else if (ch == '>') { | ... | ... |
libqpdf/QUtil.cc
| ... | ... | @@ -783,28 +783,25 @@ std::string |
| 783 | 783 | QUtil::hex_decode(std::string const& input) |
| 784 | 784 | { |
| 785 | 785 | std::string result; |
| 786 | - size_t pos = 0; | |
| 786 | + // We know result.size() <= 0.5 * input.size() + 1. However, reserving | |
| 787 | + // string space for this upper bound has a negative impact. | |
| 788 | + bool first = true; | |
| 789 | + char decoded; | |
| 787 | 790 | for (auto ch: input) { |
| 788 | - bool skip = false; | |
| 789 | - if ((ch >= 'A') && (ch <= 'F')) { | |
| 790 | - ch = QIntC::to_char(ch - 'A' + 10); | |
| 791 | - } else if ((ch >= 'a') && (ch <= 'f')) { | |
| 792 | - ch = QIntC::to_char(ch - 'a' + 10); | |
| 793 | - } else if ((ch >= '0') && (ch <= '9')) { | |
| 794 | - ch = QIntC::to_char(ch - '0'); | |
| 795 | - } else { | |
| 796 | - skip = true; | |
| 797 | - } | |
| 798 | - if (!skip) { | |
| 799 | - if (pos == 0) { | |
| 800 | - result.push_back(static_cast<char>(ch << 4)); | |
| 801 | - pos = 1; | |
| 791 | + ch = hex_decode_char(ch); | |
| 792 | + if (ch < '\20') { | |
| 793 | + if (first) { | |
| 794 | + decoded = static_cast<char>(ch << 4); | |
| 795 | + first = false; | |
| 802 | 796 | } else { |
| 803 | - result[result.length() - 1] |= ch; | |
| 804 | - pos = 0; | |
| 797 | + result.push_back(decoded | ch); | |
| 798 | + first = true; | |
| 805 | 799 | } |
| 806 | 800 | } |
| 807 | 801 | } |
| 802 | + if (!first) { | |
| 803 | + result.push_back(decoded); | |
| 804 | + } | |
| 808 | 805 | return result; |
| 809 | 806 | } |
| 810 | 807 | ... | ... |