Commit 98d9ae51fc4e1a6967b52e7708f6ddc66c684276

Authored by m-holger
1 parent 320245e0

Integrate JSONParser::decode_string into getToken

Showing 1 changed file with 30 additions and 87 deletions
libqpdf/JSON.cc
@@ -616,12 +616,9 @@ namespace @@ -616,12 +616,9 @@ namespace
616 void getToken(); 616 void getToken();
617 void handleToken(); 617 void handleToken();
618 void numberError(); 618 void numberError();
619 - static std::string  
620 - decode_string(std::string const& json, qpdf_offset_t offset);  
621 static void handle_u_code( 619 static void handle_u_code(
622 - char const* s, 620 + unsigned long codepoint,
623 qpdf_offset_t offset, 621 qpdf_offset_t offset,
624 - qpdf_offset_t i,  
625 unsigned long& high_surrogate, 622 unsigned long& high_surrogate,
626 qpdf_offset_t& high_offset, 623 qpdf_offset_t& high_offset,
627 std::string& result); 624 std::string& result);
@@ -680,6 +677,7 @@ namespace @@ -680,6 +677,7 @@ namespace
680 size_t bytes; 677 size_t bytes;
681 char const* p; 678 char const* p;
682 qpdf_offset_t u_count; 679 qpdf_offset_t u_count;
  680 + unsigned long u_value{0};
683 qpdf_offset_t offset; 681 qpdf_offset_t offset;
684 bool done; 682 bool done;
685 std::string token; 683 std::string token;
@@ -693,22 +691,15 @@ namespace @@ -693,22 +691,15 @@ namespace
693 691
694 void 692 void
695 JSONParser::handle_u_code( 693 JSONParser::handle_u_code(
696 - char const* s, 694 + unsigned long codepoint,
697 qpdf_offset_t offset, 695 qpdf_offset_t offset,
698 - qpdf_offset_t i,  
699 unsigned long& high_surrogate, 696 unsigned long& high_surrogate,
700 qpdf_offset_t& high_offset, 697 qpdf_offset_t& high_offset,
701 std::string& result) 698 std::string& result)
702 { 699 {
703 - std::string hex = QUtil::hex_decode(std::string(s + i + 1, s + i + 5));  
704 - unsigned char high = static_cast<unsigned char>(hex.at(0));  
705 - unsigned char low = static_cast<unsigned char>(hex.at(1));  
706 - unsigned long codepoint = high;  
707 - codepoint <<= 8;  
708 - codepoint += low;  
709 if ((codepoint & 0xFC00) == 0xD800) { 700 if ((codepoint & 0xFC00) == 0xD800) {
710 // high surrogate 701 // high surrogate
711 - qpdf_offset_t new_high_offset = offset + i; 702 + qpdf_offset_t new_high_offset = offset;
712 if (high_offset) { 703 if (high_offset) {
713 QTC::TC("libtests", "JSON 16 high high"); 704 QTC::TC("libtests", "JSON 16 high high");
714 throw std::runtime_error( 705 throw std::runtime_error(
@@ -721,10 +712,10 @@ JSONParser::handle_u_code( @@ -721,10 +712,10 @@ JSONParser::handle_u_code(
721 high_surrogate = codepoint; 712 high_surrogate = codepoint;
722 } else if ((codepoint & 0xFC00) == 0xDC00) { 713 } else if ((codepoint & 0xFC00) == 0xDC00) {
723 // low surrogate 714 // low surrogate
724 - if (offset + i != (high_offset + 6)) { 715 + if (offset != (high_offset + 6)) {
725 QTC::TC("libtests", "JSON 16 low not after high"); 716 QTC::TC("libtests", "JSON 16 low not after high");
726 throw std::runtime_error( 717 throw std::runtime_error(
727 - "JSON: offset " + std::to_string(offset + i) + 718 + "JSON: offset " + std::to_string(offset) +
728 ": UTF-16 low surrogate found not immediately after high" 719 ": UTF-16 low surrogate found not immediately after high"
729 " surrogate"); 720 " surrogate");
730 } 721 }
@@ -737,74 +728,6 @@ JSONParser::handle_u_code( @@ -737,74 +728,6 @@ JSONParser::handle_u_code(
737 } 728 }
738 } 729 }
739 730
740 -std::string  
741 -JSONParser::decode_string(std::string const& str, qpdf_offset_t offset)  
742 -{  
743 - // The string has already been validated when this private method  
744 - // is called, so errors are logic errors instead of runtime  
745 - // errors.  
746 - size_t len = str.length();  
747 - char const* s = str.c_str();  
748 -  
749 - // Keep track of UTF-16 surrogate pairs.  
750 - unsigned long high_surrogate = 0;  
751 - qpdf_offset_t high_offset = 0;  
752 - std::string result;  
753 - qpdf_offset_t olen = toO(len);  
754 - for (qpdf_offset_t i = 0; i < olen; ++i) {  
755 - if (s[i] == '\\') {  
756 - if (i + 1 >= olen) {  
757 - throw std::logic_error("JSON parse: nothing after \\");  
758 - }  
759 - char ch = s[++i];  
760 - switch (ch) {  
761 - case '\\':  
762 - case '\"':  
763 - case '/':  
764 - // \/ is allowed in json input, but so is /, so we  
765 - // don't map / to \/ in output.  
766 - result.append(1, ch);  
767 - break;  
768 - case 'b':  
769 - result.append(1, '\b');  
770 - break;  
771 - case 'f':  
772 - result.append(1, '\f');  
773 - break;  
774 - case 'n':  
775 - result.append(1, '\n');  
776 - break;  
777 - case 'r':  
778 - result.append(1, '\r');  
779 - break;  
780 - case 't':  
781 - result.append(1, '\t');  
782 - break;  
783 - case 'u':  
784 - if (i + 4 >= olen) {  
785 - throw std::logic_error(  
786 - "JSON parse: not enough characters after \\u");  
787 - }  
788 - handle_u_code(  
789 - s, offset, i, high_surrogate, high_offset, result);  
790 - i += 4;  
791 - break;  
792 - default:  
793 - break;  
794 - }  
795 - } else {  
796 - result.append(1, s[i]);  
797 - }  
798 - }  
799 - if (high_offset) {  
800 - QTC::TC("libtests", "JSON 16 dangling high");  
801 - throw std::runtime_error(  
802 - "JSON: offset " + std::to_string(high_offset) +  
803 - ": UTF-16 high surrogate not followed by low surrogate");  
804 - }  
805 - return result;  
806 -}  
807 -  
808 void 731 void
809 JSONParser::numberError() 732 JSONParser::numberError()
810 { 733 {
@@ -850,6 +773,11 @@ JSONParser::getToken() @@ -850,6 +773,11 @@ JSONParser::getToken()
850 enum { append, ignore, reread } action = append; 773 enum { append, ignore, reread } action = append;
851 bool ready = false; 774 bool ready = false;
852 token.clear(); 775 token.clear();
  776 +
  777 + // Keep track of UTF-16 surrogate pairs.
  778 + unsigned long high_surrogate = 0;
  779 + qpdf_offset_t high_offset = 0;
  780 +
853 while (!done) { 781 while (!done) {
854 if (p == (buf + bytes)) { 782 if (p == (buf + bytes)) {
855 p = buf; 783 p = buf;
@@ -1046,7 +974,13 @@ JSONParser::getToken() @@ -1046,7 +974,13 @@ JSONParser::getToken()
1046 974
1047 case ls_string: 975 case ls_string:
1048 if (*p == '"') { 976 if (*p == '"') {
1049 - token = decode_string(token, token_start); 977 + if (high_offset) {
  978 + QTC::TC("libtests", "JSON 16 dangling high");
  979 + throw std::runtime_error(
  980 + "JSON: offset " + std::to_string(high_offset) +
  981 + ": UTF-16 high surrogate not followed by low "
  982 + "surrogate");
  983 + }
1050 action = ignore; 984 action = ignore;
1051 ready = true; 985 ready = true;
1052 } else if (*p == '\\') { 986 } else if (*p == '\\') {
@@ -1060,7 +994,6 @@ JSONParser::getToken() @@ -1060,7 +994,6 @@ JSONParser::getToken()
1060 lex_state = ls_string; 994 lex_state = ls_string;
1061 switch (*p) { 995 switch (*p) {
1062 case '\\': 996 case '\\':
1063 - token += "\\\\";  
1064 case '\"': 997 case '\"':
1065 case '/': 998 case '/':
1066 // \/ is allowed in json input, but so is /, so we 999 // \/ is allowed in json input, but so is /, so we
@@ -1083,9 +1016,9 @@ JSONParser::getToken() @@ -1083,9 +1016,9 @@ JSONParser::getToken()
1083 token += '\t'; 1016 token += '\t';
1084 break; 1017 break;
1085 case 'u': 1018 case 'u':
1086 - token += "\\u";  
1087 lex_state = ls_u4; 1019 lex_state = ls_u4;
1088 u_count = 0; 1020 u_count = 0;
  1021 + u_value = 0;
1089 break; 1022 break;
1090 default: 1023 default:
1091 QTC::TC("libtests", "JSON parse backslash bad character"); 1024 QTC::TC("libtests", "JSON parse backslash bad character");
@@ -1097,13 +1030,23 @@ JSONParser::getToken() @@ -1097,13 +1030,23 @@ JSONParser::getToken()
1097 break; 1030 break;
1098 1031
1099 case ls_u4: 1032 case ls_u4:
1100 - if (!QUtil::is_hex_digit(*p)) { 1033 + using ui = unsigned int;
  1034 + action = ignore;
  1035 + if ('0' <= *p && *p <= '9') {
  1036 + u_value = 16 * u_value + (ui(*p) - ui('0'));
  1037 + } else if ('a' <= *p && *p <= 'f') {
  1038 + u_value = 16 * u_value + (10 + ui(*p) - ui('a'));
  1039 + } else if ('A' <= *p && *p <= 'F') {
  1040 + u_value = 16 * u_value + (10 + ui(*p) - ui('A'));
  1041 + } else {
1101 QTC::TC("libtests", "JSON parse bad hex after u"); 1042 QTC::TC("libtests", "JSON parse bad hex after u");
1102 throw std::runtime_error( 1043 throw std::runtime_error(
1103 "JSON: offset " + std::to_string(offset - u_count - 1) + 1044 "JSON: offset " + std::to_string(offset - u_count - 1) +
1104 ": \\u must be followed by four hex digits"); 1045 ": \\u must be followed by four hex digits");
1105 } 1046 }
1106 if (++u_count == 4) { 1047 if (++u_count == 4) {
  1048 + handle_u_code(
  1049 + u_value, offset - 5, high_surrogate, high_offset, token);
1107 lex_state = ls_string; 1050 lex_state = ls_string;
1108 } 1051 }
1109 break; 1052 break;