Commit ee32235f54884247f6117fc0fbdd462a4e38ac1f (1 parent: f5b7448a)
In JSONParser::getToken handle legal control chars early
Also, reject them in strings.
Showing 11 changed files with 305 additions and 284 deletions
libqpdf/JSON.cc
| @@ -723,10 +723,11 @@ JSONParser::handle_u_code( | @@ -723,10 +723,11 @@ JSONParser::handle_u_code( | ||
| 723 | void | 723 | void |
| 724 | JSONParser::tokenError() | 724 | JSONParser::tokenError() |
| 725 | { | 725 | { |
| 726 | - if (bytes == 0) { | 726 | + if (done) { |
| 727 | QTC::TC("libtests", "JSON parse ls premature end of input"); | 727 | QTC::TC("libtests", "JSON parse ls premature end of input"); |
| 728 | throw std::runtime_error("JSON: premature end of input"); | 728 | throw std::runtime_error("JSON: premature end of input"); |
| 729 | } | 729 | } |
| 730 | + | ||
| 730 | if (lex_state == ls_u4) { | 731 | if (lex_state == ls_u4) { |
| 731 | QTC::TC("libtests", "JSON parse bad hex after u"); | 732 | QTC::TC("libtests", "JSON parse bad hex after u"); |
| 732 | throw std::runtime_error( | 733 | throw std::runtime_error( |
| @@ -737,6 +738,11 @@ JSONParser::tokenError() | @@ -737,6 +738,11 @@ JSONParser::tokenError() | ||
| 737 | throw std::runtime_error( | 738 | throw std::runtime_error( |
| 738 | "JSON: offset " + std::to_string(offset) + | 739 | "JSON: offset " + std::to_string(offset) + |
| 739 | ": keyword: unexpected character " + std::string(p, 1)); | 740 | ": keyword: unexpected character " + std::string(p, 1)); |
| 741 | + } else if (lex_state == ls_string) { | ||
| 742 | + QTC::TC("libtests", "JSON parse control char in string"); | ||
| 743 | + throw std::runtime_error( | ||
| 744 | + "JSON: offset " + std::to_string(offset) + | ||
| 745 | + ": control character in string (missing \"?)"); | ||
| 740 | } else if (lex_state == ls_backslash) { | 746 | } else if (lex_state == ls_backslash) { |
| 741 | QTC::TC("libtests", "JSON parse backslash bad character"); | 747 | QTC::TC("libtests", "JSON parse backslash bad character"); |
| 742 | throw std::runtime_error( | 748 | throw std::runtime_error( |
| @@ -779,6 +785,7 @@ JSONParser::tokenError() | @@ -779,6 +785,7 @@ JSONParser::tokenError() | ||
| 779 | "JSON: offset " + std::to_string(offset) + | 785 | "JSON: offset " + std::to_string(offset) + |
| 780 | ": numeric literal: unexpected character " + std::string(p, 1)); | 786 | ": numeric literal: unexpected character " + std::string(p, 1)); |
| 781 | } | 787 | } |
| 788 | + throw std::logic_error("JSON::tokenError : unhandled error"); | ||
| 782 | } | 789 | } |
| 783 | 790 | ||
| 784 | void | 791 | void |
| @@ -792,7 +799,7 @@ JSONParser::getToken() | @@ -792,7 +799,7 @@ JSONParser::getToken() | ||
| 792 | unsigned long high_surrogate = 0; | 799 | unsigned long high_surrogate = 0; |
| 793 | qpdf_offset_t high_offset = 0; | 800 | qpdf_offset_t high_offset = 0; |
| 794 | 801 | ||
| 795 | - while (!done) { | 802 | + while (true) { |
| 796 | if (p == (buf + bytes)) { | 803 | if (p == (buf + bytes)) { |
| 797 | p = buf; | 804 | p = buf; |
| 798 | bytes = is.read(buf, sizeof(buf)); | 805 | bytes = is.read(buf, sizeof(buf)); |
| @@ -808,307 +815,320 @@ JSONParser::getToken() | @@ -808,307 +815,320 @@ JSONParser::getToken() | ||
| 808 | // end the current token (unless we are still before the start | 815 | // end the current token (unless we are still before the start |
| 809 | // of the token). | 816 | // of the token). |
| 810 | if (lex_state == ls_top) { | 817 | if (lex_state == ls_top) { |
| 811 | - // Continue with token | 818 | + ++p; |
| 819 | + ++offset; | ||
| 812 | } else { | 820 | } else { |
| 813 | - // done | 821 | + break; |
| 814 | } | 822 | } |
| 823 | + | ||
| 815 | } else { | 824 | } else { |
| 816 | QTC::TC("libtests", "JSON parse null character"); | 825 | QTC::TC("libtests", "JSON parse null character"); |
| 817 | throw std::runtime_error( | 826 | throw std::runtime_error( |
| 818 | "JSON: control or null character at offset " + | 827 | "JSON: control or null character at offset " + |
| 819 | std::to_string(offset)); | 828 | std::to_string(offset)); |
| 820 | } | 829 | } |
| 821 | - } | ||
| 822 | - action = append; | ||
| 823 | - switch (lex_state) { | ||
| 824 | - case ls_top: | ||
| 825 | - token_start = offset; | ||
| 826 | - if (*p == '"') { | ||
| 827 | - lex_state = ls_string; | ||
| 828 | - action = ignore; | ||
| 829 | - } else if (QUtil::is_space(*p)) { | ||
| 830 | - action = ignore; | ||
| 831 | - } else if (*p == ',') { | ||
| 832 | - lex_state = ls_comma; | ||
| 833 | - action = ignore; | ||
| 834 | - ready = true; | ||
| 835 | - } else if (*p == ':') { | ||
| 836 | - lex_state = ls_colon; | ||
| 837 | - action = ignore; | ||
| 838 | - ready = true; | ||
| 839 | - } else if (*p == '{') { | ||
| 840 | - lex_state = ls_begin_dict; | ||
| 841 | - action = ignore; | ||
| 842 | - ready = true; | ||
| 843 | - } else if (*p == '}') { | ||
| 844 | - lex_state = ls_end_dict; | ||
| 845 | - action = ignore; | ||
| 846 | - ready = true; | ||
| 847 | - } else if (*p == '[') { | ||
| 848 | - lex_state = ls_begin_array; | ||
| 849 | - action = ignore; | ||
| 850 | - ready = true; | ||
| 851 | - } else if (*p == ']') { | ||
| 852 | - lex_state = ls_end_array; | ||
| 853 | - action = ignore; | ||
| 854 | - ready = true; | ||
| 855 | - } else if ((*p >= 'a') && (*p <= 'z')) { | ||
| 856 | - lex_state = ls_alpha; | ||
| 857 | - } else if (*p == '-') { | ||
| 858 | - lex_state = ls_number_minus; | ||
| 859 | - } else if ((*p >= '1') && (*p <= '9')) { | ||
| 860 | - lex_state = ls_number_before_point; | ||
| 861 | - } else if (*p == '0') { | ||
| 862 | - lex_state = ls_number_leading_zero; | ||
| 863 | - } else { | ||
| 864 | - QTC::TC("libtests", "JSON parse bad character"); | ||
| 865 | - throw std::runtime_error( | ||
| 866 | - "JSON: offset " + std::to_string(offset) + | ||
| 867 | - ": unexpected character " + std::string(p, 1)); | ||
| 868 | - } | ||
| 869 | - break; | ||
| 870 | - | ||
| 871 | - case ls_number_minus: | ||
| 872 | - if ((*p >= '1') && (*p <= '9')) { | ||
| 873 | - lex_state = ls_number_before_point; | ||
| 874 | - } else if (*p == '0') { | ||
| 875 | - lex_state = ls_number_leading_zero; | ||
| 876 | - } else { | ||
| 877 | - QTC::TC("libtests", "JSON parse number minus no digits"); | ||
| 878 | - throw std::runtime_error( | ||
| 879 | - "JSON: offset " + std::to_string(offset) + | ||
| 880 | - ": numeric literal: no digit after minus sign"); | ||
| 881 | - } | ||
| 882 | - break; | ||
| 883 | - | ||
| 884 | - case ls_number_leading_zero: | ||
| 885 | - if (*p == '.') { | ||
| 886 | - lex_state = ls_number_point; | ||
| 887 | - } else if (QUtil::is_space(*p)) { | ||
| 888 | - lex_state = ls_number; | ||
| 889 | - action = ignore; | ||
| 890 | - ready = true; | ||
| 891 | - } else if (strchr("{}[]:,", *p)) { | ||
| 892 | - lex_state = ls_number; | ||
| 893 | - action = reread; | ||
| 894 | - ready = true; | ||
| 895 | - } else if (*p == 'e' || *p == 'E') { | ||
| 896 | - lex_state = ls_number_e; | ||
| 897 | - } else { | ||
| 898 | - QTC::TC("libtests", "JSON parse leading zero"); | ||
| 899 | - throw std::runtime_error( | ||
| 900 | - "JSON: offset " + std::to_string(offset) + | ||
| 901 | - ": number with leading zero"); | ||
| 902 | - } | ||
| 903 | - break; | ||
| 904 | - | ||
| 905 | - case ls_number_before_point: | ||
| 906 | - if ((*p >= '0') && (*p <= '9')) { | ||
| 907 | - // continue | ||
| 908 | - } else if (*p == '.') { | ||
| 909 | - lex_state = ls_number_point; | ||
| 910 | - } else if (QUtil::is_space(*p)) { | ||
| 911 | - lex_state = ls_number; | ||
| 912 | - action = ignore; | ||
| 913 | - ready = true; | ||
| 914 | - } else if (strchr("{}[]:,", *p)) { | ||
| 915 | - lex_state = ls_number; | ||
| 916 | - action = reread; | ||
| 917 | - ready = true; | ||
| 918 | - } else if (*p == 'e' || *p == 'E') { | ||
| 919 | - lex_state = ls_number_e; | ||
| 920 | - } else { | ||
| 921 | - tokenError(); | ||
| 922 | - } | ||
| 923 | - break; | ||
| 924 | - | ||
| 925 | - case ls_number_point: | ||
| 926 | - if ((*p >= '0') && (*p <= '9')) { | ||
| 927 | - lex_state = ls_number_after_point; | ||
| 928 | - } else { | ||
| 929 | - tokenError(); | ||
| 930 | - } | ||
| 931 | - break; | ||
| 932 | - | ||
| 933 | - case ls_number_after_point: | ||
| 934 | - if ((*p >= '0') && (*p <= '9')) { | ||
| 935 | - // continue | ||
| 936 | - } else if (QUtil::is_space(*p)) { | ||
| 937 | - lex_state = ls_number; | ||
| 938 | - action = ignore; | ||
| 939 | - ready = true; | ||
| 940 | - } else if (strchr("{}[]:,", *p)) { | ||
| 941 | - lex_state = ls_number; | ||
| 942 | - action = reread; | ||
| 943 | - ready = true; | ||
| 944 | - } else if (*p == 'e' || *p == 'E') { | ||
| 945 | - lex_state = ls_number_e; | ||
| 946 | - } else { | ||
| 947 | - tokenError(); | ||
| 948 | - } | ||
| 949 | - break; | 830 | + } else { |
| 831 | + action = append; | ||
| 832 | + switch (lex_state) { | ||
| 833 | + case ls_top: | ||
| 834 | + token_start = offset; | ||
| 835 | + if (*p == '"') { | ||
| 836 | + lex_state = ls_string; | ||
| 837 | + action = ignore; | ||
| 838 | + } else if (*p == ' ') { | ||
| 839 | + action = ignore; | ||
| 840 | + } else if (*p == ',') { | ||
| 841 | + lex_state = ls_comma; | ||
| 842 | + action = ignore; | ||
| 843 | + ready = true; | ||
| 844 | + } else if (*p == ',') { | ||
| 845 | + lex_state = ls_comma; | ||
| 846 | + action = ignore; | ||
| 847 | + ready = true; | ||
| 848 | + } else if (*p == ':') { | ||
| 849 | + lex_state = ls_colon; | ||
| 850 | + action = ignore; | ||
| 851 | + ready = true; | ||
| 852 | + } else if (*p == '{') { | ||
| 853 | + lex_state = ls_begin_dict; | ||
| 854 | + action = ignore; | ||
| 855 | + ready = true; | ||
| 856 | + } else if (*p == '}') { | ||
| 857 | + lex_state = ls_end_dict; | ||
| 858 | + action = ignore; | ||
| 859 | + ready = true; | ||
| 860 | + } else if (*p == '[') { | ||
| 861 | + lex_state = ls_begin_array; | ||
| 862 | + action = ignore; | ||
| 863 | + ready = true; | ||
| 864 | + } else if (*p == ']') { | ||
| 865 | + lex_state = ls_end_array; | ||
| 866 | + action = ignore; | ||
| 867 | + ready = true; | ||
| 868 | + } else if ((*p >= 'a') && (*p <= 'z')) { | ||
| 869 | + lex_state = ls_alpha; | ||
| 870 | + } else if (*p == '-') { | ||
| 871 | + lex_state = ls_number_minus; | ||
| 872 | + } else if ((*p >= '1') && (*p <= '9')) { | ||
| 873 | + lex_state = ls_number_before_point; | ||
| 874 | + } else if (*p == '0') { | ||
| 875 | + lex_state = ls_number_leading_zero; | ||
| 876 | + } else { | ||
| 877 | + QTC::TC("libtests", "JSON parse bad character"); | ||
| 878 | + throw std::runtime_error( | ||
| 879 | + "JSON: offset " + std::to_string(offset) + | ||
| 880 | + ": unexpected character " + std::string(p, 1)); | ||
| 881 | + } | ||
| 882 | + break; | ||
| 950 | 883 | ||
| 951 | - case ls_number_e: | ||
| 952 | - if ((*p >= '0') && (*p <= '9')) { | ||
| 953 | - lex_state = ls_number; | ||
| 954 | - } else if ((*p == '+') || (*p == '-')) { | ||
| 955 | - lex_state = ls_number_e_sign; | ||
| 956 | - } else { | ||
| 957 | - tokenError(); | ||
| 958 | - } | ||
| 959 | - break; | 884 | + case ls_number_minus: |
| 885 | + if ((*p >= '1') && (*p <= '9')) { | ||
| 886 | + lex_state = ls_number_before_point; | ||
| 887 | + } else if (*p == '0') { | ||
| 888 | + lex_state = ls_number_leading_zero; | ||
| 889 | + } else { | ||
| 890 | + QTC::TC("libtests", "JSON parse number minus no digits"); | ||
| 891 | + throw std::runtime_error( | ||
| 892 | + "JSON: offset " + std::to_string(offset) + | ||
| 893 | + ": numeric literal: no digit after minus sign"); | ||
| 894 | + } | ||
| 895 | + break; | ||
| 960 | 896 | ||
| 961 | - case ls_number_e_sign: | ||
| 962 | - if ((*p >= '0') && (*p <= '9')) { | ||
| 963 | - lex_state = ls_number; | ||
| 964 | - } else { | ||
| 965 | - tokenError(); | ||
| 966 | - } | ||
| 967 | - break; | 897 | + case ls_number_leading_zero: |
| 898 | + if (*p == '.') { | ||
| 899 | + lex_state = ls_number_point; | ||
| 900 | + } else if (*p == ' ') { | ||
| 901 | + lex_state = ls_number; | ||
| 902 | + action = ignore; | ||
| 903 | + ready = true; | ||
| 904 | + } else if (strchr("{}[]:,", *p)) { | ||
| 905 | + lex_state = ls_number; | ||
| 906 | + action = reread; | ||
| 907 | + ready = true; | ||
| 908 | + } else if (*p == 'e' || *p == 'E') { | ||
| 909 | + lex_state = ls_number_e; | ||
| 910 | + } else { | ||
| 911 | + QTC::TC("libtests", "JSON parse leading zero"); | ||
| 912 | + throw std::runtime_error( | ||
| 913 | + "JSON: offset " + std::to_string(offset) + | ||
| 914 | + ": number with leading zero"); | ||
| 915 | + } | ||
| 916 | + break; | ||
| 968 | 917 | ||
| 969 | - case ls_number: | ||
| 970 | - // We only get here after we have seen an exponent. | ||
| 971 | - if ((*p >= '0') && (*p <= '9')) { | ||
| 972 | - // continue | ||
| 973 | - } else if (QUtil::is_space(*p)) { | ||
| 974 | - action = ignore; | ||
| 975 | - ready = true; | ||
| 976 | - } else if (strchr("{}[]:,", *p)) { | ||
| 977 | - action = reread; | ||
| 978 | - ready = true; | ||
| 979 | - } else { | ||
| 980 | - tokenError(); | ||
| 981 | - } | ||
| 982 | - break; | 918 | + case ls_number_before_point: |
| 919 | + if ((*p >= '0') && (*p <= '9')) { | ||
| 920 | + // continue | ||
| 921 | + } else if (*p == '.') { | ||
| 922 | + lex_state = ls_number_point; | ||
| 923 | + } else if (*p == ' ') { | ||
| 924 | + lex_state = ls_number; | ||
| 925 | + action = ignore; | ||
| 926 | + ready = true; | ||
| 927 | + } else if (strchr("{}[]:,", *p)) { | ||
| 928 | + lex_state = ls_number; | ||
| 929 | + action = reread; | ||
| 930 | + ready = true; | ||
| 931 | + } else if (*p == 'e' || *p == 'E') { | ||
| 932 | + lex_state = ls_number_e; | ||
| 933 | + } else { | ||
| 934 | + tokenError(); | ||
| 935 | + } | ||
| 936 | + break; | ||
| 983 | 937 | ||
| 984 | - case ls_alpha: | ||
| 985 | - if ((*p >= 'a') && (*p <= 'z')) { | ||
| 986 | - // okay | ||
| 987 | - } else if (QUtil::is_space(*p)) { | ||
| 988 | - action = ignore; | ||
| 989 | - ready = true; | ||
| 990 | - } else if (strchr("{}[]:,", *p)) { | ||
| 991 | - action = reread; | ||
| 992 | - ready = true; | ||
| 993 | - } else { | ||
| 994 | - tokenError(); | ||
| 995 | - } | ||
| 996 | - break; | 938 | + case ls_number_point: |
| 939 | + if ((*p >= '0') && (*p <= '9')) { | ||
| 940 | + lex_state = ls_number_after_point; | ||
| 941 | + } else { | ||
| 942 | + tokenError(); | ||
| 943 | + } | ||
| 944 | + break; | ||
| 997 | 945 | ||
| 998 | - case ls_string: | ||
| 999 | - if (*p == '"') { | ||
| 1000 | - if (high_offset) { | ||
| 1001 | - QTC::TC("libtests", "JSON 16 dangling high"); | ||
| 1002 | - throw std::runtime_error( | ||
| 1003 | - "JSON: offset " + std::to_string(high_offset) + | ||
| 1004 | - ": UTF-16 high surrogate not followed by low " | ||
| 1005 | - "surrogate"); | 946 | + case ls_number_after_point: |
| 947 | + if ((*p >= '0') && (*p <= '9')) { | ||
| 948 | + // continue | ||
| 949 | + } else if (*p == ' ') { | ||
| 950 | + lex_state = ls_number; | ||
| 951 | + action = ignore; | ||
| 952 | + ready = true; | ||
| 953 | + } else if (strchr("{}[]:,", *p)) { | ||
| 954 | + lex_state = ls_number; | ||
| 955 | + action = reread; | ||
| 956 | + ready = true; | ||
| 957 | + } else if (*p == 'e' || *p == 'E') { | ||
| 958 | + lex_state = ls_number_e; | ||
| 959 | + } else { | ||
| 960 | + tokenError(); | ||
| 1006 | } | 961 | } |
| 1007 | - action = ignore; | ||
| 1008 | - ready = true; | ||
| 1009 | - } else if (*p == '\\') { | ||
| 1010 | - lex_state = ls_backslash; | ||
| 1011 | - action = ignore; | ||
| 1012 | - } | ||
| 1013 | - break; | 962 | + break; |
| 1014 | 963 | ||
| 1015 | - case ls_backslash: | ||
| 1016 | - action = ignore; | ||
| 1017 | - lex_state = ls_string; | ||
| 1018 | - switch (*p) { | ||
| 1019 | - case '\\': | ||
| 1020 | - case '\"': | ||
| 1021 | - case '/': | ||
| 1022 | - // \/ is allowed in json input, but so is /, so we | ||
| 1023 | - // don't map / to \/ in output. | ||
| 1024 | - token += *p; | 964 | + case ls_number_e: |
| 965 | + if ((*p >= '0') && (*p <= '9')) { | ||
| 966 | + lex_state = ls_number; | ||
| 967 | + } else if ((*p == '+') || (*p == '-')) { | ||
| 968 | + lex_state = ls_number_e_sign; | ||
| 969 | + } else { | ||
| 970 | + tokenError(); | ||
| 971 | + } | ||
| 1025 | break; | 972 | break; |
| 1026 | - case 'b': | ||
| 1027 | - token += '\b'; | 973 | + |
| 974 | + case ls_number_e_sign: | ||
| 975 | + if ((*p >= '0') && (*p <= '9')) { | ||
| 976 | + lex_state = ls_number; | ||
| 977 | + } else { | ||
| 978 | + tokenError(); | ||
| 979 | + } | ||
| 1028 | break; | 980 | break; |
| 1029 | - case 'f': | ||
| 1030 | - token += '\f'; | 981 | + |
| 982 | + case ls_number: | ||
| 983 | + // We only get here after we have seen an exponent. | ||
| 984 | + if ((*p >= '0') && (*p <= '9')) { | ||
| 985 | + // continue | ||
| 986 | + } else if (*p == ' ') { | ||
| 987 | + action = ignore; | ||
| 988 | + ready = true; | ||
| 989 | + } else if (strchr("{}[]:,", *p)) { | ||
| 990 | + action = reread; | ||
| 991 | + ready = true; | ||
| 992 | + } else { | ||
| 993 | + tokenError(); | ||
| 994 | + } | ||
| 1031 | break; | 995 | break; |
| 1032 | - case 'n': | ||
| 1033 | - token += '\n'; | 996 | + |
| 997 | + case ls_alpha: | ||
| 998 | + if ((*p >= 'a') && (*p <= 'z')) { | ||
| 999 | + // okay | ||
| 1000 | + } else if (*p == ' ') { | ||
| 1001 | + action = ignore; | ||
| 1002 | + ready = true; | ||
| 1003 | + } else if (strchr("{}[]:,", *p)) { | ||
| 1004 | + action = reread; | ||
| 1005 | + ready = true; | ||
| 1006 | + } else { | ||
| 1007 | + tokenError(); | ||
| 1008 | + } | ||
| 1034 | break; | 1009 | break; |
| 1035 | - case 'r': | ||
| 1036 | - token += '\r'; | 1010 | + |
| 1011 | + case ls_string: | ||
| 1012 | + if (*p == '"') { | ||
| 1013 | + if (high_offset) { | ||
| 1014 | + QTC::TC("libtests", "JSON 16 dangling high"); | ||
| 1015 | + throw std::runtime_error( | ||
| 1016 | + "JSON: offset " + std::to_string(high_offset) + | ||
| 1017 | + ": UTF-16 high surrogate not followed by low " | ||
| 1018 | + "surrogate"); | ||
| 1019 | + } | ||
| 1020 | + action = ignore; | ||
| 1021 | + ready = true; | ||
| 1022 | + } else if (*p == '\\') { | ||
| 1023 | + lex_state = ls_backslash; | ||
| 1024 | + action = ignore; | ||
| 1025 | + } | ||
| 1037 | break; | 1026 | break; |
| 1038 | - case 't': | ||
| 1039 | - token += '\t'; | 1027 | + |
| 1028 | + case ls_backslash: | ||
| 1029 | + action = ignore; | ||
| 1030 | + lex_state = ls_string; | ||
| 1031 | + switch (*p) { | ||
| 1032 | + case '\\': | ||
| 1033 | + case '\"': | ||
| 1034 | + case '/': | ||
| 1035 | + // \/ is allowed in json input, but so is /, so we | ||
| 1036 | + // don't map / to \/ in output. | ||
| 1037 | + token += *p; | ||
| 1038 | + break; | ||
| 1039 | + case 'b': | ||
| 1040 | + token += '\b'; | ||
| 1041 | + break; | ||
| 1042 | + case 'f': | ||
| 1043 | + token += '\f'; | ||
| 1044 | + break; | ||
| 1045 | + case 'n': | ||
| 1046 | + token += '\n'; | ||
| 1047 | + break; | ||
| 1048 | + case 'r': | ||
| 1049 | + token += '\r'; | ||
| 1050 | + break; | ||
| 1051 | + case 't': | ||
| 1052 | + token += '\t'; | ||
| 1053 | + break; | ||
| 1054 | + case 'u': | ||
| 1055 | + lex_state = ls_u4; | ||
| 1056 | + u_count = 0; | ||
| 1057 | + u_value = 0; | ||
| 1058 | + break; | ||
| 1059 | + default: | ||
| 1060 | + lex_state = ls_backslash; | ||
| 1061 | + tokenError(); | ||
| 1062 | + } | ||
| 1040 | break; | 1063 | break; |
| 1041 | - case 'u': | ||
| 1042 | - lex_state = ls_u4; | ||
| 1043 | - u_count = 0; | ||
| 1044 | - u_value = 0; | 1064 | + |
| 1065 | + case ls_u4: | ||
| 1066 | + using ui = unsigned int; | ||
| 1067 | + action = ignore; | ||
| 1068 | + if ('0' <= *p && *p <= '9') { | ||
| 1069 | + u_value = 16 * u_value + (ui(*p) - ui('0')); | ||
| 1070 | + } else if ('a' <= *p && *p <= 'f') { | ||
| 1071 | + u_value = 16 * u_value + (10 + ui(*p) - ui('a')); | ||
| 1072 | + } else if ('A' <= *p && *p <= 'F') { | ||
| 1073 | + u_value = 16 * u_value + (10 + ui(*p) - ui('A')); | ||
| 1074 | + } else { | ||
| 1075 | + tokenError(); | ||
| 1076 | + } | ||
| 1077 | + if (++u_count == 4) { | ||
| 1078 | + handle_u_code( | ||
| 1079 | + u_value, | ||
| 1080 | + offset - 5, | ||
| 1081 | + high_surrogate, | ||
| 1082 | + high_offset, | ||
| 1083 | + token); | ||
| 1084 | + lex_state = ls_string; | ||
| 1085 | + } | ||
| 1045 | break; | 1086 | break; |
| 1087 | + | ||
| 1046 | default: | 1088 | default: |
| 1047 | - lex_state = ls_backslash; | ||
| 1048 | - tokenError(); | 1089 | + throw std::logic_error( |
| 1090 | + "JSONParser::getToken : trying to handle delimiter state"); | ||
| 1049 | } | 1091 | } |
| 1050 | - break; | ||
| 1051 | - | ||
| 1052 | - case ls_u4: | ||
| 1053 | - using ui = unsigned int; | ||
| 1054 | - action = ignore; | ||
| 1055 | - if ('0' <= *p && *p <= '9') { | ||
| 1056 | - u_value = 16 * u_value + (ui(*p) - ui('0')); | ||
| 1057 | - } else if ('a' <= *p && *p <= 'f') { | ||
| 1058 | - u_value = 16 * u_value + (10 + ui(*p) - ui('a')); | ||
| 1059 | - } else if ('A' <= *p && *p <= 'F') { | ||
| 1060 | - u_value = 16 * u_value + (10 + ui(*p) - ui('A')); | ||
| 1061 | - } else { | ||
| 1062 | - tokenError(); | 1092 | + switch (action) { |
| 1093 | + case reread: | ||
| 1094 | + break; | ||
| 1095 | + case append: | ||
| 1096 | + token.append(1, *p); | ||
| 1097 | + // fall through | ||
| 1098 | + case ignore: | ||
| 1099 | + ++p; | ||
| 1100 | + ++offset; | ||
| 1101 | + break; | ||
| 1063 | } | 1102 | } |
| 1064 | - if (++u_count == 4) { | ||
| 1065 | - handle_u_code( | ||
| 1066 | - u_value, offset - 5, high_surrogate, high_offset, token); | ||
| 1067 | - lex_state = ls_string; | 1103 | + if (ready) { |
| 1104 | + return; | ||
| 1068 | } | 1105 | } |
| 1069 | - break; | ||
| 1070 | - | ||
| 1071 | - default: | ||
| 1072 | - throw std::logic_error( | ||
| 1073 | - "JSONParser::getToken : trying to handle delimiter state"); | ||
| 1074 | - } | ||
| 1075 | - switch (action) { | ||
| 1076 | - case reread: | ||
| 1077 | - break; | ||
| 1078 | - case append: | ||
| 1079 | - token.append(1, *p); | ||
| 1080 | - // fall through | ||
| 1081 | - case ignore: | ||
| 1082 | - ++p; | ||
| 1083 | - ++offset; | ||
| 1084 | - break; | ||
| 1085 | - } | ||
| 1086 | - if (ready) { | ||
| 1087 | - break; | ||
| 1088 | } | 1106 | } |
| 1089 | } | 1107 | } |
| 1090 | - if (done) { | ||
| 1091 | - if (!token.empty() && !ready) { | ||
| 1092 | - switch (lex_state) { | ||
| 1093 | - case ls_top: | ||
| 1094 | - // Can't happen | ||
| 1095 | - throw std::logic_error("tok_start set in ls_top while parsing"); | ||
| 1096 | - break; | ||
| 1097 | 1108 | ||
| 1098 | - case ls_number_leading_zero: | ||
| 1099 | - case ls_number_before_point: | ||
| 1100 | - case ls_number_after_point: | ||
| 1101 | - lex_state = ls_number; | ||
| 1102 | - break; | 1109 | + // We only get here if on end of input or if the last character was a |
| 1110 | + // control character. | ||
| 1103 | 1111 | ||
| 1104 | - case ls_number: | ||
| 1105 | - case ls_alpha: | ||
| 1106 | - // terminal state | ||
| 1107 | - break; | 1112 | + if (!token.empty()) { |
| 1113 | + switch (lex_state) { | ||
| 1114 | + case ls_top: | ||
| 1115 | + // Can't happen | ||
| 1116 | + throw std::logic_error("tok_start set in ls_top while parsing"); | ||
| 1117 | + break; | ||
| 1108 | 1118 | ||
| 1109 | - default: | ||
| 1110 | - tokenError(); | ||
| 1111 | - } | 1119 | + case ls_number_leading_zero: |
| 1120 | + case ls_number_before_point: | ||
| 1121 | + case ls_number_after_point: | ||
| 1122 | + lex_state = ls_number; | ||
| 1123 | + break; | ||
| 1124 | + | ||
| 1125 | + case ls_number: | ||
| 1126 | + case ls_alpha: | ||
| 1127 | + // terminal state | ||
| 1128 | + break; | ||
| 1129 | + | ||
| 1130 | + default: | ||
| 1131 | + tokenError(); | ||
| 1112 | } | 1132 | } |
| 1113 | } | 1133 | } |
| 1114 | } | 1134 | } |
libtests/libtests.testcov
| @@ -79,6 +79,7 @@ JSON parse number minus no digits 0 | @@ -79,6 +79,7 @@ JSON parse number minus no digits 0 | ||
| 79 | JSON parse incomplete number 0 | 79 | JSON parse incomplete number 0 |
| 80 | JSON parse keyword bad character 0 | 80 | JSON parse keyword bad character 0 |
| 81 | JSON parse backslash bad character 0 | 81 | JSON parse backslash bad character 0 |
| 82 | +JSON parse control char in string 0 | ||
| 82 | JSON parse leading zero 0 | 83 | JSON parse leading zero 0 |
| 83 | JSON parse ls premature end of input 0 | 84 | JSON parse ls premature end of input 0 |
| 84 | JSON parse bad hex after u 0 | 85 | JSON parse bad hex after u 0 |
libtests/qtest/json_parse.test
| @@ -125,10 +125,10 @@ my @bad = ( | @@ -125,10 +125,10 @@ my @bad = ( | ||
| 125 | "e after minus", # 42 | 125 | "e after minus", # 42 |
| 126 | "missing digit after e", # 43 | 126 | "missing digit after e", # 43 |
| 127 | "missing digit after e+/-", # 44 | 127 | "missing digit after e+/-", # 44 |
| 128 | - # "tab char in string", # 45 | ||
| 129 | - # "cr char in string", # 46 | ||
| 130 | - # "lf char in string", # 47 | ||
| 131 | - # "bs char in string", # 48 | 128 | + "tab char in string", # 45 |
| 129 | + "cr char in string", # 46 | ||
| 130 | + "lf char in string", # 47 | ||
| 131 | + "bs char in string", # 48 | ||
| 132 | ); | 132 | ); |
| 133 | 133 | ||
| 134 | my $i = 0; | 134 | my $i = 0; |
libtests/qtest/json_parse/bad-01.out
libtests/qtest/json_parse/bad-02.out
libtests/qtest/json_parse/bad-03.out
libtests/qtest/json_parse/bad-27.out
libtests/qtest/json_parse/bad-31.json
libtests/qtest/json_parse/bad-45.out
libtests/qtest/json_parse/bad-46.out
libtests/qtest/json_parse/bad-47.out