Commit d3152869b666a725d303e0667a69f973fc5a96ed
1 parent: ee32235f
In JSONParser::getToken handle structural and space chars early
Showing 3 changed files with 95 additions and 73 deletions
libqpdf/JSON.cc
| ... | ... | @@ -791,7 +791,7 @@ JSONParser::tokenError() |
| 791 | 791 | void |
| 792 | 792 | JSONParser::getToken() |
| 793 | 793 | { |
| 794 | - enum { append, ignore, reread } action = append; | |
| 794 | + enum { append, ignore } action = append; | |
| 795 | 795 | bool ready = false; |
| 796 | 796 | token.clear(); |
| 797 | 797 | |
| ... | ... | @@ -820,13 +820,103 @@ JSONParser::getToken() |
| 820 | 820 | } else { |
| 821 | 821 | break; |
| 822 | 822 | } |
| 823 | - | |
| 824 | 823 | } else { |
| 825 | 824 | QTC::TC("libtests", "JSON parse null character"); |
| 826 | 825 | throw std::runtime_error( |
| 827 | 826 | "JSON: control or null character at offset " + |
| 828 | 827 | std::to_string(offset)); |
| 829 | 828 | } |
| 829 | + } else if (*p == ',') { | |
| 830 | + if (lex_state == ls_top) { | |
| 831 | + ++p; | |
| 832 | + ++offset; | |
| 833 | + lex_state = ls_comma; | |
| 834 | + return; | |
| 835 | + } else if (lex_state == ls_string) { | |
| 836 | + token += *p; | |
| 837 | + ++p; | |
| 838 | + ++offset; | |
| 839 | + } else { | |
| 840 | + break; | |
| 841 | + } | |
| 842 | + } else if (*p == ':') { | |
| 843 | + if (lex_state == ls_top) { | |
| 844 | + ++p; | |
| 845 | + ++offset; | |
| 846 | + lex_state = ls_colon; | |
| 847 | + return; | |
| 848 | + } else if (lex_state == ls_string) { | |
| 849 | + token += *p; | |
| 850 | + ++p; | |
| 851 | + ++offset; | |
| 852 | + } else { | |
| 853 | + break; | |
| 854 | + } | |
| 855 | + } else if (*p == ' ') { | |
| 856 | + if (lex_state == ls_top) { | |
| 857 | + ++p; | |
| 858 | + ++offset; | |
| 859 | + } else if (lex_state == ls_string) { | |
| 860 | + token += *p; | |
| 861 | + ++p; | |
| 862 | + ++offset; | |
| 863 | + } else { | |
| 864 | + break; | |
| 865 | + } | |
| 866 | + } else if (*p == '{') { | |
| 867 | + if (lex_state == ls_top) { | |
| 868 | + token_start = offset; | |
| 869 | + ++p; | |
| 870 | + ++offset; | |
| 871 | + lex_state = ls_begin_dict; | |
| 872 | + return; | |
| 873 | + } else if (lex_state == ls_string) { | |
| 874 | + token += *p; | |
| 875 | + ++p; | |
| 876 | + ++offset; | |
| 877 | + } else { | |
| 878 | + break; | |
| 879 | + } | |
| 880 | + } else if (*p == '}') { | |
| 881 | + if (lex_state == ls_top) { | |
| 882 | + ++p; | |
| 883 | + ++offset; | |
| 884 | + lex_state = ls_end_dict; | |
| 885 | + return; | |
| 886 | + } else if (lex_state == ls_string) { | |
| 887 | + token += *p; | |
| 888 | + ++p; | |
| 889 | + ++offset; | |
| 890 | + } else { | |
| 891 | + break; | |
| 892 | + } | |
| 893 | + } else if (*p == '[') { | |
| 894 | + if (lex_state == ls_top) { | |
| 895 | + token_start = offset; | |
| 896 | + ++p; | |
| 897 | + ++offset; | |
| 898 | + lex_state = ls_begin_array; | |
| 899 | + return; | |
| 900 | + } else if (lex_state == ls_string) { | |
| 901 | + token += *p; | |
| 902 | + ++p; | |
| 903 | + ++offset; | |
| 904 | + } else { | |
| 905 | + break; | |
| 906 | + } | |
| 907 | + } else if (*p == ']') { | |
| 908 | + if (lex_state == ls_top) { | |
| 909 | + ++p; | |
| 910 | + ++offset; | |
| 911 | + lex_state = ls_end_array; | |
| 912 | + return; | |
| 913 | + } else if (lex_state == ls_string) { | |
| 914 | + token += *p; | |
| 915 | + ++p; | |
| 916 | + ++offset; | |
| 917 | + } else { | |
| 918 | + break; | |
| 919 | + } | |
| 830 | 920 | } else { |
| 831 | 921 | action = append; |
| 832 | 922 | switch (lex_state) { |
| ... | ... | @@ -835,36 +925,6 @@ JSONParser::getToken() |
| 835 | 925 | if (*p == '"') { |
| 836 | 926 | lex_state = ls_string; |
| 837 | 927 | action = ignore; |
| 838 | - } else if (*p == ' ') { | |
| 839 | - action = ignore; | |
| 840 | - } else if (*p == ',') { | |
| 841 | - lex_state = ls_comma; | |
| 842 | - action = ignore; | |
| 843 | - ready = true; | |
| 844 | - } else if (*p == ',') { | |
| 845 | - lex_state = ls_comma; | |
| 846 | - action = ignore; | |
| 847 | - ready = true; | |
| 848 | - } else if (*p == ':') { | |
| 849 | - lex_state = ls_colon; | |
| 850 | - action = ignore; | |
| 851 | - ready = true; | |
| 852 | - } else if (*p == '{') { | |
| 853 | - lex_state = ls_begin_dict; | |
| 854 | - action = ignore; | |
| 855 | - ready = true; | |
| 856 | - } else if (*p == '}') { | |
| 857 | - lex_state = ls_end_dict; | |
| 858 | - action = ignore; | |
| 859 | - ready = true; | |
| 860 | - } else if (*p == '[') { | |
| 861 | - lex_state = ls_begin_array; | |
| 862 | - action = ignore; | |
| 863 | - ready = true; | |
| 864 | - } else if (*p == ']') { | |
| 865 | - lex_state = ls_end_array; | |
| 866 | - action = ignore; | |
| 867 | - ready = true; | |
| 868 | 928 | } else if ((*p >= 'a') && (*p <= 'z')) { |
| 869 | 929 | lex_state = ls_alpha; |
| 870 | 930 | } else if (*p == '-') { |
| ... | ... | @@ -897,14 +957,6 @@ JSONParser::getToken() |
| 897 | 957 | case ls_number_leading_zero: |
| 898 | 958 | if (*p == '.') { |
| 899 | 959 | lex_state = ls_number_point; |
| 900 | - } else if (*p == ' ') { | |
| 901 | - lex_state = ls_number; | |
| 902 | - action = ignore; | |
| 903 | - ready = true; | |
| 904 | - } else if (strchr("{}[]:,", *p)) { | |
| 905 | - lex_state = ls_number; | |
| 906 | - action = reread; | |
| 907 | - ready = true; | |
| 908 | 960 | } else if (*p == 'e' || *p == 'E') { |
| 909 | 961 | lex_state = ls_number_e; |
| 910 | 962 | } else { |
| ... | ... | @@ -920,14 +972,6 @@ JSONParser::getToken() |
| 920 | 972 | // continue |
| 921 | 973 | } else if (*p == '.') { |
| 922 | 974 | lex_state = ls_number_point; |
| 923 | - } else if (*p == ' ') { | |
| 924 | - lex_state = ls_number; | |
| 925 | - action = ignore; | |
| 926 | - ready = true; | |
| 927 | - } else if (strchr("{}[]:,", *p)) { | |
| 928 | - lex_state = ls_number; | |
| 929 | - action = reread; | |
| 930 | - ready = true; | |
| 931 | 975 | } else if (*p == 'e' || *p == 'E') { |
| 932 | 976 | lex_state = ls_number_e; |
| 933 | 977 | } else { |
| ... | ... | @@ -946,14 +990,6 @@ JSONParser::getToken() |
| 946 | 990 | case ls_number_after_point: |
| 947 | 991 | if ((*p >= '0') && (*p <= '9')) { |
| 948 | 992 | // continue |
| 949 | - } else if (*p == ' ') { | |
| 950 | - lex_state = ls_number; | |
| 951 | - action = ignore; | |
| 952 | - ready = true; | |
| 953 | - } else if (strchr("{}[]:,", *p)) { | |
| 954 | - lex_state = ls_number; | |
| 955 | - action = reread; | |
| 956 | - ready = true; | |
| 957 | 993 | } else if (*p == 'e' || *p == 'E') { |
| 958 | 994 | lex_state = ls_number_e; |
| 959 | 995 | } else { |
| ... | ... | @@ -983,12 +1019,6 @@ JSONParser::getToken() |
| 983 | 1019 | // We only get here after we have seen an exponent. |
| 984 | 1020 | if ((*p >= '0') && (*p <= '9')) { |
| 985 | 1021 | // continue |
| 986 | - } else if (*p == ' ') { | |
| 987 | - action = ignore; | |
| 988 | - ready = true; | |
| 989 | - } else if (strchr("{}[]:,", *p)) { | |
| 990 | - action = reread; | |
| 991 | - ready = true; | |
| 992 | 1022 | } else { |
| 993 | 1023 | tokenError(); |
| 994 | 1024 | } |
| ... | ... | @@ -997,12 +1027,6 @@ JSONParser::getToken() |
| 997 | 1027 | case ls_alpha: |
| 998 | 1028 | if ((*p >= 'a') && (*p <= 'z')) { |
| 999 | 1029 | // okay |
| 1000 | - } else if (*p == ' ') { | |
| 1001 | - action = ignore; | |
| 1002 | - ready = true; | |
| 1003 | - } else if (strchr("{}[]:,", *p)) { | |
| 1004 | - action = reread; | |
| 1005 | - ready = true; | |
| 1006 | 1030 | } else { |
| 1007 | 1031 | tokenError(); |
| 1008 | 1032 | } |
| ... | ... | @@ -1090,8 +1114,6 @@ JSONParser::getToken() |
| 1090 | 1114 | "JSONParser::getToken : trying to handle delimiter state"); |
| 1091 | 1115 | } |
| 1092 | 1116 | switch (action) { |
| 1093 | - case reread: | |
| 1094 | - break; | |
| 1095 | 1117 | case append: |
| 1096 | 1118 | token.append(1, *p); |
| 1097 | 1119 | // fall through |
| ... | ... | @@ -1107,7 +1129,7 @@ JSONParser::getToken() |
| 1107 | 1129 | } |
| 1108 | 1130 | |
| 1109 | 1131 | // We only get here if on end of input or if the last character was a |
| 1110 | - // control character. | |
| 1132 | + // control character or other delimiter. | |
| 1111 | 1133 | |
| 1112 | 1134 | if (!token.empty()) { |
| 1113 | 1135 | switch (lex_state) { | ... | ... |
libtests/qtest/json_parse/bad-09.out
libtests/qtest/json_parse/bad-31.out