Commit 39dfd305c8b29d0959c3ac1201f0406f55091e85
1 parent: 1e0ab79a
In JSONParser add lexer states for delimiters
Showing 1 changed file with 50 additions and 41 deletions
libqpdf/JSON.cc
| ... | ... | @@ -653,6 +653,12 @@ namespace |
| 653 | 653 | ls_string, |
| 654 | 654 | ls_backslash, |
| 655 | 655 | ls_u4, |
| 656 | + ls_begin_array, | |
| 657 | + ls_end_array, | |
| 658 | + ls_begin_dict, | |
| 659 | + ls_end_dict, | |
| 660 | + ls_colon, | |
| 661 | + ls_comma, | |
| 656 | 662 | }; |
| 657 | 663 | |
| 658 | 664 | InputSource& is; |
| ... | ... | @@ -861,6 +867,24 @@ JSONParser::getToken() |
| 861 | 867 | lex_state = ls_string; |
| 862 | 868 | } else if (QUtil::is_space(*p)) { |
| 863 | 869 | action = ignore; |
| 870 | + } else if (*p == ',') { | |
| 871 | + lex_state = ls_comma; | |
| 872 | + ready = true; | |
| 873 | + } else if (*p == ':') { | |
| 874 | + lex_state = ls_colon; | |
| 875 | + ready = true; | |
| 876 | + } else if (*p == '{') { | |
| 877 | + lex_state = ls_begin_dict; | |
| 878 | + ready = true; | |
| 879 | + } else if (*p == '}') { | |
| 880 | + lex_state = ls_end_dict; | |
| 881 | + ready = true; | |
| 882 | + } else if (*p == '[') { | |
| 883 | + lex_state = ls_begin_array; | |
| 884 | + ready = true; | |
| 885 | + } else if (*p == ']') { | |
| 886 | + lex_state = ls_end_array; | |
| 887 | + ready = true; | |
| 864 | 888 | } else if ((*p >= 'a') && (*p <= 'z')) { |
| 865 | 889 | lex_state = ls_alpha; |
| 866 | 890 | } else if (*p == '-') { |
| ... | ... | @@ -869,8 +893,6 @@ JSONParser::getToken() |
| 869 | 893 | lex_state = ls_number_before_point; |
| 870 | 894 | } else if (*p == '0') { |
| 871 | 895 | lex_state = ls_number_leading_zero; |
| 872 | - } else if (strchr("{}[]:,", *p)) { | |
| 873 | - ready = true; | |
| 874 | 896 | } else { |
| 875 | 897 | QTC::TC("libtests", "JSON parse bad character"); |
| 876 | 898 | throw std::runtime_error( |
| ... | ... | @@ -1044,6 +1066,10 @@ JSONParser::getToken() |
| 1044 | 1066 | lex_state = ls_string; |
| 1045 | 1067 | } |
| 1046 | 1068 | break; |
| 1069 | + | |
| 1070 | + default: | |
| 1071 | + throw std::logic_error( | |
| 1072 | + "JSONParser::getToken : trying to handle delimiter state"); | |
| 1047 | 1073 | } |
| 1048 | 1074 | switch (action) { |
| 1049 | 1075 | case reread: |
| ... | ... | @@ -1090,7 +1116,7 @@ JSONParser::getToken() |
| 1090 | 1116 | void |
| 1091 | 1117 | JSONParser::handleToken() |
| 1092 | 1118 | { |
| 1093 | - if (token.empty()) { | |
| 1119 | + if (lex_state == ls_top) { | |
| 1094 | 1120 | return; |
| 1095 | 1121 | } |
| 1096 | 1122 | |
| ... | ... | @@ -1110,31 +1136,25 @@ JSONParser::handleToken() |
| 1110 | 1136 | } |
| 1111 | 1137 | s_value = decode_string(token, offset - toO(token.length())); |
| 1112 | 1138 | } |
| 1113 | - // Based on the lexical state and value, figure out whether we are | |
| 1114 | - // looking at an item or a delimiter. It will always be exactly | |
| 1115 | - // one of those two or an error condition. | |
| 1116 | 1139 | |
| 1117 | 1140 | std::shared_ptr<JSON> item; |
| 1118 | - char delimiter = '\0'; | |
| 1119 | - // Already verified that token is not empty | |
| 1120 | - char first_char = token.at(0); | |
| 1141 | + | |
| 1121 | 1142 | switch (lex_state) { |
| 1122 | - case ls_top: | |
| 1123 | - switch (first_char) { | |
| 1124 | - case '{': | |
| 1125 | - item = std::make_shared<JSON>(JSON::makeDictionary()); | |
| 1126 | - item->setStart(offset - toO(token.length())); | |
| 1127 | - break; | |
| 1143 | + case ls_begin_dict: | |
| 1144 | + item = std::make_shared<JSON>(JSON::makeDictionary()); | |
| 1145 | + item->setStart(offset - toO(token.length())); | |
| 1146 | + break; | |
| 1128 | 1147 | |
| 1129 | - case '[': | |
| 1130 | - item = std::make_shared<JSON>(JSON::makeArray()); | |
| 1131 | - item->setStart(offset - toO(token.length())); | |
| 1132 | - break; | |
| 1148 | + case ls_begin_array: | |
| 1149 | + item = std::make_shared<JSON>(JSON::makeArray()); | |
| 1150 | + item->setStart(offset - toO(token.length())); | |
| 1151 | + break; | |
| 1133 | 1152 | |
| 1134 | - default: | |
| 1135 | - delimiter = first_char; | |
| 1136 | - break; | |
| 1137 | - } | |
| 1153 | + case ls_colon: | |
| 1154 | + case ls_comma: | |
| 1155 | + case ls_end_array: | |
| 1156 | + case ls_end_dict: | |
| 1157 | + // continue | |
| 1138 | 1158 | break; |
| 1139 | 1159 | |
| 1140 | 1160 | case ls_number: |
| ... | ... | @@ -1166,12 +1186,6 @@ JSONParser::handleToken() |
| 1166 | 1186 | break; |
| 1167 | 1187 | } |
| 1168 | 1188 | |
| 1169 | - if ((item == nullptr) == (delimiter == '\0')) { | |
| 1170 | - throw std::logic_error( | |
| 1171 | - "JSONParser::handleToken: logic error: exactly one of item" | |
| 1172 | - " or delimiter must be set"); | |
| 1173 | - } | |
| 1174 | - | |
| 1175 | 1189 | // See whether what we have is allowed at this point. |
| 1176 | 1190 | |
| 1177 | 1191 | if (item.get()) { |
| ... | ... | @@ -1217,7 +1231,7 @@ JSONParser::handleToken() |
| 1217 | 1231 | break; |
| 1218 | 1232 | // okay |
| 1219 | 1233 | } |
| 1220 | - } else if (delimiter == '}') { | |
| 1234 | + } else if (lex_state == ls_end_dict) { | |
| 1221 | 1235 | if (!((parser_state == ps_dict_begin) || |
| 1222 | 1236 | (parser_state == ps_dict_after_item))) |
| 1223 | 1237 | |
| ... | ... | @@ -1227,7 +1241,7 @@ JSONParser::handleToken() |
| 1227 | 1241 | "JSON: offset " + std::to_string(offset) + |
| 1228 | 1242 | ": unexpected dictionary end delimiter"); |
| 1229 | 1243 | } |
| 1230 | - } else if (delimiter == ']') { | |
| 1244 | + } else if (lex_state == ls_end_array) { | |
| 1231 | 1245 | if (!((parser_state == ps_array_begin) || |
| 1232 | 1246 | (parser_state == ps_array_after_item))) |
| 1233 | 1247 | |
| ... | ... | @@ -1237,14 +1251,14 @@ JSONParser::handleToken() |
| 1237 | 1251 | "JSON: offset " + std::to_string(offset) + |
| 1238 | 1252 | ": unexpected array end delimiter"); |
| 1239 | 1253 | } |
| 1240 | - } else if (delimiter == ':') { | |
| 1254 | + } else if (lex_state == ls_colon) { | |
| 1241 | 1255 | if (parser_state != ps_dict_after_key) { |
| 1242 | 1256 | QTC::TC("libtests", "JSON parse unexpected :"); |
| 1243 | 1257 | throw std::runtime_error( |
| 1244 | 1258 | "JSON: offset " + std::to_string(offset) + |
| 1245 | 1259 | ": unexpected colon"); |
| 1246 | 1260 | } |
| 1247 | - } else if (delimiter == ',') { | |
| 1261 | + } else if (lex_state == ls_comma) { | |
| 1248 | 1262 | if (!((parser_state == ps_dict_after_item) || |
| 1249 | 1263 | (parser_state == ps_array_after_item))) { |
| 1250 | 1264 | QTC::TC("libtests", "JSON parse unexpected ,"); |
| ... | ... | @@ -1252,17 +1266,15 @@ JSONParser::handleToken() |
| 1252 | 1266 | "JSON: offset " + std::to_string(offset) + |
| 1253 | 1267 | ": unexpected comma"); |
| 1254 | 1268 | } |
| 1255 | - } else if (delimiter != '\0') { | |
| 1256 | - throw std::logic_error("JSONParser::handleToken: bad delimiter"); | |
| 1257 | 1269 | } |
| 1258 | 1270 | |
| 1259 | 1271 | // Now we know we have a delimiter or item that is allowed. Do |
| 1260 | 1272 | // whatever we need to do with it. |
| 1261 | 1273 | |
| 1262 | 1274 | parser_state_e next_state = ps_top; |
| 1263 | - if (delimiter == ':') { | |
| 1275 | + if (lex_state == ls_colon) { | |
| 1264 | 1276 | next_state = ps_dict_after_colon; |
| 1265 | - } else if (delimiter == ',') { | |
| 1277 | + } else if (lex_state == ls_comma) { | |
| 1266 | 1278 | if (parser_state == ps_dict_after_item) { |
| 1267 | 1279 | next_state = ps_dict_after_comma; |
| 1268 | 1280 | } else if (parser_state == ps_array_after_item) { |
| ... | ... | @@ -1271,7 +1283,7 @@ JSONParser::handleToken() |
| 1271 | 1283 | throw std::logic_error("JSONParser::handleToken: unexpected parser" |
| 1272 | 1284 | " state for comma"); |
| 1273 | 1285 | } |
| 1274 | - } else if ((delimiter == '}') || (delimiter == ']')) { | |
| 1286 | + } else if ((lex_state == ls_end_array) || (lex_state == ls_end_dict)) { | |
| 1275 | 1287 | next_state = ps_stack.back(); |
| 1276 | 1288 | ps_stack.pop_back(); |
| 1277 | 1289 | auto tos = stack.back(); |
| ... | ... | @@ -1282,9 +1294,6 @@ JSONParser::handleToken() |
| 1282 | 1294 | if (next_state != ps_done) { |
| 1283 | 1295 | stack.pop_back(); |
| 1284 | 1296 | } |
| 1285 | - } else if (delimiter != '\0') { | |
| 1286 | - throw std::logic_error( | |
| 1287 | - "JSONParser::handleToken: unexpected delimiter in transition"); | |
| 1288 | 1297 | } else if (item.get()) { |
| 1289 | 1298 | if (!(item->isArray() || item->isDictionary())) { |
| 1290 | 1299 | item->setStart(offset - toO(token.length())); | ... | ... |