Commit 39dfd305c8b29d0959c3ac1201f0406f55091e85
1 parent: 1e0ab79a
In JSONParser add lexer states for delimiters
Showing 1 changed file with 50 additions and 41 deletions
libqpdf/JSON.cc
| @@ -653,6 +653,12 @@ namespace | @@ -653,6 +653,12 @@ namespace | ||
| 653 | ls_string, | 653 | ls_string, |
| 654 | ls_backslash, | 654 | ls_backslash, |
| 655 | ls_u4, | 655 | ls_u4, |
| 656 | + ls_begin_array, | ||
| 657 | + ls_end_array, | ||
| 658 | + ls_begin_dict, | ||
| 659 | + ls_end_dict, | ||
| 660 | + ls_colon, | ||
| 661 | + ls_comma, | ||
| 656 | }; | 662 | }; |
| 657 | 663 | ||
| 658 | InputSource& is; | 664 | InputSource& is; |
| @@ -861,6 +867,24 @@ JSONParser::getToken() | @@ -861,6 +867,24 @@ JSONParser::getToken() | ||
| 861 | lex_state = ls_string; | 867 | lex_state = ls_string; |
| 862 | } else if (QUtil::is_space(*p)) { | 868 | } else if (QUtil::is_space(*p)) { |
| 863 | action = ignore; | 869 | action = ignore; |
| 870 | + } else if (*p == ',') { | ||
| 871 | + lex_state = ls_comma; | ||
| 872 | + ready = true; | ||
| 873 | + } else if (*p == ':') { | ||
| 874 | + lex_state = ls_colon; | ||
| 875 | + ready = true; | ||
| 876 | + } else if (*p == '{') { | ||
| 877 | + lex_state = ls_begin_dict; | ||
| 878 | + ready = true; | ||
| 879 | + } else if (*p == '}') { | ||
| 880 | + lex_state = ls_end_dict; | ||
| 881 | + ready = true; | ||
| 882 | + } else if (*p == '[') { | ||
| 883 | + lex_state = ls_begin_array; | ||
| 884 | + ready = true; | ||
| 885 | + } else if (*p == ']') { | ||
| 886 | + lex_state = ls_end_array; | ||
| 887 | + ready = true; | ||
| 864 | } else if ((*p >= 'a') && (*p <= 'z')) { | 888 | } else if ((*p >= 'a') && (*p <= 'z')) { |
| 865 | lex_state = ls_alpha; | 889 | lex_state = ls_alpha; |
| 866 | } else if (*p == '-') { | 890 | } else if (*p == '-') { |
| @@ -869,8 +893,6 @@ JSONParser::getToken() | @@ -869,8 +893,6 @@ JSONParser::getToken() | ||
| 869 | lex_state = ls_number_before_point; | 893 | lex_state = ls_number_before_point; |
| 870 | } else if (*p == '0') { | 894 | } else if (*p == '0') { |
| 871 | lex_state = ls_number_leading_zero; | 895 | lex_state = ls_number_leading_zero; |
| 872 | - } else if (strchr("{}[]:,", *p)) { | ||
| 873 | - ready = true; | ||
| 874 | } else { | 896 | } else { |
| 875 | QTC::TC("libtests", "JSON parse bad character"); | 897 | QTC::TC("libtests", "JSON parse bad character"); |
| 876 | throw std::runtime_error( | 898 | throw std::runtime_error( |
| @@ -1044,6 +1066,10 @@ JSONParser::getToken() | @@ -1044,6 +1066,10 @@ JSONParser::getToken() | ||
| 1044 | lex_state = ls_string; | 1066 | lex_state = ls_string; |
| 1045 | } | 1067 | } |
| 1046 | break; | 1068 | break; |
| 1069 | + | ||
| 1070 | + default: | ||
| 1071 | + throw std::logic_error( | ||
| 1072 | + "JSONParser::getToken : trying to handle delimiter state"); | ||
| 1047 | } | 1073 | } |
| 1048 | switch (action) { | 1074 | switch (action) { |
| 1049 | case reread: | 1075 | case reread: |
| @@ -1090,7 +1116,7 @@ JSONParser::getToken() | @@ -1090,7 +1116,7 @@ JSONParser::getToken() | ||
| 1090 | void | 1116 | void |
| 1091 | JSONParser::handleToken() | 1117 | JSONParser::handleToken() |
| 1092 | { | 1118 | { |
| 1093 | - if (token.empty()) { | 1119 | + if (lex_state == ls_top) { |
| 1094 | return; | 1120 | return; |
| 1095 | } | 1121 | } |
| 1096 | 1122 | ||
| @@ -1110,31 +1136,25 @@ JSONParser::handleToken() | @@ -1110,31 +1136,25 @@ JSONParser::handleToken() | ||
| 1110 | } | 1136 | } |
| 1111 | s_value = decode_string(token, offset - toO(token.length())); | 1137 | s_value = decode_string(token, offset - toO(token.length())); |
| 1112 | } | 1138 | } |
| 1113 | - // Based on the lexical state and value, figure out whether we are | ||
| 1114 | - // looking at an item or a delimiter. It will always be exactly | ||
| 1115 | - // one of those two or an error condition. | ||
| 1116 | 1139 | ||
| 1117 | std::shared_ptr<JSON> item; | 1140 | std::shared_ptr<JSON> item; |
| 1118 | - char delimiter = '\0'; | ||
| 1119 | - // Already verified that token is not empty | ||
| 1120 | - char first_char = token.at(0); | 1141 | + |
| 1121 | switch (lex_state) { | 1142 | switch (lex_state) { |
| 1122 | - case ls_top: | ||
| 1123 | - switch (first_char) { | ||
| 1124 | - case '{': | ||
| 1125 | - item = std::make_shared<JSON>(JSON::makeDictionary()); | ||
| 1126 | - item->setStart(offset - toO(token.length())); | ||
| 1127 | - break; | 1143 | + case ls_begin_dict: |
| 1144 | + item = std::make_shared<JSON>(JSON::makeDictionary()); | ||
| 1145 | + item->setStart(offset - toO(token.length())); | ||
| 1146 | + break; | ||
| 1128 | 1147 | ||
| 1129 | - case '[': | ||
| 1130 | - item = std::make_shared<JSON>(JSON::makeArray()); | ||
| 1131 | - item->setStart(offset - toO(token.length())); | ||
| 1132 | - break; | 1148 | + case ls_begin_array: |
| 1149 | + item = std::make_shared<JSON>(JSON::makeArray()); | ||
| 1150 | + item->setStart(offset - toO(token.length())); | ||
| 1151 | + break; | ||
| 1133 | 1152 | ||
| 1134 | - default: | ||
| 1135 | - delimiter = first_char; | ||
| 1136 | - break; | ||
| 1137 | - } | 1153 | + case ls_colon: |
| 1154 | + case ls_comma: | ||
| 1155 | + case ls_end_array: | ||
| 1156 | + case ls_end_dict: | ||
| 1157 | + // continue | ||
| 1138 | break; | 1158 | break; |
| 1139 | 1159 | ||
| 1140 | case ls_number: | 1160 | case ls_number: |
| @@ -1166,12 +1186,6 @@ JSONParser::handleToken() | @@ -1166,12 +1186,6 @@ JSONParser::handleToken() | ||
| 1166 | break; | 1186 | break; |
| 1167 | } | 1187 | } |
| 1168 | 1188 | ||
| 1169 | - if ((item == nullptr) == (delimiter == '\0')) { | ||
| 1170 | - throw std::logic_error( | ||
| 1171 | - "JSONParser::handleToken: logic error: exactly one of item" | ||
| 1172 | - " or delimiter must be set"); | ||
| 1173 | - } | ||
| 1174 | - | ||
| 1175 | // See whether what we have is allowed at this point. | 1189 | // See whether what we have is allowed at this point. |
| 1176 | 1190 | ||
| 1177 | if (item.get()) { | 1191 | if (item.get()) { |
| @@ -1217,7 +1231,7 @@ JSONParser::handleToken() | @@ -1217,7 +1231,7 @@ JSONParser::handleToken() | ||
| 1217 | break; | 1231 | break; |
| 1218 | // okay | 1232 | // okay |
| 1219 | } | 1233 | } |
| 1220 | - } else if (delimiter == '}') { | 1234 | + } else if (lex_state == ls_end_dict) { |
| 1221 | if (!((parser_state == ps_dict_begin) || | 1235 | if (!((parser_state == ps_dict_begin) || |
| 1222 | (parser_state == ps_dict_after_item))) | 1236 | (parser_state == ps_dict_after_item))) |
| 1223 | 1237 | ||
| @@ -1227,7 +1241,7 @@ JSONParser::handleToken() | @@ -1227,7 +1241,7 @@ JSONParser::handleToken() | ||
| 1227 | "JSON: offset " + std::to_string(offset) + | 1241 | "JSON: offset " + std::to_string(offset) + |
| 1228 | ": unexpected dictionary end delimiter"); | 1242 | ": unexpected dictionary end delimiter"); |
| 1229 | } | 1243 | } |
| 1230 | - } else if (delimiter == ']') { | 1244 | + } else if (lex_state == ls_end_array) { |
| 1231 | if (!((parser_state == ps_array_begin) || | 1245 | if (!((parser_state == ps_array_begin) || |
| 1232 | (parser_state == ps_array_after_item))) | 1246 | (parser_state == ps_array_after_item))) |
| 1233 | 1247 | ||
| @@ -1237,14 +1251,14 @@ JSONParser::handleToken() | @@ -1237,14 +1251,14 @@ JSONParser::handleToken() | ||
| 1237 | "JSON: offset " + std::to_string(offset) + | 1251 | "JSON: offset " + std::to_string(offset) + |
| 1238 | ": unexpected array end delimiter"); | 1252 | ": unexpected array end delimiter"); |
| 1239 | } | 1253 | } |
| 1240 | - } else if (delimiter == ':') { | 1254 | + } else if (lex_state == ls_colon) { |
| 1241 | if (parser_state != ps_dict_after_key) { | 1255 | if (parser_state != ps_dict_after_key) { |
| 1242 | QTC::TC("libtests", "JSON parse unexpected :"); | 1256 | QTC::TC("libtests", "JSON parse unexpected :"); |
| 1243 | throw std::runtime_error( | 1257 | throw std::runtime_error( |
| 1244 | "JSON: offset " + std::to_string(offset) + | 1258 | "JSON: offset " + std::to_string(offset) + |
| 1245 | ": unexpected colon"); | 1259 | ": unexpected colon"); |
| 1246 | } | 1260 | } |
| 1247 | - } else if (delimiter == ',') { | 1261 | + } else if (lex_state == ls_comma) { |
| 1248 | if (!((parser_state == ps_dict_after_item) || | 1262 | if (!((parser_state == ps_dict_after_item) || |
| 1249 | (parser_state == ps_array_after_item))) { | 1263 | (parser_state == ps_array_after_item))) { |
| 1250 | QTC::TC("libtests", "JSON parse unexpected ,"); | 1264 | QTC::TC("libtests", "JSON parse unexpected ,"); |
| @@ -1252,17 +1266,15 @@ JSONParser::handleToken() | @@ -1252,17 +1266,15 @@ JSONParser::handleToken() | ||
| 1252 | "JSON: offset " + std::to_string(offset) + | 1266 | "JSON: offset " + std::to_string(offset) + |
| 1253 | ": unexpected comma"); | 1267 | ": unexpected comma"); |
| 1254 | } | 1268 | } |
| 1255 | - } else if (delimiter != '\0') { | ||
| 1256 | - throw std::logic_error("JSONParser::handleToken: bad delimiter"); | ||
| 1257 | } | 1269 | } |
| 1258 | 1270 | ||
| 1259 | // Now we know we have a delimiter or item that is allowed. Do | 1271 | // Now we know we have a delimiter or item that is allowed. Do |
| 1260 | // whatever we need to do with it. | 1272 | // whatever we need to do with it. |
| 1261 | 1273 | ||
| 1262 | parser_state_e next_state = ps_top; | 1274 | parser_state_e next_state = ps_top; |
| 1263 | - if (delimiter == ':') { | 1275 | + if (lex_state == ls_colon) { |
| 1264 | next_state = ps_dict_after_colon; | 1276 | next_state = ps_dict_after_colon; |
| 1265 | - } else if (delimiter == ',') { | 1277 | + } else if (lex_state == ls_comma) { |
| 1266 | if (parser_state == ps_dict_after_item) { | 1278 | if (parser_state == ps_dict_after_item) { |
| 1267 | next_state = ps_dict_after_comma; | 1279 | next_state = ps_dict_after_comma; |
| 1268 | } else if (parser_state == ps_array_after_item) { | 1280 | } else if (parser_state == ps_array_after_item) { |
| @@ -1271,7 +1283,7 @@ JSONParser::handleToken() | @@ -1271,7 +1283,7 @@ JSONParser::handleToken() | ||
| 1271 | throw std::logic_error("JSONParser::handleToken: unexpected parser" | 1283 | throw std::logic_error("JSONParser::handleToken: unexpected parser" |
| 1272 | " state for comma"); | 1284 | " state for comma"); |
| 1273 | } | 1285 | } |
| 1274 | - } else if ((delimiter == '}') || (delimiter == ']')) { | 1286 | + } else if ((lex_state == ls_end_array) || (lex_state == ls_end_dict)) { |
| 1275 | next_state = ps_stack.back(); | 1287 | next_state = ps_stack.back(); |
| 1276 | ps_stack.pop_back(); | 1288 | ps_stack.pop_back(); |
| 1277 | auto tos = stack.back(); | 1289 | auto tos = stack.back(); |
| @@ -1282,9 +1294,6 @@ JSONParser::handleToken() | @@ -1282,9 +1294,6 @@ JSONParser::handleToken() | ||
| 1282 | if (next_state != ps_done) { | 1294 | if (next_state != ps_done) { |
| 1283 | stack.pop_back(); | 1295 | stack.pop_back(); |
| 1284 | } | 1296 | } |
| 1285 | - } else if (delimiter != '\0') { | ||
| 1286 | - throw std::logic_error( | ||
| 1287 | - "JSONParser::handleToken: unexpected delimiter in transition"); | ||
| 1288 | } else if (item.get()) { | 1297 | } else if (item.get()) { |
| 1289 | if (!(item->isArray() || item->isDictionary())) { | 1298 | if (!(item->isArray() || item->isDictionary())) { |
| 1290 | item->setStart(offset - toO(token.length())); | 1299 | item->setStart(offset - toO(token.length())); |