Commit 39dfd305c8b29d0959c3ac1201f0406f55091e85

Authored by m-holger
1 parent 1e0ab79a

In JSONParser add lexer states for delimiters

Showing 1 changed file with 50 additions and 41 deletions
libqpdf/JSON.cc
... ... @@ -653,6 +653,12 @@ namespace
653 653 ls_string,
654 654 ls_backslash,
655 655 ls_u4,
  656 + ls_begin_array,
  657 + ls_end_array,
  658 + ls_begin_dict,
  659 + ls_end_dict,
  660 + ls_colon,
  661 + ls_comma,
656 662 };
657 663  
658 664 InputSource& is;
... ... @@ -861,6 +867,24 @@ JSONParser::getToken()
861 867 lex_state = ls_string;
862 868 } else if (QUtil::is_space(*p)) {
863 869 action = ignore;
  870 + } else if (*p == ',') {
  871 + lex_state = ls_comma;
  872 + ready = true;
  873 + } else if (*p == ':') {
  874 + lex_state = ls_colon;
  875 + ready = true;
  876 + } else if (*p == '{') {
  877 + lex_state = ls_begin_dict;
  878 + ready = true;
  879 + } else if (*p == '}') {
  880 + lex_state = ls_end_dict;
  881 + ready = true;
  882 + } else if (*p == '[') {
  883 + lex_state = ls_begin_array;
  884 + ready = true;
  885 + } else if (*p == ']') {
  886 + lex_state = ls_end_array;
  887 + ready = true;
864 888 } else if ((*p >= 'a') && (*p <= 'z')) {
865 889 lex_state = ls_alpha;
866 890 } else if (*p == '-') {
... ... @@ -869,8 +893,6 @@ JSONParser::getToken()
869 893 lex_state = ls_number_before_point;
870 894 } else if (*p == '0') {
871 895 lex_state = ls_number_leading_zero;
872   - } else if (strchr("{}[]:,", *p)) {
873   - ready = true;
874 896 } else {
875 897 QTC::TC("libtests", "JSON parse bad character");
876 898 throw std::runtime_error(
... ... @@ -1044,6 +1066,10 @@ JSONParser::getToken()
1044 1066 lex_state = ls_string;
1045 1067 }
1046 1068 break;
  1069 +
  1070 + default:
  1071 + throw std::logic_error(
  1072 + "JSONParser::getToken : trying to handle delimiter state");
1047 1073 }
1048 1074 switch (action) {
1049 1075 case reread:
... ... @@ -1090,7 +1116,7 @@ JSONParser::getToken()
1090 1116 void
1091 1117 JSONParser::handleToken()
1092 1118 {
1093   - if (token.empty()) {
  1119 + if (lex_state == ls_top) {
1094 1120 return;
1095 1121 }
1096 1122  
... ... @@ -1110,31 +1136,25 @@ JSONParser::handleToken()
1110 1136 }
1111 1137 s_value = decode_string(token, offset - toO(token.length()));
1112 1138 }
1113   - // Based on the lexical state and value, figure out whether we are
1114   - // looking at an item or a delimiter. It will always be exactly
1115   - // one of those two or an error condition.
1116 1139  
1117 1140 std::shared_ptr<JSON> item;
1118   - char delimiter = '\0';
1119   - // Already verified that token is not empty
1120   - char first_char = token.at(0);
  1141 +
1121 1142 switch (lex_state) {
1122   - case ls_top:
1123   - switch (first_char) {
1124   - case '{':
1125   - item = std::make_shared<JSON>(JSON::makeDictionary());
1126   - item->setStart(offset - toO(token.length()));
1127   - break;
  1143 + case ls_begin_dict:
  1144 + item = std::make_shared<JSON>(JSON::makeDictionary());
  1145 + item->setStart(offset - toO(token.length()));
  1146 + break;
1128 1147  
1129   - case '[':
1130   - item = std::make_shared<JSON>(JSON::makeArray());
1131   - item->setStart(offset - toO(token.length()));
1132   - break;
  1148 + case ls_begin_array:
  1149 + item = std::make_shared<JSON>(JSON::makeArray());
  1150 + item->setStart(offset - toO(token.length()));
  1151 + break;
1133 1152  
1134   - default:
1135   - delimiter = first_char;
1136   - break;
1137   - }
  1153 + case ls_colon:
  1154 + case ls_comma:
  1155 + case ls_end_array:
  1156 + case ls_end_dict:
  1157 + // continue
1138 1158 break;
1139 1159  
1140 1160 case ls_number:
... ... @@ -1166,12 +1186,6 @@ JSONParser::handleToken()
1166 1186 break;
1167 1187 }
1168 1188  
1169   - if ((item == nullptr) == (delimiter == '\0')) {
1170   - throw std::logic_error(
1171   - "JSONParser::handleToken: logic error: exactly one of item"
1172   - " or delimiter must be set");
1173   - }
1174   -
1175 1189 // See whether what we have is allowed at this point.
1176 1190  
1177 1191 if (item.get()) {
... ... @@ -1217,7 +1231,7 @@ JSONParser::handleToken()
1217 1231 break;
1218 1232 // okay
1219 1233 }
1220   - } else if (delimiter == '}') {
  1234 + } else if (lex_state == ls_end_dict) {
1221 1235 if (!((parser_state == ps_dict_begin) ||
1222 1236 (parser_state == ps_dict_after_item)))
1223 1237  
... ... @@ -1227,7 +1241,7 @@ JSONParser::handleToken()
1227 1241 "JSON: offset " + std::to_string(offset) +
1228 1242 ": unexpected dictionary end delimiter");
1229 1243 }
1230   - } else if (delimiter == ']') {
  1244 + } else if (lex_state == ls_end_array) {
1231 1245 if (!((parser_state == ps_array_begin) ||
1232 1246 (parser_state == ps_array_after_item)))
1233 1247  
... ... @@ -1237,14 +1251,14 @@ JSONParser::handleToken()
1237 1251 "JSON: offset " + std::to_string(offset) +
1238 1252 ": unexpected array end delimiter");
1239 1253 }
1240   - } else if (delimiter == ':') {
  1254 + } else if (lex_state == ls_colon) {
1241 1255 if (parser_state != ps_dict_after_key) {
1242 1256 QTC::TC("libtests", "JSON parse unexpected :");
1243 1257 throw std::runtime_error(
1244 1258 "JSON: offset " + std::to_string(offset) +
1245 1259 ": unexpected colon");
1246 1260 }
1247   - } else if (delimiter == ',') {
  1261 + } else if (lex_state == ls_comma) {
1248 1262 if (!((parser_state == ps_dict_after_item) ||
1249 1263 (parser_state == ps_array_after_item))) {
1250 1264 QTC::TC("libtests", "JSON parse unexpected ,");
... ... @@ -1252,17 +1266,15 @@ JSONParser::handleToken()
1252 1266 "JSON: offset " + std::to_string(offset) +
1253 1267 ": unexpected comma");
1254 1268 }
1255   - } else if (delimiter != '\0') {
1256   - throw std::logic_error("JSONParser::handleToken: bad delimiter");
1257 1269 }
1258 1270  
1259 1271 // Now we know we have a delimiter or item that is allowed. Do
1260 1272 // whatever we need to do with it.
1261 1273  
1262 1274 parser_state_e next_state = ps_top;
1263   - if (delimiter == ':') {
  1275 + if (lex_state == ls_colon) {
1264 1276 next_state = ps_dict_after_colon;
1265   - } else if (delimiter == ',') {
  1277 + } else if (lex_state == ls_comma) {
1266 1278 if (parser_state == ps_dict_after_item) {
1267 1279 next_state = ps_dict_after_comma;
1268 1280 } else if (parser_state == ps_array_after_item) {
... ... @@ -1271,7 +1283,7 @@ JSONParser::handleToken()
1271 1283 throw std::logic_error("JSONParser::handleToken: unexpected parser"
1272 1284 " state for comma");
1273 1285 }
1274   - } else if ((delimiter == '}') || (delimiter == ']')) {
  1286 + } else if ((lex_state == ls_end_array) || (lex_state == ls_end_dict)) {
1275 1287 next_state = ps_stack.back();
1276 1288 ps_stack.pop_back();
1277 1289 auto tos = stack.back();
... ... @@ -1282,9 +1294,6 @@ JSONParser::handleToken()
1282 1294 if (next_state != ps_done) {
1283 1295 stack.pop_back();
1284 1296 }
1285   - } else if (delimiter != '\0') {
1286   - throw std::logic_error(
1287   - "JSONParser::handleToken: unexpected delimiter in transition");
1288 1297 } else if (item.get()) {
1289 1298 if (!(item->isArray() || item->isDictionary())) {
1290 1299 item->setStart(offset - toO(token.length()));
... ...