Commit 39dfd305c8b29d0959c3ac1201f0406f55091e85

Authored by m-holger
1 parent 1e0ab79a

In JSONParser, add lexer states for delimiters

Showing 1 changed file with 50 additions and 41 deletions
libqpdf/JSON.cc
@@ -653,6 +653,12 @@ namespace @@ -653,6 +653,12 @@ namespace
653 ls_string, 653 ls_string,
654 ls_backslash, 654 ls_backslash,
655 ls_u4, 655 ls_u4,
  656 + ls_begin_array,
  657 + ls_end_array,
  658 + ls_begin_dict,
  659 + ls_end_dict,
  660 + ls_colon,
  661 + ls_comma,
656 }; 662 };
657 663
658 InputSource& is; 664 InputSource& is;
@@ -861,6 +867,24 @@ JSONParser::getToken() @@ -861,6 +867,24 @@ JSONParser::getToken()
861 lex_state = ls_string; 867 lex_state = ls_string;
862 } else if (QUtil::is_space(*p)) { 868 } else if (QUtil::is_space(*p)) {
863 action = ignore; 869 action = ignore;
  870 + } else if (*p == ',') {
  871 + lex_state = ls_comma;
  872 + ready = true;
  873 + } else if (*p == ':') {
  874 + lex_state = ls_colon;
  875 + ready = true;
  876 + } else if (*p == '{') {
  877 + lex_state = ls_begin_dict;
  878 + ready = true;
  879 + } else if (*p == '}') {
  880 + lex_state = ls_end_dict;
  881 + ready = true;
  882 + } else if (*p == '[') {
  883 + lex_state = ls_begin_array;
  884 + ready = true;
  885 + } else if (*p == ']') {
  886 + lex_state = ls_end_array;
  887 + ready = true;
864 } else if ((*p >= 'a') && (*p <= 'z')) { 888 } else if ((*p >= 'a') && (*p <= 'z')) {
865 lex_state = ls_alpha; 889 lex_state = ls_alpha;
866 } else if (*p == '-') { 890 } else if (*p == '-') {
@@ -869,8 +893,6 @@ JSONParser::getToken() @@ -869,8 +893,6 @@ JSONParser::getToken()
869 lex_state = ls_number_before_point; 893 lex_state = ls_number_before_point;
870 } else if (*p == '0') { 894 } else if (*p == '0') {
871 lex_state = ls_number_leading_zero; 895 lex_state = ls_number_leading_zero;
872 - } else if (strchr("{}[]:,", *p)) {  
873 - ready = true;  
874 } else { 896 } else {
875 QTC::TC("libtests", "JSON parse bad character"); 897 QTC::TC("libtests", "JSON parse bad character");
876 throw std::runtime_error( 898 throw std::runtime_error(
@@ -1044,6 +1066,10 @@ JSONParser::getToken() @@ -1044,6 +1066,10 @@ JSONParser::getToken()
1044 lex_state = ls_string; 1066 lex_state = ls_string;
1045 } 1067 }
1046 break; 1068 break;
  1069 +
  1070 + default:
  1071 + throw std::logic_error(
  1072 + "JSONParser::getToken : trying to handle delimiter state");
1047 } 1073 }
1048 switch (action) { 1074 switch (action) {
1049 case reread: 1075 case reread:
@@ -1090,7 +1116,7 @@ JSONParser::getToken() @@ -1090,7 +1116,7 @@ JSONParser::getToken()
1090 void 1116 void
1091 JSONParser::handleToken() 1117 JSONParser::handleToken()
1092 { 1118 {
1093 - if (token.empty()) { 1119 + if (lex_state == ls_top) {
1094 return; 1120 return;
1095 } 1121 }
1096 1122
@@ -1110,31 +1136,25 @@ JSONParser::handleToken() @@ -1110,31 +1136,25 @@ JSONParser::handleToken()
1110 } 1136 }
1111 s_value = decode_string(token, offset - toO(token.length())); 1137 s_value = decode_string(token, offset - toO(token.length()));
1112 } 1138 }
1113 - // Based on the lexical state and value, figure out whether we are  
1114 - // looking at an item or a delimiter. It will always be exactly  
1115 - // one of those two or an error condition.  
1116 1139
1117 std::shared_ptr<JSON> item; 1140 std::shared_ptr<JSON> item;
1118 - char delimiter = '\0';  
1119 - // Already verified that token is not empty  
1120 - char first_char = token.at(0); 1141 +
1121 switch (lex_state) { 1142 switch (lex_state) {
1122 - case ls_top:  
1123 - switch (first_char) {  
1124 - case '{':  
1125 - item = std::make_shared<JSON>(JSON::makeDictionary());  
1126 - item->setStart(offset - toO(token.length()));  
1127 - break; 1143 + case ls_begin_dict:
  1144 + item = std::make_shared<JSON>(JSON::makeDictionary());
  1145 + item->setStart(offset - toO(token.length()));
  1146 + break;
1128 1147
1129 - case '[':  
1130 - item = std::make_shared<JSON>(JSON::makeArray());  
1131 - item->setStart(offset - toO(token.length()));  
1132 - break; 1148 + case ls_begin_array:
  1149 + item = std::make_shared<JSON>(JSON::makeArray());
  1150 + item->setStart(offset - toO(token.length()));
  1151 + break;
1133 1152
1134 - default:  
1135 - delimiter = first_char;  
1136 - break;  
1137 - } 1153 + case ls_colon:
  1154 + case ls_comma:
  1155 + case ls_end_array:
  1156 + case ls_end_dict:
  1157 + // continue
1138 break; 1158 break;
1139 1159
1140 case ls_number: 1160 case ls_number:
@@ -1166,12 +1186,6 @@ JSONParser::handleToken() @@ -1166,12 +1186,6 @@ JSONParser::handleToken()
1166 break; 1186 break;
1167 } 1187 }
1168 1188
1169 - if ((item == nullptr) == (delimiter == '\0')) {  
1170 - throw std::logic_error(  
1171 - "JSONParser::handleToken: logic error: exactly one of item"  
1172 - " or delimiter must be set");  
1173 - }  
1174 -  
1175 // See whether what we have is allowed at this point. 1189 // See whether what we have is allowed at this point.
1176 1190
1177 if (item.get()) { 1191 if (item.get()) {
@@ -1217,7 +1231,7 @@ JSONParser::handleToken() @@ -1217,7 +1231,7 @@ JSONParser::handleToken()
1217 break; 1231 break;
1218 // okay 1232 // okay
1219 } 1233 }
1220 - } else if (delimiter == '}') { 1234 + } else if (lex_state == ls_end_dict) {
1221 if (!((parser_state == ps_dict_begin) || 1235 if (!((parser_state == ps_dict_begin) ||
1222 (parser_state == ps_dict_after_item))) 1236 (parser_state == ps_dict_after_item)))
1223 1237
@@ -1227,7 +1241,7 @@ JSONParser::handleToken() @@ -1227,7 +1241,7 @@ JSONParser::handleToken()
1227 "JSON: offset " + std::to_string(offset) + 1241 "JSON: offset " + std::to_string(offset) +
1228 ": unexpected dictionary end delimiter"); 1242 ": unexpected dictionary end delimiter");
1229 } 1243 }
1230 - } else if (delimiter == ']') { 1244 + } else if (lex_state == ls_end_array) {
1231 if (!((parser_state == ps_array_begin) || 1245 if (!((parser_state == ps_array_begin) ||
1232 (parser_state == ps_array_after_item))) 1246 (parser_state == ps_array_after_item)))
1233 1247
@@ -1237,14 +1251,14 @@ JSONParser::handleToken() @@ -1237,14 +1251,14 @@ JSONParser::handleToken()
1237 "JSON: offset " + std::to_string(offset) + 1251 "JSON: offset " + std::to_string(offset) +
1238 ": unexpected array end delimiter"); 1252 ": unexpected array end delimiter");
1239 } 1253 }
1240 - } else if (delimiter == ':') { 1254 + } else if (lex_state == ls_colon) {
1241 if (parser_state != ps_dict_after_key) { 1255 if (parser_state != ps_dict_after_key) {
1242 QTC::TC("libtests", "JSON parse unexpected :"); 1256 QTC::TC("libtests", "JSON parse unexpected :");
1243 throw std::runtime_error( 1257 throw std::runtime_error(
1244 "JSON: offset " + std::to_string(offset) + 1258 "JSON: offset " + std::to_string(offset) +
1245 ": unexpected colon"); 1259 ": unexpected colon");
1246 } 1260 }
1247 - } else if (delimiter == ',') { 1261 + } else if (lex_state == ls_comma) {
1248 if (!((parser_state == ps_dict_after_item) || 1262 if (!((parser_state == ps_dict_after_item) ||
1249 (parser_state == ps_array_after_item))) { 1263 (parser_state == ps_array_after_item))) {
1250 QTC::TC("libtests", "JSON parse unexpected ,"); 1264 QTC::TC("libtests", "JSON parse unexpected ,");
@@ -1252,17 +1266,15 @@ JSONParser::handleToken() @@ -1252,17 +1266,15 @@ JSONParser::handleToken()
1252 "JSON: offset " + std::to_string(offset) + 1266 "JSON: offset " + std::to_string(offset) +
1253 ": unexpected comma"); 1267 ": unexpected comma");
1254 } 1268 }
1255 - } else if (delimiter != '\0') {  
1256 - throw std::logic_error("JSONParser::handleToken: bad delimiter");  
1257 } 1269 }
1258 1270
1259 // Now we know we have a delimiter or item that is allowed. Do 1271 // Now we know we have a delimiter or item that is allowed. Do
1260 // whatever we need to do with it. 1272 // whatever we need to do with it.
1261 1273
1262 parser_state_e next_state = ps_top; 1274 parser_state_e next_state = ps_top;
1263 - if (delimiter == ':') { 1275 + if (lex_state == ls_colon) {
1264 next_state = ps_dict_after_colon; 1276 next_state = ps_dict_after_colon;
1265 - } else if (delimiter == ',') { 1277 + } else if (lex_state == ls_comma) {
1266 if (parser_state == ps_dict_after_item) { 1278 if (parser_state == ps_dict_after_item) {
1267 next_state = ps_dict_after_comma; 1279 next_state = ps_dict_after_comma;
1268 } else if (parser_state == ps_array_after_item) { 1280 } else if (parser_state == ps_array_after_item) {
@@ -1271,7 +1283,7 @@ JSONParser::handleToken() @@ -1271,7 +1283,7 @@ JSONParser::handleToken()
1271 throw std::logic_error("JSONParser::handleToken: unexpected parser" 1283 throw std::logic_error("JSONParser::handleToken: unexpected parser"
1272 " state for comma"); 1284 " state for comma");
1273 } 1285 }
1274 - } else if ((delimiter == '}') || (delimiter == ']')) { 1286 + } else if ((lex_state == ls_end_array) || (lex_state == ls_end_dict)) {
1275 next_state = ps_stack.back(); 1287 next_state = ps_stack.back();
1276 ps_stack.pop_back(); 1288 ps_stack.pop_back();
1277 auto tos = stack.back(); 1289 auto tos = stack.back();
@@ -1282,9 +1294,6 @@ JSONParser::handleToken() @@ -1282,9 +1294,6 @@ JSONParser::handleToken()
1282 if (next_state != ps_done) { 1294 if (next_state != ps_done) {
1283 stack.pop_back(); 1295 stack.pop_back();
1284 } 1296 }
1285 - } else if (delimiter != '\0') {  
1286 - throw std::logic_error(  
1287 - "JSONParser::handleToken: unexpected delimiter in transition");  
1288 } else if (item.get()) { 1297 } else if (item.get()) {
1289 if (!(item->isArray() || item->isDictionary())) { 1298 if (!(item->isArray() || item->isDictionary())) {
1290 item->setStart(offset - toO(token.length())); 1299 item->setStart(offset - toO(token.length()));