Commit d3152869b666a725d303e0667a69f973fc5a96ed

Authored by m-holger
1 parent ee32235f

In JSONParser::getToken, handle structural and space characters early

libqpdf/JSON.cc
@@ -791,7 +791,7 @@ JSONParser::tokenError() @@ -791,7 +791,7 @@ JSONParser::tokenError()
791 void 791 void
792 JSONParser::getToken() 792 JSONParser::getToken()
793 { 793 {
794 - enum { append, ignore, reread } action = append; 794 + enum { append, ignore } action = append;
795 bool ready = false; 795 bool ready = false;
796 token.clear(); 796 token.clear();
797 797
@@ -820,13 +820,103 @@ JSONParser::getToken() @@ -820,13 +820,103 @@ JSONParser::getToken()
820 } else { 820 } else {
821 break; 821 break;
822 } 822 }
823 -  
824 } else { 823 } else {
825 QTC::TC("libtests", "JSON parse null character"); 824 QTC::TC("libtests", "JSON parse null character");
826 throw std::runtime_error( 825 throw std::runtime_error(
827 "JSON: control or null character at offset " + 826 "JSON: control or null character at offset " +
828 std::to_string(offset)); 827 std::to_string(offset));
829 } 828 }
  829 + } else if (*p == ',') {
  830 + if (lex_state == ls_top) {
  831 + ++p;
  832 + ++offset;
  833 + lex_state = ls_comma;
  834 + return;
  835 + } else if (lex_state == ls_string) {
  836 + token += *p;
  837 + ++p;
  838 + ++offset;
  839 + } else {
  840 + break;
  841 + }
  842 + } else if (*p == ':') {
  843 + if (lex_state == ls_top) {
  844 + ++p;
  845 + ++offset;
  846 + lex_state = ls_colon;
  847 + return;
  848 + } else if (lex_state == ls_string) {
  849 + token += *p;
  850 + ++p;
  851 + ++offset;
  852 + } else {
  853 + break;
  854 + }
  855 + } else if (*p == ' ') {
  856 + if (lex_state == ls_top) {
  857 + ++p;
  858 + ++offset;
  859 + } else if (lex_state == ls_string) {
  860 + token += *p;
  861 + ++p;
  862 + ++offset;
  863 + } else {
  864 + break;
  865 + }
  866 + } else if (*p == '{') {
  867 + if (lex_state == ls_top) {
  868 + token_start = offset;
  869 + ++p;
  870 + ++offset;
  871 + lex_state = ls_begin_dict;
  872 + return;
  873 + } else if (lex_state == ls_string) {
  874 + token += *p;
  875 + ++p;
  876 + ++offset;
  877 + } else {
  878 + break;
  879 + }
  880 + } else if (*p == '}') {
  881 + if (lex_state == ls_top) {
  882 + ++p;
  883 + ++offset;
  884 + lex_state = ls_end_dict;
  885 + return;
  886 + } else if (lex_state == ls_string) {
  887 + token += *p;
  888 + ++p;
  889 + ++offset;
  890 + } else {
  891 + break;
  892 + }
  893 + } else if (*p == '[') {
  894 + if (lex_state == ls_top) {
  895 + token_start = offset;
  896 + ++p;
  897 + ++offset;
  898 + lex_state = ls_begin_array;
  899 + return;
  900 + } else if (lex_state == ls_string) {
  901 + token += *p;
  902 + ++p;
  903 + ++offset;
  904 + } else {
  905 + break;
  906 + }
  907 + } else if (*p == ']') {
  908 + if (lex_state == ls_top) {
  909 + ++p;
  910 + ++offset;
  911 + lex_state = ls_end_array;
  912 + return;
  913 + } else if (lex_state == ls_string) {
  914 + token += *p;
  915 + ++p;
  916 + ++offset;
  917 + } else {
  918 + break;
  919 + }
830 } else { 920 } else {
831 action = append; 921 action = append;
832 switch (lex_state) { 922 switch (lex_state) {
@@ -835,36 +925,6 @@ JSONParser::getToken() @@ -835,36 +925,6 @@ JSONParser::getToken()
835 if (*p == '"') { 925 if (*p == '"') {
836 lex_state = ls_string; 926 lex_state = ls_string;
837 action = ignore; 927 action = ignore;
838 - } else if (*p == ' ') {  
839 - action = ignore;  
840 - } else if (*p == ',') {  
841 - lex_state = ls_comma;  
842 - action = ignore;  
843 - ready = true;  
844 - } else if (*p == ',') {  
845 - lex_state = ls_comma;  
846 - action = ignore;  
847 - ready = true;  
848 - } else if (*p == ':') {  
849 - lex_state = ls_colon;  
850 - action = ignore;  
851 - ready = true;  
852 - } else if (*p == '{') {  
853 - lex_state = ls_begin_dict;  
854 - action = ignore;  
855 - ready = true;  
856 - } else if (*p == '}') {  
857 - lex_state = ls_end_dict;  
858 - action = ignore;  
859 - ready = true;  
860 - } else if (*p == '[') {  
861 - lex_state = ls_begin_array;  
862 - action = ignore;  
863 - ready = true;  
864 - } else if (*p == ']') {  
865 - lex_state = ls_end_array;  
866 - action = ignore;  
867 - ready = true;  
868 } else if ((*p >= 'a') && (*p <= 'z')) { 928 } else if ((*p >= 'a') && (*p <= 'z')) {
869 lex_state = ls_alpha; 929 lex_state = ls_alpha;
870 } else if (*p == '-') { 930 } else if (*p == '-') {
@@ -897,14 +957,6 @@ JSONParser::getToken() @@ -897,14 +957,6 @@ JSONParser::getToken()
897 case ls_number_leading_zero: 957 case ls_number_leading_zero:
898 if (*p == '.') { 958 if (*p == '.') {
899 lex_state = ls_number_point; 959 lex_state = ls_number_point;
900 - } else if (*p == ' ') {  
901 - lex_state = ls_number;  
902 - action = ignore;  
903 - ready = true;  
904 - } else if (strchr("{}[]:,", *p)) {  
905 - lex_state = ls_number;  
906 - action = reread;  
907 - ready = true;  
908 } else if (*p == 'e' || *p == 'E') { 960 } else if (*p == 'e' || *p == 'E') {
909 lex_state = ls_number_e; 961 lex_state = ls_number_e;
910 } else { 962 } else {
@@ -920,14 +972,6 @@ JSONParser::getToken() @@ -920,14 +972,6 @@ JSONParser::getToken()
920 // continue 972 // continue
921 } else if (*p == '.') { 973 } else if (*p == '.') {
922 lex_state = ls_number_point; 974 lex_state = ls_number_point;
923 - } else if (*p == ' ') {  
924 - lex_state = ls_number;  
925 - action = ignore;  
926 - ready = true;  
927 - } else if (strchr("{}[]:,", *p)) {  
928 - lex_state = ls_number;  
929 - action = reread;  
930 - ready = true;  
931 } else if (*p == 'e' || *p == 'E') { 975 } else if (*p == 'e' || *p == 'E') {
932 lex_state = ls_number_e; 976 lex_state = ls_number_e;
933 } else { 977 } else {
@@ -946,14 +990,6 @@ JSONParser::getToken() @@ -946,14 +990,6 @@ JSONParser::getToken()
946 case ls_number_after_point: 990 case ls_number_after_point:
947 if ((*p >= '0') && (*p <= '9')) { 991 if ((*p >= '0') && (*p <= '9')) {
948 // continue 992 // continue
949 - } else if (*p == ' ') {  
950 - lex_state = ls_number;  
951 - action = ignore;  
952 - ready = true;  
953 - } else if (strchr("{}[]:,", *p)) {  
954 - lex_state = ls_number;  
955 - action = reread;  
956 - ready = true;  
957 } else if (*p == 'e' || *p == 'E') { 993 } else if (*p == 'e' || *p == 'E') {
958 lex_state = ls_number_e; 994 lex_state = ls_number_e;
959 } else { 995 } else {
@@ -983,12 +1019,6 @@ JSONParser::getToken() @@ -983,12 +1019,6 @@ JSONParser::getToken()
983 // We only get here after we have seen an exponent. 1019 // We only get here after we have seen an exponent.
984 if ((*p >= '0') && (*p <= '9')) { 1020 if ((*p >= '0') && (*p <= '9')) {
985 // continue 1021 // continue
986 - } else if (*p == ' ') {  
987 - action = ignore;  
988 - ready = true;  
989 - } else if (strchr("{}[]:,", *p)) {  
990 - action = reread;  
991 - ready = true;  
992 } else { 1022 } else {
993 tokenError(); 1023 tokenError();
994 } 1024 }
@@ -997,12 +1027,6 @@ JSONParser::getToken() @@ -997,12 +1027,6 @@ JSONParser::getToken()
997 case ls_alpha: 1027 case ls_alpha:
998 if ((*p >= 'a') && (*p <= 'z')) { 1028 if ((*p >= 'a') && (*p <= 'z')) {
999 // okay 1029 // okay
1000 - } else if (*p == ' ') {  
1001 - action = ignore;  
1002 - ready = true;  
1003 - } else if (strchr("{}[]:,", *p)) {  
1004 - action = reread;  
1005 - ready = true;  
1006 } else { 1030 } else {
1007 tokenError(); 1031 tokenError();
1008 } 1032 }
@@ -1090,8 +1114,6 @@ JSONParser::getToken() @@ -1090,8 +1114,6 @@ JSONParser::getToken()
1090 "JSONParser::getToken : trying to handle delimiter state"); 1114 "JSONParser::getToken : trying to handle delimiter state");
1091 } 1115 }
1092 switch (action) { 1116 switch (action) {
1093 - case reread:  
1094 - break;  
1095 case append: 1117 case append:
1096 token.append(1, *p); 1118 token.append(1, *p);
1097 // fall through 1119 // fall through
@@ -1107,7 +1129,7 @@ JSONParser::getToken() @@ -1107,7 +1129,7 @@ JSONParser::getToken()
1107 } 1129 }
1108 1130
1109 // We only get here if on end of input or if the last character was a 1131 // We only get here if on end of input or if the last character was a
1110 - // control character. 1132 + // control character or other delimiter.
1111 1133
1112 if (!token.empty()) { 1134 if (!token.empty()) {
1113 switch (lex_state) { 1135 switch (lex_state) {
libtests/qtest/json_parse/bad-09.out
1 -exception: bad-09.json: JSON: offset 3: expect string as dictionary key 1 +exception: bad-09.json: JSON: offset 2: expect string as dictionary key
libtests/qtest/json_parse/bad-31.out
1 -exception: bad-31.json: JSON: offset 1: numeric literal: no digit after minus sign 1 +exception: bad-31.json: JSON: offset 1: numeric literal: incomplete number