Commit d3152869b666a725d303e0667a69f973fc5a96ed

Authored by m-holger
1 parent ee32235f

In JSONParser::getToken, handle structural and space characters early

libqpdf/JSON.cc
... ... @@ -791,7 +791,7 @@ JSONParser::tokenError()
791 791 void
792 792 JSONParser::getToken()
793 793 {
794   - enum { append, ignore, reread } action = append;
  794 + enum { append, ignore } action = append;
795 795 bool ready = false;
796 796 token.clear();
797 797  
... ... @@ -820,13 +820,103 @@ JSONParser::getToken()
820 820 } else {
821 821 break;
822 822 }
823   -
824 823 } else {
825 824 QTC::TC("libtests", "JSON parse null character");
826 825 throw std::runtime_error(
827 826 "JSON: control or null character at offset " +
828 827 std::to_string(offset));
829 828 }
  829 + } else if (*p == ',') {
  830 + if (lex_state == ls_top) {
  831 + ++p;
  832 + ++offset;
  833 + lex_state = ls_comma;
  834 + return;
  835 + } else if (lex_state == ls_string) {
  836 + token += *p;
  837 + ++p;
  838 + ++offset;
  839 + } else {
  840 + break;
  841 + }
  842 + } else if (*p == ':') {
  843 + if (lex_state == ls_top) {
  844 + ++p;
  845 + ++offset;
  846 + lex_state = ls_colon;
  847 + return;
  848 + } else if (lex_state == ls_string) {
  849 + token += *p;
  850 + ++p;
  851 + ++offset;
  852 + } else {
  853 + break;
  854 + }
  855 + } else if (*p == ' ') {
  856 + if (lex_state == ls_top) {
  857 + ++p;
  858 + ++offset;
  859 + } else if (lex_state == ls_string) {
  860 + token += *p;
  861 + ++p;
  862 + ++offset;
  863 + } else {
  864 + break;
  865 + }
  866 + } else if (*p == '{') {
  867 + if (lex_state == ls_top) {
  868 + token_start = offset;
  869 + ++p;
  870 + ++offset;
  871 + lex_state = ls_begin_dict;
  872 + return;
  873 + } else if (lex_state == ls_string) {
  874 + token += *p;
  875 + ++p;
  876 + ++offset;
  877 + } else {
  878 + break;
  879 + }
  880 + } else if (*p == '}') {
  881 + if (lex_state == ls_top) {
  882 + ++p;
  883 + ++offset;
  884 + lex_state = ls_end_dict;
  885 + return;
  886 + } else if (lex_state == ls_string) {
  887 + token += *p;
  888 + ++p;
  889 + ++offset;
  890 + } else {
  891 + break;
  892 + }
  893 + } else if (*p == '[') {
  894 + if (lex_state == ls_top) {
  895 + token_start = offset;
  896 + ++p;
  897 + ++offset;
  898 + lex_state = ls_begin_array;
  899 + return;
  900 + } else if (lex_state == ls_string) {
  901 + token += *p;
  902 + ++p;
  903 + ++offset;
  904 + } else {
  905 + break;
  906 + }
  907 + } else if (*p == ']') {
  908 + if (lex_state == ls_top) {
  909 + ++p;
  910 + ++offset;
  911 + lex_state = ls_end_array;
  912 + return;
  913 + } else if (lex_state == ls_string) {
  914 + token += *p;
  915 + ++p;
  916 + ++offset;
  917 + } else {
  918 + break;
  919 + }
830 920 } else {
831 921 action = append;
832 922 switch (lex_state) {
... ... @@ -835,36 +925,6 @@ JSONParser::getToken()
835 925 if (*p == '"') {
836 926 lex_state = ls_string;
837 927 action = ignore;
838   - } else if (*p == ' ') {
839   - action = ignore;
840   - } else if (*p == ',') {
841   - lex_state = ls_comma;
842   - action = ignore;
843   - ready = true;
844   - } else if (*p == ',') {
845   - lex_state = ls_comma;
846   - action = ignore;
847   - ready = true;
848   - } else if (*p == ':') {
849   - lex_state = ls_colon;
850   - action = ignore;
851   - ready = true;
852   - } else if (*p == '{') {
853   - lex_state = ls_begin_dict;
854   - action = ignore;
855   - ready = true;
856   - } else if (*p == '}') {
857   - lex_state = ls_end_dict;
858   - action = ignore;
859   - ready = true;
860   - } else if (*p == '[') {
861   - lex_state = ls_begin_array;
862   - action = ignore;
863   - ready = true;
864   - } else if (*p == ']') {
865   - lex_state = ls_end_array;
866   - action = ignore;
867   - ready = true;
868 928 } else if ((*p >= 'a') && (*p <= 'z')) {
869 929 lex_state = ls_alpha;
870 930 } else if (*p == '-') {
... ... @@ -897,14 +957,6 @@ JSONParser::getToken()
897 957 case ls_number_leading_zero:
898 958 if (*p == '.') {
899 959 lex_state = ls_number_point;
900   - } else if (*p == ' ') {
901   - lex_state = ls_number;
902   - action = ignore;
903   - ready = true;
904   - } else if (strchr("{}[]:,", *p)) {
905   - lex_state = ls_number;
906   - action = reread;
907   - ready = true;
908 960 } else if (*p == 'e' || *p == 'E') {
909 961 lex_state = ls_number_e;
910 962 } else {
... ... @@ -920,14 +972,6 @@ JSONParser::getToken()
920 972 // continue
921 973 } else if (*p == '.') {
922 974 lex_state = ls_number_point;
923   - } else if (*p == ' ') {
924   - lex_state = ls_number;
925   - action = ignore;
926   - ready = true;
927   - } else if (strchr("{}[]:,", *p)) {
928   - lex_state = ls_number;
929   - action = reread;
930   - ready = true;
931 975 } else if (*p == 'e' || *p == 'E') {
932 976 lex_state = ls_number_e;
933 977 } else {
... ... @@ -946,14 +990,6 @@ JSONParser::getToken()
946 990 case ls_number_after_point:
947 991 if ((*p >= '0') && (*p <= '9')) {
948 992 // continue
949   - } else if (*p == ' ') {
950   - lex_state = ls_number;
951   - action = ignore;
952   - ready = true;
953   - } else if (strchr("{}[]:,", *p)) {
954   - lex_state = ls_number;
955   - action = reread;
956   - ready = true;
957 993 } else if (*p == 'e' || *p == 'E') {
958 994 lex_state = ls_number_e;
959 995 } else {
... ... @@ -983,12 +1019,6 @@ JSONParser::getToken()
983 1019 // We only get here after we have seen an exponent.
984 1020 if ((*p >= '0') && (*p <= '9')) {
985 1021 // continue
986   - } else if (*p == ' ') {
987   - action = ignore;
988   - ready = true;
989   - } else if (strchr("{}[]:,", *p)) {
990   - action = reread;
991   - ready = true;
992 1022 } else {
993 1023 tokenError();
994 1024 }
... ... @@ -997,12 +1027,6 @@ JSONParser::getToken()
997 1027 case ls_alpha:
998 1028 if ((*p >= 'a') && (*p <= 'z')) {
999 1029 // okay
1000   - } else if (*p == ' ') {
1001   - action = ignore;
1002   - ready = true;
1003   - } else if (strchr("{}[]:,", *p)) {
1004   - action = reread;
1005   - ready = true;
1006 1030 } else {
1007 1031 tokenError();
1008 1032 }
... ... @@ -1090,8 +1114,6 @@ JSONParser::getToken()
1090 1114 "JSONParser::getToken : trying to handle delimiter state");
1091 1115 }
1092 1116 switch (action) {
1093   - case reread:
1094   - break;
1095 1117 case append:
1096 1118 token.append(1, *p);
1097 1119 // fall through
... ... @@ -1107,7 +1129,7 @@ JSONParser::getToken()
1107 1129 }
1108 1130  
1109 1131 // We only get here if on end of input or if the last character was a
1110   - // control character.
  1132 + // control character or other delimiter.
1111 1133  
1112 1134 if (!token.empty()) {
1113 1135 switch (lex_state) {
... ...
libtests/qtest/json_parse/bad-09.out
1   -exception: bad-09.json: JSON: offset 3: expect string as dictionary key
  1 +exception: bad-09.json: JSON: offset 2: expect string as dictionary key
... ...
libtests/qtest/json_parse/bad-31.out
1   -exception: bad-31.json: JSON: offset 1: numeric literal: no digit after minus sign
  1 +exception: bad-31.json: JSON: offset 1: numeric literal: incomplete number
... ...