Commit f6c9019597c5077d3e99c6d41a598b49b385f59c

Authored by m-holger
1 parent d3152869

Add new methods JSONParser::append and ignore

Reduce boilerplate and increase efficiency by no longer setting and
branching on the local variables 'action' and 'ready' in getToken.
Showing 1 changed file with 95 additions and 95 deletions
libqpdf/JSON.cc
... ... @@ -605,16 +605,6 @@ namespace
605 605 std::shared_ptr<JSON> parse();
606 606  
607 607 private:
608   - void getToken();
609   - void handleToken();
610   - void tokenError();
611   - static void handle_u_code(
612   - unsigned long codepoint,
613   - qpdf_offset_t offset,
614   - unsigned long& high_surrogate,
615   - qpdf_offset_t& high_offset,
616   - std::string& result);
617   -
618 608 enum parser_state_e {
619 609 ps_top,
620 610 ps_dict_begin,
... ... @@ -662,6 +652,20 @@ namespace
662 652 std::shared_ptr<JSON> item;
663 653 };
664 654  
  655 + void getToken();
  656 + void handleToken();
  657 + void tokenError();
  658 + static void handle_u_code(
  659 + unsigned long codepoint,
  660 + qpdf_offset_t offset,
  661 + unsigned long& high_surrogate,
  662 + qpdf_offset_t& high_offset,
  663 + std::string& result);
  664 + inline void append();
  665 + inline void append(lex_state_e);
  666 + inline void ignore();
  667 + inline void ignore(lex_state_e);
  668 +
665 669 InputSource& is;
666 670 JSON::Reactor* reactor;
667 671 lex_state_e lex_state;
... ... @@ -788,11 +792,48 @@ JSONParser::tokenError()
788 792 throw std::logic_error("JSON::tokenError : unhandled error");
789 793 }
790 794  
  795 + // Append the current input character (*p) to token, then advance p and offset by one.
  796 + inline void
  797 + JSONParser::append()
  798 + {
  799 + token += *p;
  800 + ++p;
  801 + ++offset;
  802 + }
  803 +
  804 + // Set lex_state to 'next', append the current input character (*p) to token,
  805 + // then advance p and offset by one.
  806 + inline void
  807 + JSONParser::append(lex_state_e next)
  808 + {
  809 + lex_state = next;
  810 + token += *p;
  811 + ++p;
  812 + ++offset;
  813 + }
  814 +
  815 + // Skip the current input character: advance p and offset by one without
  816 + // adding anything to token.
  817 + inline void
  818 + JSONParser::ignore()
  819 + {
  820 + ++p;
  821 + ++offset;
  822 + }
  823 +
  824 + // Set lex_state to 'next' and skip the current input character: advance p and
  825 + // offset by one without adding anything to token.
  826 + inline void
  827 + JSONParser::ignore(lex_state_e next)
  828 + {
  829 + lex_state = next;
  830 + ++p;
  831 + ++offset;
  832 + }
  833 +
791 834 void
792 835 JSONParser::getToken()
793 836 {
794   - enum { append, ignore } action = append;
795   - bool ready = false;
796 837 token.clear();
797 838  
798 839 // Keep track of UTF-16 surrogate pairs.
... ... @@ -815,8 +856,7 @@ JSONParser::getToken()
815 856 // end the current token (unless we are still before the start
816 857 // of the token).
817 858 if (lex_state == ls_top) {
818   - ++p;
819   - ++offset;
  859 + ignore();
820 860 } else {
821 861 break;
822 862 }
... ... @@ -828,111 +868,82 @@ JSONParser::getToken()
828 868 }
829 869 } else if (*p == ',') {
830 870 if (lex_state == ls_top) {
831   - ++p;
832   - ++offset;
833   - lex_state = ls_comma;
  871 + ignore(ls_comma);
834 872 return;
835 873 } else if (lex_state == ls_string) {
836   - token += *p;
837   - ++p;
838   - ++offset;
  874 + append();
839 875 } else {
840 876 break;
841 877 }
842 878 } else if (*p == ':') {
843 879 if (lex_state == ls_top) {
844   - ++p;
845   - ++offset;
846   - lex_state = ls_colon;
  880 + ignore(ls_colon);
847 881 return;
848 882 } else if (lex_state == ls_string) {
849   - token += *p;
850   - ++p;
851   - ++offset;
  883 + append();
852 884 } else {
853 885 break;
854 886 }
855 887 } else if (*p == ' ') {
856 888 if (lex_state == ls_top) {
857   - ++p;
858   - ++offset;
  889 + ignore();
859 890 } else if (lex_state == ls_string) {
860   - token += *p;
861   - ++p;
862   - ++offset;
  891 + append();
863 892 } else {
864 893 break;
865 894 }
866 895 } else if (*p == '{') {
867 896 if (lex_state == ls_top) {
868 897 token_start = offset;
869   - ++p;
870   - ++offset;
871   - lex_state = ls_begin_dict;
  898 + ignore(ls_begin_dict);
872 899 return;
873 900 } else if (lex_state == ls_string) {
874   - token += *p;
875   - ++p;
876   - ++offset;
  901 + append();
877 902 } else {
878 903 break;
879 904 }
880 905 } else if (*p == '}') {
881 906 if (lex_state == ls_top) {
882   - ++p;
883   - ++offset;
884   - lex_state = ls_end_dict;
  907 + ignore(ls_end_dict);
885 908 return;
886 909 } else if (lex_state == ls_string) {
887   - token += *p;
888   - ++p;
889   - ++offset;
  910 + append();
890 911 } else {
891 912 break;
892 913 }
893 914 } else if (*p == '[') {
894 915 if (lex_state == ls_top) {
895 916 token_start = offset;
896   - ++p;
897   - ++offset;
898   - lex_state = ls_begin_array;
  917 + ignore(ls_begin_array);
899 918 return;
900 919 } else if (lex_state == ls_string) {
901   - token += *p;
902   - ++p;
903   - ++offset;
  920 + append();
904 921 } else {
905 922 break;
906 923 }
907 924 } else if (*p == ']') {
908 925 if (lex_state == ls_top) {
909   - ++p;
910   - ++offset;
911   - lex_state = ls_end_array;
  926 + ignore(ls_end_array);
912 927 return;
913 928 } else if (lex_state == ls_string) {
914   - token += *p;
915   - ++p;
916   - ++offset;
  929 + append();
917 930 } else {
918 931 break;
919 932 }
920 933 } else {
921   - action = append;
922 934 switch (lex_state) {
923 935 case ls_top:
924 936 token_start = offset;
925 937 if (*p == '"') {
926   - lex_state = ls_string;
927   - action = ignore;
  938 + ignore(ls_string);
928 939 } else if ((*p >= 'a') && (*p <= 'z')) {
929   - lex_state = ls_alpha;
  940 + append(ls_alpha);
930 941 } else if (*p == '-') {
931   - lex_state = ls_number_minus;
  942 + append(ls_number_minus);
932 943 } else if ((*p >= '1') && (*p <= '9')) {
933   - lex_state = ls_number_before_point;
  944 + append(ls_number_before_point);
934 945 } else if (*p == '0') {
935   - lex_state = ls_number_leading_zero;
  946 + append(ls_number_leading_zero);
936 947 } else {
937 948 QTC::TC("libtests", "JSON parse bad character");
938 949 throw std::runtime_error(
... ... @@ -943,9 +954,9 @@ JSONParser::getToken()
943 954  
944 955 case ls_number_minus:
945 956 if ((*p >= '1') && (*p <= '9')) {
946   - lex_state = ls_number_before_point;
  957 + append(ls_number_before_point);
947 958 } else if (*p == '0') {
948   - lex_state = ls_number_leading_zero;
  959 + append(ls_number_leading_zero);
949 960 } else {
950 961 QTC::TC("libtests", "JSON parse number minus no digits");
951 962 throw std::runtime_error(
... ... @@ -956,9 +967,9 @@ JSONParser::getToken()
956 967  
957 968 case ls_number_leading_zero:
958 969 if (*p == '.') {
959   - lex_state = ls_number_point;
  970 + append(ls_number_point);
960 971 } else if (*p == 'e' || *p == 'E') {
961   - lex_state = ls_number_e;
  972 + append(ls_number_e);
962 973 } else {
963 974 QTC::TC("libtests", "JSON parse leading zero");
964 975 throw std::runtime_error(
... ... @@ -969,11 +980,11 @@ JSONParser::getToken()
969 980  
970 981 case ls_number_before_point:
971 982 if ((*p >= '0') && (*p <= '9')) {
972   - // continue
  983 + append();
973 984 } else if (*p == '.') {
974   - lex_state = ls_number_point;
  985 + append(ls_number_point);
975 986 } else if (*p == 'e' || *p == 'E') {
976   - lex_state = ls_number_e;
  987 + append(ls_number_e);
977 988 } else {
978 989 tokenError();
979 990 }
... ... @@ -981,7 +992,7 @@ JSONParser::getToken()
981 992  
982 993 case ls_number_point:
983 994 if ((*p >= '0') && (*p <= '9')) {
984   - lex_state = ls_number_after_point;
  995 + append(ls_number_after_point);
985 996 } else {
986 997 tokenError();
987 998 }
... ... @@ -989,9 +1000,9 @@ JSONParser::getToken()
989 1000  
990 1001 case ls_number_after_point:
991 1002 if ((*p >= '0') && (*p <= '9')) {
992   - // continue
  1003 + append();
993 1004 } else if (*p == 'e' || *p == 'E') {
994   - lex_state = ls_number_e;
  1005 + append(ls_number_e);
995 1006 } else {
996 1007 tokenError();
997 1008 }
... ... @@ -999,9 +1010,9 @@ JSONParser::getToken()
999 1010  
1000 1011 case ls_number_e:
1001 1012 if ((*p >= '0') && (*p <= '9')) {
1002   - lex_state = ls_number;
  1013 + append(ls_number);
1003 1014 } else if ((*p == '+') || (*p == '-')) {
1004   - lex_state = ls_number_e_sign;
  1015 + append(ls_number_e_sign);
1005 1016 } else {
1006 1017 tokenError();
1007 1018 }
... ... @@ -1009,7 +1020,7 @@ JSONParser::getToken()
1009 1020  
1010 1021 case ls_number_e_sign:
1011 1022 if ((*p >= '0') && (*p <= '9')) {
1012   - lex_state = ls_number;
  1023 + append(ls_number);
1013 1024 } else {
1014 1025 tokenError();
1015 1026 }
... ... @@ -1018,7 +1029,7 @@ JSONParser::getToken()
1018 1029 case ls_number:
1019 1030 // We only get here after we have seen an exponent.
1020 1031 if ((*p >= '0') && (*p <= '9')) {
1021   - // continue
  1032 + append();
1022 1033 } else {
1023 1034 tokenError();
1024 1035 }
... ... @@ -1026,7 +1037,7 @@ JSONParser::getToken()
1026 1037  
1027 1038 case ls_alpha:
1028 1039 if ((*p >= 'a') && (*p <= 'z')) {
1029   - // okay
  1040 + append();
1030 1041 } else {
1031 1042 tokenError();
1032 1043 }
... ... @@ -1041,16 +1052,16 @@ JSONParser::getToken()
1041 1052 ": UTF-16 high surrogate not followed by low "
1042 1053 "surrogate");
1043 1054 }
1044   - action = ignore;
1045   - ready = true;
  1055 + ignore();
  1056 + return;
1046 1057 } else if (*p == '\\') {
1047   - lex_state = ls_backslash;
1048   - action = ignore;
  1058 + ignore(ls_backslash);
  1059 + } else {
  1060 + append();
1049 1061 }
1050 1062 break;
1051 1063  
1052 1064 case ls_backslash:
1053   - action = ignore;
1054 1065 lex_state = ls_string;
1055 1066 switch (*p) {
1056 1067 case '\\':
... ... @@ -1084,11 +1095,11 @@ JSONParser::getToken()
1084 1095 lex_state = ls_backslash;
1085 1096 tokenError();
1086 1097 }
  1098 + ignore();
1087 1099 break;
1088 1100  
1089 1101 case ls_u4:
1090 1102 using ui = unsigned int;
1091   - action = ignore;
1092 1103 if ('0' <= *p && *p <= '9') {
1093 1104 u_value = 16 * u_value + (ui(*p) - ui('0'));
1094 1105 } else if ('a' <= *p && *p <= 'f') {
... ... @@ -1107,24 +1118,13 @@ JSONParser::getToken()
1107 1118 token);
1108 1119 lex_state = ls_string;
1109 1120 }
  1121 + ignore();
1110 1122 break;
1111 1123  
1112 1124 default:
1113 1125 throw std::logic_error(
1114 1126 "JSONParser::getToken : trying to handle delimiter state");
1115 1127 }
1116   - switch (action) {
1117   - case append:
1118   - token.append(1, *p);
1119   - // fall through
1120   - case ignore:
1121   - ++p;
1122   - ++offset;
1123   - break;
1124   - }
1125   - if (ready) {
1126   - return;
1127   - }
1128 1128 }
1129 1129 }
1130 1130  
... ...