Commit f6c9019597c5077d3e99c6d41a598b49b385f59c

Authored by m-holger
1 parent d3152869

Add new methods JSONParser::append and ignore

Reduce boilerplate and improve efficiency by no longer setting and
branching on the local variables 'action' and 'ready' in getToken.
Showing 1 changed file with 95 additions and 95 deletions
libqpdf/JSON.cc
@@ -605,16 +605,6 @@ namespace @@ -605,16 +605,6 @@ namespace
605 std::shared_ptr<JSON> parse(); 605 std::shared_ptr<JSON> parse();
606 606
607 private: 607 private:
608 - void getToken();  
609 - void handleToken();  
610 - void tokenError();  
611 - static void handle_u_code(  
612 - unsigned long codepoint,  
613 - qpdf_offset_t offset,  
614 - unsigned long& high_surrogate,  
615 - qpdf_offset_t& high_offset,  
616 - std::string& result);  
617 -  
618 enum parser_state_e { 608 enum parser_state_e {
619 ps_top, 609 ps_top,
620 ps_dict_begin, 610 ps_dict_begin,
@@ -662,6 +652,20 @@ namespace @@ -662,6 +652,20 @@ namespace
662 std::shared_ptr<JSON> item; 652 std::shared_ptr<JSON> item;
663 }; 653 };
664 654
  655 + void getToken();
  656 + void handleToken();
  657 + void tokenError();
  658 + static void handle_u_code(
  659 + unsigned long codepoint,
  660 + qpdf_offset_t offset,
  661 + unsigned long& high_surrogate,
  662 + qpdf_offset_t& high_offset,
  663 + std::string& result);
  664 + inline void append();
  665 + inline void append(lex_state_e);
  666 + inline void ignore();
  667 + inline void ignore(lex_state_e);
  668 +
665 InputSource& is; 669 InputSource& is;
666 JSON::Reactor* reactor; 670 JSON::Reactor* reactor;
667 lex_state_e lex_state; 671 lex_state_e lex_state;
@@ -788,11 +792,48 @@ JSONParser::tokenError() @@ -788,11 +792,48 @@ JSONParser::tokenError()
788 throw std::logic_error("JSON::tokenError : unhandled error"); 792 throw std::logic_error("JSON::tokenError : unhandled error");
789 } 793 }
790 794
  795 +// Append the current character (*p) to token, then advance p and offset by one.
  796 +inline void
  797 +JSONParser::append()
  798 +{
  799 + token += *p;
  800 + ++p;
  801 + ++offset;
  802 +}
  803 +
  804 +// Set lex_state to 'next', append the current character (*p) to token, and
  805 +// advance p and offset by one.
  806 +inline void
  807 +JSONParser::append(lex_state_e next)
  808 +{
  809 + lex_state = next;
  810 + token += *p;
  811 + ++p;
  812 + ++offset;
  813 +}
  814 +
  815 +// Skip the current character: advance p and offset by one without adding
  816 +// anything to token.
  817 +inline void
  818 +JSONParser::ignore()
  819 +{
  820 + ++p;
  821 + ++offset;
  822 +}
  823 +
  824 +// Set lex_state to 'next' and skip the current character: advance p and
  825 +// offset by one without adding anything to token.
  826 +inline void
  827 +JSONParser::ignore(lex_state_e next)
  828 +{
  829 + lex_state = next;
  830 + ++p;
  831 + ++offset;
  832 +}
  833 +
791 void 834 void
792 JSONParser::getToken() 835 JSONParser::getToken()
793 { 836 {
794 - enum { append, ignore } action = append;  
795 - bool ready = false;  
796 token.clear(); 837 token.clear();
797 838
798 // Keep track of UTF-16 surrogate pairs. 839 // Keep track of UTF-16 surrogate pairs.
@@ -815,8 +856,7 @@ JSONParser::getToken() @@ -815,8 +856,7 @@ JSONParser::getToken()
815 // end the current token (unless we are still before the start 856 // end the current token (unless we are still before the start
816 // of the token). 857 // of the token).
817 if (lex_state == ls_top) { 858 if (lex_state == ls_top) {
818 - ++p;  
819 - ++offset; 859 + ignore();
820 } else { 860 } else {
821 break; 861 break;
822 } 862 }
@@ -828,111 +868,82 @@ JSONParser::getToken() @@ -828,111 +868,82 @@ JSONParser::getToken()
828 } 868 }
829 } else if (*p == ',') { 869 } else if (*p == ',') {
830 if (lex_state == ls_top) { 870 if (lex_state == ls_top) {
831 - ++p;  
832 - ++offset;  
833 - lex_state = ls_comma; 871 + ignore(ls_comma);
834 return; 872 return;
835 } else if (lex_state == ls_string) { 873 } else if (lex_state == ls_string) {
836 - token += *p;  
837 - ++p;  
838 - ++offset; 874 + append();
839 } else { 875 } else {
840 break; 876 break;
841 } 877 }
842 } else if (*p == ':') { 878 } else if (*p == ':') {
843 if (lex_state == ls_top) { 879 if (lex_state == ls_top) {
844 - ++p;  
845 - ++offset;  
846 - lex_state = ls_colon; 880 + ignore(ls_colon);
847 return; 881 return;
848 } else if (lex_state == ls_string) { 882 } else if (lex_state == ls_string) {
849 - token += *p;  
850 - ++p;  
851 - ++offset; 883 + append();
852 } else { 884 } else {
853 break; 885 break;
854 } 886 }
855 } else if (*p == ' ') { 887 } else if (*p == ' ') {
856 if (lex_state == ls_top) { 888 if (lex_state == ls_top) {
857 - ++p;  
858 - ++offset; 889 + ignore();
859 } else if (lex_state == ls_string) { 890 } else if (lex_state == ls_string) {
860 - token += *p;  
861 - ++p;  
862 - ++offset; 891 + append();
863 } else { 892 } else {
864 break; 893 break;
865 } 894 }
866 } else if (*p == '{') { 895 } else if (*p == '{') {
867 if (lex_state == ls_top) { 896 if (lex_state == ls_top) {
868 token_start = offset; 897 token_start = offset;
869 - ++p;  
870 - ++offset;  
871 - lex_state = ls_begin_dict; 898 + ignore(ls_begin_dict);
872 return; 899 return;
873 } else if (lex_state == ls_string) { 900 } else if (lex_state == ls_string) {
874 - token += *p;  
875 - ++p;  
876 - ++offset; 901 + append();
877 } else { 902 } else {
878 break; 903 break;
879 } 904 }
880 } else if (*p == '}') { 905 } else if (*p == '}') {
881 if (lex_state == ls_top) { 906 if (lex_state == ls_top) {
882 - ++p;  
883 - ++offset;  
884 - lex_state = ls_end_dict; 907 + ignore(ls_end_dict);
885 return; 908 return;
886 } else if (lex_state == ls_string) { 909 } else if (lex_state == ls_string) {
887 - token += *p;  
888 - ++p;  
889 - ++offset; 910 + append();
890 } else { 911 } else {
891 break; 912 break;
892 } 913 }
893 } else if (*p == '[') { 914 } else if (*p == '[') {
894 if (lex_state == ls_top) { 915 if (lex_state == ls_top) {
895 token_start = offset; 916 token_start = offset;
896 - ++p;  
897 - ++offset;  
898 - lex_state = ls_begin_array; 917 + ignore(ls_begin_array);
899 return; 918 return;
900 } else if (lex_state == ls_string) { 919 } else if (lex_state == ls_string) {
901 - token += *p;  
902 - ++p;  
903 - ++offset; 920 + append();
904 } else { 921 } else {
905 break; 922 break;
906 } 923 }
907 } else if (*p == ']') { 924 } else if (*p == ']') {
908 if (lex_state == ls_top) { 925 if (lex_state == ls_top) {
909 - ++p;  
910 - ++offset;  
911 - lex_state = ls_end_array; 926 + ignore(ls_end_array);
912 return; 927 return;
913 } else if (lex_state == ls_string) { 928 } else if (lex_state == ls_string) {
914 - token += *p;  
915 - ++p;  
916 - ++offset; 929 + append();
917 } else { 930 } else {
918 break; 931 break;
919 } 932 }
920 } else { 933 } else {
921 - action = append;  
922 switch (lex_state) { 934 switch (lex_state) {
923 case ls_top: 935 case ls_top:
924 token_start = offset; 936 token_start = offset;
925 if (*p == '"') { 937 if (*p == '"') {
926 - lex_state = ls_string;  
927 - action = ignore; 938 + ignore(ls_string);
928 } else if ((*p >= 'a') && (*p <= 'z')) { 939 } else if ((*p >= 'a') && (*p <= 'z')) {
929 - lex_state = ls_alpha; 940 + append(ls_alpha);
930 } else if (*p == '-') { 941 } else if (*p == '-') {
931 - lex_state = ls_number_minus; 942 + append(ls_number_minus);
932 } else if ((*p >= '1') && (*p <= '9')) { 943 } else if ((*p >= '1') && (*p <= '9')) {
933 - lex_state = ls_number_before_point; 944 + append(ls_number_before_point);
934 } else if (*p == '0') { 945 } else if (*p == '0') {
935 - lex_state = ls_number_leading_zero; 946 + append(ls_number_leading_zero);
936 } else { 947 } else {
937 QTC::TC("libtests", "JSON parse bad character"); 948 QTC::TC("libtests", "JSON parse bad character");
938 throw std::runtime_error( 949 throw std::runtime_error(
@@ -943,9 +954,9 @@ JSONParser::getToken() @@ -943,9 +954,9 @@ JSONParser::getToken()
943 954
944 case ls_number_minus: 955 case ls_number_minus:
945 if ((*p >= '1') && (*p <= '9')) { 956 if ((*p >= '1') && (*p <= '9')) {
946 - lex_state = ls_number_before_point; 957 + append(ls_number_before_point);
947 } else if (*p == '0') { 958 } else if (*p == '0') {
948 - lex_state = ls_number_leading_zero; 959 + append(ls_number_leading_zero);
949 } else { 960 } else {
950 QTC::TC("libtests", "JSON parse number minus no digits"); 961 QTC::TC("libtests", "JSON parse number minus no digits");
951 throw std::runtime_error( 962 throw std::runtime_error(
@@ -956,9 +967,9 @@ JSONParser::getToken() @@ -956,9 +967,9 @@ JSONParser::getToken()
956 967
957 case ls_number_leading_zero: 968 case ls_number_leading_zero:
958 if (*p == '.') { 969 if (*p == '.') {
959 - lex_state = ls_number_point; 970 + append(ls_number_point);
960 } else if (*p == 'e' || *p == 'E') { 971 } else if (*p == 'e' || *p == 'E') {
961 - lex_state = ls_number_e; 972 + append(ls_number_e);
962 } else { 973 } else {
963 QTC::TC("libtests", "JSON parse leading zero"); 974 QTC::TC("libtests", "JSON parse leading zero");
964 throw std::runtime_error( 975 throw std::runtime_error(
@@ -969,11 +980,11 @@ JSONParser::getToken() @@ -969,11 +980,11 @@ JSONParser::getToken()
969 980
970 case ls_number_before_point: 981 case ls_number_before_point:
971 if ((*p >= '0') && (*p <= '9')) { 982 if ((*p >= '0') && (*p <= '9')) {
972 - // continue 983 + append();
973 } else if (*p == '.') { 984 } else if (*p == '.') {
974 - lex_state = ls_number_point; 985 + append(ls_number_point);
975 } else if (*p == 'e' || *p == 'E') { 986 } else if (*p == 'e' || *p == 'E') {
976 - lex_state = ls_number_e; 987 + append(ls_number_e);
977 } else { 988 } else {
978 tokenError(); 989 tokenError();
979 } 990 }
@@ -981,7 +992,7 @@ JSONParser::getToken() @@ -981,7 +992,7 @@ JSONParser::getToken()
981 992
982 case ls_number_point: 993 case ls_number_point:
983 if ((*p >= '0') && (*p <= '9')) { 994 if ((*p >= '0') && (*p <= '9')) {
984 - lex_state = ls_number_after_point; 995 + append(ls_number_after_point);
985 } else { 996 } else {
986 tokenError(); 997 tokenError();
987 } 998 }
@@ -989,9 +1000,9 @@ JSONParser::getToken() @@ -989,9 +1000,9 @@ JSONParser::getToken()
989 1000
990 case ls_number_after_point: 1001 case ls_number_after_point:
991 if ((*p >= '0') && (*p <= '9')) { 1002 if ((*p >= '0') && (*p <= '9')) {
992 - // continue 1003 + append();
993 } else if (*p == 'e' || *p == 'E') { 1004 } else if (*p == 'e' || *p == 'E') {
994 - lex_state = ls_number_e; 1005 + append(ls_number_e);
995 } else { 1006 } else {
996 tokenError(); 1007 tokenError();
997 } 1008 }
@@ -999,9 +1010,9 @@ JSONParser::getToken() @@ -999,9 +1010,9 @@ JSONParser::getToken()
999 1010
1000 case ls_number_e: 1011 case ls_number_e:
1001 if ((*p >= '0') && (*p <= '9')) { 1012 if ((*p >= '0') && (*p <= '9')) {
1002 - lex_state = ls_number; 1013 + append(ls_number);
1003 } else if ((*p == '+') || (*p == '-')) { 1014 } else if ((*p == '+') || (*p == '-')) {
1004 - lex_state = ls_number_e_sign; 1015 + append(ls_number_e_sign);
1005 } else { 1016 } else {
1006 tokenError(); 1017 tokenError();
1007 } 1018 }
@@ -1009,7 +1020,7 @@ JSONParser::getToken() @@ -1009,7 +1020,7 @@ JSONParser::getToken()
1009 1020
1010 case ls_number_e_sign: 1021 case ls_number_e_sign:
1011 if ((*p >= '0') && (*p <= '9')) { 1022 if ((*p >= '0') && (*p <= '9')) {
1012 - lex_state = ls_number; 1023 + append(ls_number);
1013 } else { 1024 } else {
1014 tokenError(); 1025 tokenError();
1015 } 1026 }
@@ -1018,7 +1029,7 @@ JSONParser::getToken() @@ -1018,7 +1029,7 @@ JSONParser::getToken()
1018 case ls_number: 1029 case ls_number:
1019 // We only get here after we have seen an exponent. 1030 // We only get here after we have seen an exponent.
1020 if ((*p >= '0') && (*p <= '9')) { 1031 if ((*p >= '0') && (*p <= '9')) {
1021 - // continue 1032 + append();
1022 } else { 1033 } else {
1023 tokenError(); 1034 tokenError();
1024 } 1035 }
@@ -1026,7 +1037,7 @@ JSONParser::getToken() @@ -1026,7 +1037,7 @@ JSONParser::getToken()
1026 1037
1027 case ls_alpha: 1038 case ls_alpha:
1028 if ((*p >= 'a') && (*p <= 'z')) { 1039 if ((*p >= 'a') && (*p <= 'z')) {
1029 - // okay 1040 + append();
1030 } else { 1041 } else {
1031 tokenError(); 1042 tokenError();
1032 } 1043 }
@@ -1041,16 +1052,16 @@ JSONParser::getToken() @@ -1041,16 +1052,16 @@ JSONParser::getToken()
1041 ": UTF-16 high surrogate not followed by low " 1052 ": UTF-16 high surrogate not followed by low "
1042 "surrogate"); 1053 "surrogate");
1043 } 1054 }
1044 - action = ignore;  
1045 - ready = true; 1055 + ignore();
  1056 + return;
1046 } else if (*p == '\\') { 1057 } else if (*p == '\\') {
1047 - lex_state = ls_backslash;  
1048 - action = ignore; 1058 + ignore(ls_backslash);
  1059 + } else {
  1060 + append();
1049 } 1061 }
1050 break; 1062 break;
1051 1063
1052 case ls_backslash: 1064 case ls_backslash:
1053 - action = ignore;  
1054 lex_state = ls_string; 1065 lex_state = ls_string;
1055 switch (*p) { 1066 switch (*p) {
1056 case '\\': 1067 case '\\':
@@ -1084,11 +1095,11 @@ JSONParser::getToken() @@ -1084,11 +1095,11 @@ JSONParser::getToken()
1084 lex_state = ls_backslash; 1095 lex_state = ls_backslash;
1085 tokenError(); 1096 tokenError();
1086 } 1097 }
  1098 + ignore();
1087 break; 1099 break;
1088 1100
1089 case ls_u4: 1101 case ls_u4:
1090 using ui = unsigned int; 1102 using ui = unsigned int;
1091 - action = ignore;  
1092 if ('0' <= *p && *p <= '9') { 1103 if ('0' <= *p && *p <= '9') {
1093 u_value = 16 * u_value + (ui(*p) - ui('0')); 1104 u_value = 16 * u_value + (ui(*p) - ui('0'));
1094 } else if ('a' <= *p && *p <= 'f') { 1105 } else if ('a' <= *p && *p <= 'f') {
@@ -1107,24 +1118,13 @@ JSONParser::getToken() @@ -1107,24 +1118,13 @@ JSONParser::getToken()
1107 token); 1118 token);
1108 lex_state = ls_string; 1119 lex_state = ls_string;
1109 } 1120 }
  1121 + ignore();
1110 break; 1122 break;
1111 1123
1112 default: 1124 default:
1113 throw std::logic_error( 1125 throw std::logic_error(
1114 "JSONParser::getToken : trying to handle delimiter state"); 1126 "JSONParser::getToken : trying to handle delimiter state");
1115 } 1127 }
1116 - switch (action) {  
1117 - case append:  
1118 - token.append(1, *p);  
1119 - // fall through  
1120 - case ignore:  
1121 - ++p;  
1122 - ++offset;  
1123 - break;  
1124 - }  
1125 - if (ready) {  
1126 - return;  
1127 - }  
1128 } 1128 }
1129 } 1129 }
1130 1130