Commit f6c9019597c5077d3e99c6d41a598b49b385f59c
1 parent: d3152869
Add new methods JSONParser::append and ignore
Reduce boilerplate and increase efficiency by no longer setting and branching on the local variables 'action' and 'ready' in getToken.
Showing 1 changed file with 95 additions and 95 deletions
libqpdf/JSON.cc
| ... | ... | @@ -605,16 +605,6 @@ namespace |
| 605 | 605 | std::shared_ptr<JSON> parse(); |
| 606 | 606 | |
| 607 | 607 | private: |
| 608 | - void getToken(); | |
| 609 | - void handleToken(); | |
| 610 | - void tokenError(); | |
| 611 | - static void handle_u_code( | |
| 612 | - unsigned long codepoint, | |
| 613 | - qpdf_offset_t offset, | |
| 614 | - unsigned long& high_surrogate, | |
| 615 | - qpdf_offset_t& high_offset, | |
| 616 | - std::string& result); | |
| 617 | - | |
| 618 | 608 | enum parser_state_e { |
| 619 | 609 | ps_top, |
| 620 | 610 | ps_dict_begin, |
| ... | ... | @@ -662,6 +652,20 @@ namespace |
| 662 | 652 | std::shared_ptr<JSON> item; |
| 663 | 653 | }; |
| 664 | 654 | |
| 655 | + void getToken(); | |
| 656 | + void handleToken(); | |
| 657 | + void tokenError(); | |
| 658 | + static void handle_u_code( | |
| 659 | + unsigned long codepoint, | |
| 660 | + qpdf_offset_t offset, | |
| 661 | + unsigned long& high_surrogate, | |
| 662 | + qpdf_offset_t& high_offset, | |
| 663 | + std::string& result); | |
| 664 | + inline void append(); | |
| 665 | + inline void append(lex_state_e); | |
| 666 | + inline void ignore(); | |
| 667 | + inline void ignore(lex_state_e); | |
| 668 | + | |
| 665 | 669 | InputSource& is; |
| 666 | 670 | JSON::Reactor* reactor; |
| 667 | 671 | lex_state_e lex_state; |
| ... | ... | @@ -788,11 +792,48 @@ JSONParser::tokenError() |
| 788 | 792 | throw std::logic_error("JSON::tokenError : unhandled error"); |
| 789 | 793 | } |
| 790 | 794 | |
| 795 | +// Append current character to token and advance to next input character. | |
| 796 | +inline void | |
| 797 | +JSONParser::append() | |
| 798 | +{ | |
| 799 | + token += *p; | |
| 800 | + ++p; | |
| 801 | + ++offset; | |
| 802 | +} | |
| 803 | + | |
| 804 | +// Append current character to token, advance to next input character and | |
| 805 | +// transition to 'next' lexer state. | |
| 806 | +inline void | |
| 807 | +JSONParser::append(lex_state_e next) | |
| 808 | +{ | |
| 809 | + lex_state = next; | |
| 810 | + token += *p; | |
| 811 | + ++p; | |
| 812 | + ++offset; | |
| 813 | +} | |
| 814 | + | |
| 815 | +// Advance to next input character without appending the current character to | |
| 816 | +// token. | |
| 817 | +inline void | |
| 818 | +JSONParser::ignore() | |
| 819 | +{ | |
| 820 | + ++p; | |
| 821 | + ++offset; | |
| 822 | +} | |
| 823 | + | |
| 824 | +// Advance to next input character without appending the current character to | |
| 825 | +// token and transition to 'next' lexer state. | |
| 826 | +inline void | |
| 827 | +JSONParser::ignore(lex_state_e next) | |
| 828 | +{ | |
| 829 | + lex_state = next; | |
| 830 | + ++p; | |
| 831 | + ++offset; | |
| 832 | +} | |
| 833 | + | |
| 791 | 834 | void |
| 792 | 835 | JSONParser::getToken() |
| 793 | 836 | { |
| 794 | - enum { append, ignore } action = append; | |
| 795 | - bool ready = false; | |
| 796 | 837 | token.clear(); |
| 797 | 838 | |
| 798 | 839 | // Keep track of UTF-16 surrogate pairs. |
| ... | ... | @@ -815,8 +856,7 @@ JSONParser::getToken() |
| 815 | 856 | // end the current token (unless we are still before the start |
| 816 | 857 | // of the token). |
| 817 | 858 | if (lex_state == ls_top) { |
| 818 | - ++p; | |
| 819 | - ++offset; | |
| 859 | + ignore(); | |
| 820 | 860 | } else { |
| 821 | 861 | break; |
| 822 | 862 | } |
| ... | ... | @@ -828,111 +868,82 @@ JSONParser::getToken() |
| 828 | 868 | } |
| 829 | 869 | } else if (*p == ',') { |
| 830 | 870 | if (lex_state == ls_top) { |
| 831 | - ++p; | |
| 832 | - ++offset; | |
| 833 | - lex_state = ls_comma; | |
| 871 | + ignore(ls_comma); | |
| 834 | 872 | return; |
| 835 | 873 | } else if (lex_state == ls_string) { |
| 836 | - token += *p; | |
| 837 | - ++p; | |
| 838 | - ++offset; | |
| 874 | + append(); | |
| 839 | 875 | } else { |
| 840 | 876 | break; |
| 841 | 877 | } |
| 842 | 878 | } else if (*p == ':') { |
| 843 | 879 | if (lex_state == ls_top) { |
| 844 | - ++p; | |
| 845 | - ++offset; | |
| 846 | - lex_state = ls_colon; | |
| 880 | + ignore(ls_colon); | |
| 847 | 881 | return; |
| 848 | 882 | } else if (lex_state == ls_string) { |
| 849 | - token += *p; | |
| 850 | - ++p; | |
| 851 | - ++offset; | |
| 883 | + append(); | |
| 852 | 884 | } else { |
| 853 | 885 | break; |
| 854 | 886 | } |
| 855 | 887 | } else if (*p == ' ') { |
| 856 | 888 | if (lex_state == ls_top) { |
| 857 | - ++p; | |
| 858 | - ++offset; | |
| 889 | + ignore(); | |
| 859 | 890 | } else if (lex_state == ls_string) { |
| 860 | - token += *p; | |
| 861 | - ++p; | |
| 862 | - ++offset; | |
| 891 | + append(); | |
| 863 | 892 | } else { |
| 864 | 893 | break; |
| 865 | 894 | } |
| 866 | 895 | } else if (*p == '{') { |
| 867 | 896 | if (lex_state == ls_top) { |
| 868 | 897 | token_start = offset; |
| 869 | - ++p; | |
| 870 | - ++offset; | |
| 871 | - lex_state = ls_begin_dict; | |
| 898 | + ignore(ls_begin_dict); | |
| 872 | 899 | return; |
| 873 | 900 | } else if (lex_state == ls_string) { |
| 874 | - token += *p; | |
| 875 | - ++p; | |
| 876 | - ++offset; | |
| 901 | + append(); | |
| 877 | 902 | } else { |
| 878 | 903 | break; |
| 879 | 904 | } |
| 880 | 905 | } else if (*p == '}') { |
| 881 | 906 | if (lex_state == ls_top) { |
| 882 | - ++p; | |
| 883 | - ++offset; | |
| 884 | - lex_state = ls_end_dict; | |
| 907 | + ignore(ls_end_dict); | |
| 885 | 908 | return; |
| 886 | 909 | } else if (lex_state == ls_string) { |
| 887 | - token += *p; | |
| 888 | - ++p; | |
| 889 | - ++offset; | |
| 910 | + append(); | |
| 890 | 911 | } else { |
| 891 | 912 | break; |
| 892 | 913 | } |
| 893 | 914 | } else if (*p == '[') { |
| 894 | 915 | if (lex_state == ls_top) { |
| 895 | 916 | token_start = offset; |
| 896 | - ++p; | |
| 897 | - ++offset; | |
| 898 | - lex_state = ls_begin_array; | |
| 917 | + ignore(ls_begin_array); | |
| 899 | 918 | return; |
| 900 | 919 | } else if (lex_state == ls_string) { |
| 901 | - token += *p; | |
| 902 | - ++p; | |
| 903 | - ++offset; | |
| 920 | + append(); | |
| 904 | 921 | } else { |
| 905 | 922 | break; |
| 906 | 923 | } |
| 907 | 924 | } else if (*p == ']') { |
| 908 | 925 | if (lex_state == ls_top) { |
| 909 | - ++p; | |
| 910 | - ++offset; | |
| 911 | - lex_state = ls_end_array; | |
| 926 | + ignore(ls_end_array); | |
| 912 | 927 | return; |
| 913 | 928 | } else if (lex_state == ls_string) { |
| 914 | - token += *p; | |
| 915 | - ++p; | |
| 916 | - ++offset; | |
| 929 | + append(); | |
| 917 | 930 | } else { |
| 918 | 931 | break; |
| 919 | 932 | } |
| 920 | 933 | } else { |
| 921 | - action = append; | |
| 922 | 934 | switch (lex_state) { |
| 923 | 935 | case ls_top: |
| 924 | 936 | token_start = offset; |
| 925 | 937 | if (*p == '"') { |
| 926 | - lex_state = ls_string; | |
| 927 | - action = ignore; | |
| 938 | + ignore(ls_string); | |
| 928 | 939 | } else if ((*p >= 'a') && (*p <= 'z')) { |
| 929 | - lex_state = ls_alpha; | |
| 940 | + append(ls_alpha); | |
| 930 | 941 | } else if (*p == '-') { |
| 931 | - lex_state = ls_number_minus; | |
| 942 | + append(ls_number_minus); | |
| 932 | 943 | } else if ((*p >= '1') && (*p <= '9')) { |
| 933 | - lex_state = ls_number_before_point; | |
| 944 | + append(ls_number_before_point); | |
| 934 | 945 | } else if (*p == '0') { |
| 935 | - lex_state = ls_number_leading_zero; | |
| 946 | + append(ls_number_leading_zero); | |
| 936 | 947 | } else { |
| 937 | 948 | QTC::TC("libtests", "JSON parse bad character"); |
| 938 | 949 | throw std::runtime_error( |
| ... | ... | @@ -943,9 +954,9 @@ JSONParser::getToken() |
| 943 | 954 | |
| 944 | 955 | case ls_number_minus: |
| 945 | 956 | if ((*p >= '1') && (*p <= '9')) { |
| 946 | - lex_state = ls_number_before_point; | |
| 957 | + append(ls_number_before_point); | |
| 947 | 958 | } else if (*p == '0') { |
| 948 | - lex_state = ls_number_leading_zero; | |
| 959 | + append(ls_number_leading_zero); | |
| 949 | 960 | } else { |
| 950 | 961 | QTC::TC("libtests", "JSON parse number minus no digits"); |
| 951 | 962 | throw std::runtime_error( |
| ... | ... | @@ -956,9 +967,9 @@ JSONParser::getToken() |
| 956 | 967 | |
| 957 | 968 | case ls_number_leading_zero: |
| 958 | 969 | if (*p == '.') { |
| 959 | - lex_state = ls_number_point; | |
| 970 | + append(ls_number_point); | |
| 960 | 971 | } else if (*p == 'e' || *p == 'E') { |
| 961 | - lex_state = ls_number_e; | |
| 972 | + append(ls_number_e); | |
| 962 | 973 | } else { |
| 963 | 974 | QTC::TC("libtests", "JSON parse leading zero"); |
| 964 | 975 | throw std::runtime_error( |
| ... | ... | @@ -969,11 +980,11 @@ JSONParser::getToken() |
| 969 | 980 | |
| 970 | 981 | case ls_number_before_point: |
| 971 | 982 | if ((*p >= '0') && (*p <= '9')) { |
| 972 | - // continue | |
| 983 | + append(); | |
| 973 | 984 | } else if (*p == '.') { |
| 974 | - lex_state = ls_number_point; | |
| 985 | + append(ls_number_point); | |
| 975 | 986 | } else if (*p == 'e' || *p == 'E') { |
| 976 | - lex_state = ls_number_e; | |
| 987 | + append(ls_number_e); | |
| 977 | 988 | } else { |
| 978 | 989 | tokenError(); |
| 979 | 990 | } |
| ... | ... | @@ -981,7 +992,7 @@ JSONParser::getToken() |
| 981 | 992 | |
| 982 | 993 | case ls_number_point: |
| 983 | 994 | if ((*p >= '0') && (*p <= '9')) { |
| 984 | - lex_state = ls_number_after_point; | |
| 995 | + append(ls_number_after_point); | |
| 985 | 996 | } else { |
| 986 | 997 | tokenError(); |
| 987 | 998 | } |
| ... | ... | @@ -989,9 +1000,9 @@ JSONParser::getToken() |
| 989 | 1000 | |
| 990 | 1001 | case ls_number_after_point: |
| 991 | 1002 | if ((*p >= '0') && (*p <= '9')) { |
| 992 | - // continue | |
| 1003 | + append(); | |
| 993 | 1004 | } else if (*p == 'e' || *p == 'E') { |
| 994 | - lex_state = ls_number_e; | |
| 1005 | + append(ls_number_e); | |
| 995 | 1006 | } else { |
| 996 | 1007 | tokenError(); |
| 997 | 1008 | } |
| ... | ... | @@ -999,9 +1010,9 @@ JSONParser::getToken() |
| 999 | 1010 | |
| 1000 | 1011 | case ls_number_e: |
| 1001 | 1012 | if ((*p >= '0') && (*p <= '9')) { |
| 1002 | - lex_state = ls_number; | |
| 1013 | + append(ls_number); | |
| 1003 | 1014 | } else if ((*p == '+') || (*p == '-')) { |
| 1004 | - lex_state = ls_number_e_sign; | |
| 1015 | + append(ls_number_e_sign); | |
| 1005 | 1016 | } else { |
| 1006 | 1017 | tokenError(); |
| 1007 | 1018 | } |
| ... | ... | @@ -1009,7 +1020,7 @@ JSONParser::getToken() |
| 1009 | 1020 | |
| 1010 | 1021 | case ls_number_e_sign: |
| 1011 | 1022 | if ((*p >= '0') && (*p <= '9')) { |
| 1012 | - lex_state = ls_number; | |
| 1023 | + append(ls_number); | |
| 1013 | 1024 | } else { |
| 1014 | 1025 | tokenError(); |
| 1015 | 1026 | } |
| ... | ... | @@ -1018,7 +1029,7 @@ JSONParser::getToken() |
| 1018 | 1029 | case ls_number: |
| 1019 | 1030 | // We only get here after we have seen an exponent. |
| 1020 | 1031 | if ((*p >= '0') && (*p <= '9')) { |
| 1021 | - // continue | |
| 1032 | + append(); | |
| 1022 | 1033 | } else { |
| 1023 | 1034 | tokenError(); |
| 1024 | 1035 | } |
| ... | ... | @@ -1026,7 +1037,7 @@ JSONParser::getToken() |
| 1026 | 1037 | |
| 1027 | 1038 | case ls_alpha: |
| 1028 | 1039 | if ((*p >= 'a') && (*p <= 'z')) { |
| 1029 | - // okay | |
| 1040 | + append(); | |
| 1030 | 1041 | } else { |
| 1031 | 1042 | tokenError(); |
| 1032 | 1043 | } |
| ... | ... | @@ -1041,16 +1052,16 @@ JSONParser::getToken() |
| 1041 | 1052 | ": UTF-16 high surrogate not followed by low " |
| 1042 | 1053 | "surrogate"); |
| 1043 | 1054 | } |
| 1044 | - action = ignore; | |
| 1045 | - ready = true; | |
| 1055 | + ignore(); | |
| 1056 | + return; | |
| 1046 | 1057 | } else if (*p == '\\') { |
| 1047 | - lex_state = ls_backslash; | |
| 1048 | - action = ignore; | |
| 1058 | + ignore(ls_backslash); | |
| 1059 | + } else { | |
| 1060 | + append(); | |
| 1049 | 1061 | } |
| 1050 | 1062 | break; |
| 1051 | 1063 | |
| 1052 | 1064 | case ls_backslash: |
| 1053 | - action = ignore; | |
| 1054 | 1065 | lex_state = ls_string; |
| 1055 | 1066 | switch (*p) { |
| 1056 | 1067 | case '\\': |
| ... | ... | @@ -1084,11 +1095,11 @@ JSONParser::getToken() |
| 1084 | 1095 | lex_state = ls_backslash; |
| 1085 | 1096 | tokenError(); |
| 1086 | 1097 | } |
| 1098 | + ignore(); | |
| 1087 | 1099 | break; |
| 1088 | 1100 | |
| 1089 | 1101 | case ls_u4: |
| 1090 | 1102 | using ui = unsigned int; |
| 1091 | - action = ignore; | |
| 1092 | 1103 | if ('0' <= *p && *p <= '9') { |
| 1093 | 1104 | u_value = 16 * u_value + (ui(*p) - ui('0')); |
| 1094 | 1105 | } else if ('a' <= *p && *p <= 'f') { |
| ... | ... | @@ -1107,24 +1118,13 @@ JSONParser::getToken() |
| 1107 | 1118 | token); |
| 1108 | 1119 | lex_state = ls_string; |
| 1109 | 1120 | } |
| 1121 | + ignore(); | |
| 1110 | 1122 | break; |
| 1111 | 1123 | |
| 1112 | 1124 | default: |
| 1113 | 1125 | throw std::logic_error( |
| 1114 | 1126 | "JSONParser::getToken : trying to handle delimiter state"); |
| 1115 | 1127 | } |
| 1116 | - switch (action) { | |
| 1117 | - case append: | |
| 1118 | - token.append(1, *p); | |
| 1119 | - // fall through | |
| 1120 | - case ignore: | |
| 1121 | - ++p; | |
| 1122 | - ++offset; | |
| 1123 | - break; | |
| 1124 | - } | |
| 1125 | - if (ready) { | |
| 1126 | - return; | |
| 1127 | - } | |
| 1128 | 1128 | } |
| 1129 | 1129 | } |
| 1130 | 1130 | ... | ... |