Commit e4e03e9ac10d8ea0c1a8ef8c78f0103068928822

Authored by Jay Berkenbilt
Committed by GitHub
2 parents 76189c44 b6f04854

Merge pull request #890 from m-holger/jpsp

Eliminate the use of shared pointers in JSONParser
Showing 41 changed files with 641 additions and 559 deletions
include/qpdf/JSON.hh
@@ -54,6 +54,7 @@ class JSON @@ -54,6 +54,7 @@ class JSON
54 { 54 {
55 public: 55 public:
56 static int constexpr LATEST = 2; 56 static int constexpr LATEST = 2;
  57 + JSON() = default;
57 58
58 QPDF_DLL 59 QPDF_DLL
59 std::string unparse() const; 60 std::string unparse() const;
libqpdf/JSON.cc
@@ -4,19 +4,11 @@ @@ -4,19 +4,11 @@
4 #include <qpdf/Pl_Base64.hh> 4 #include <qpdf/Pl_Base64.hh>
5 #include <qpdf/Pl_Concatenate.hh> 5 #include <qpdf/Pl_Concatenate.hh>
6 #include <qpdf/Pl_String.hh> 6 #include <qpdf/Pl_String.hh>
7 -#include <qpdf/QIntC.hh>  
8 #include <qpdf/QTC.hh> 7 #include <qpdf/QTC.hh>
9 #include <qpdf/QUtil.hh> 8 #include <qpdf/QUtil.hh>
10 #include <cstring> 9 #include <cstring>
11 #include <stdexcept> 10 #include <stdexcept>
12 11
13 -template <typename T>  
14 -static qpdf_offset_t  
15 -toO(T const& i)  
16 -{  
17 - return QIntC::to_offset(i);  
18 -}  
19 -  
20 JSON::Members::Members(std::shared_ptr<JSON_value> value) : 12 JSON::Members::Members(std::shared_ptr<JSON_value> value) :
21 value(value), 13 value(value),
22 start(0), 14 start(0),
@@ -622,11 +614,6 @@ namespace @@ -622,11 +614,6 @@ namespace
622 is(is), 614 is(is),
623 reactor(reactor), 615 reactor(reactor),
624 lex_state(ls_top), 616 lex_state(ls_top),
625 - number_before_point(0),  
626 - number_after_point(0),  
627 - number_after_e(0),  
628 - number_saw_point(false),  
629 - number_saw_e(false),  
630 bytes(0), 617 bytes(0),
631 p(buf), 618 p(buf),
632 u_count(0), 619 u_count(0),
@@ -637,21 +624,9 @@ namespace @@ -637,21 +624,9 @@ namespace
637 { 624 {
638 } 625 }
639 626
640 - std::shared_ptr<JSON> parse(); 627 + JSON parse();
641 628
642 private: 629 private:
643 - void getToken();  
644 - void handleToken();  
645 - static std::string  
646 - decode_string(std::string const& json, qpdf_offset_t offset);  
647 - static void handle_u_code(  
648 - char const* s,  
649 - qpdf_offset_t offset,  
650 - qpdf_offset_t i,  
651 - unsigned long& high_surrogate,  
652 - qpdf_offset_t& high_offset,  
653 - std::string& result);  
654 -  
655 enum parser_state_e { 630 enum parser_state_e {
656 ps_top, 631 ps_top,
657 ps_dict_begin, 632 ps_dict_begin,
@@ -668,30 +643,65 @@ namespace @@ -668,30 +643,65 @@ namespace
668 enum lex_state_e { 643 enum lex_state_e {
669 ls_top, 644 ls_top,
670 ls_number, 645 ls_number,
  646 + ls_number_minus,
  647 + ls_number_leading_zero,
  648 + ls_number_before_point,
  649 + ls_number_point,
  650 + ls_number_after_point,
  651 + ls_number_e,
  652 + ls_number_e_sign,
671 ls_alpha, 653 ls_alpha,
672 ls_string, 654 ls_string,
673 ls_backslash, 655 ls_backslash,
674 ls_u4, 656 ls_u4,
  657 + ls_begin_array,
  658 + ls_end_array,
  659 + ls_begin_dict,
  660 + ls_end_dict,
  661 + ls_colon,
  662 + ls_comma,
675 }; 663 };
676 664
  665 + struct StackFrame
  666 + {
  667 + StackFrame(parser_state_e state, JSON& item) :
  668 + state(state),
  669 + item(item)
  670 + {
  671 + }
  672 +
  673 + parser_state_e state;
  674 + JSON item;
  675 + };
  676 +
  677 + void getToken();
  678 + void handleToken();
  679 + void tokenError();
  680 + static void handle_u_code(
  681 + unsigned long codepoint,
  682 + qpdf_offset_t offset,
  683 + unsigned long& high_surrogate,
  684 + qpdf_offset_t& high_offset,
  685 + std::string& result);
  686 + inline void append();
  687 + inline void append(lex_state_e);
  688 + inline void ignore();
  689 + inline void ignore(lex_state_e);
  690 +
677 InputSource& is; 691 InputSource& is;
678 JSON::Reactor* reactor; 692 JSON::Reactor* reactor;
679 lex_state_e lex_state; 693 lex_state_e lex_state;
680 - size_t number_before_point;  
681 - size_t number_after_point;  
682 - size_t number_after_e;  
683 - bool number_saw_point;  
684 - bool number_saw_e;  
685 char buf[16384]; 694 char buf[16384];
686 size_t bytes; 695 size_t bytes;
687 char const* p; 696 char const* p;
688 qpdf_offset_t u_count; 697 qpdf_offset_t u_count;
  698 + unsigned long u_value{0};
689 qpdf_offset_t offset; 699 qpdf_offset_t offset;
690 bool done; 700 bool done;
691 std::string token; 701 std::string token;
  702 + qpdf_offset_t token_start{0};
692 parser_state_e parser_state; 703 parser_state_e parser_state;
693 - std::vector<std::shared_ptr<JSON>> stack;  
694 - std::vector<parser_state_e> ps_stack; 704 + std::vector<StackFrame> stack;
695 std::string dict_key; 705 std::string dict_key;
696 qpdf_offset_t dict_key_offset; 706 qpdf_offset_t dict_key_offset;
697 }; 707 };
@@ -699,22 +709,15 @@ namespace @@ -699,22 +709,15 @@ namespace
699 709
700 void 710 void
701 JSONParser::handle_u_code( 711 JSONParser::handle_u_code(
702 - char const* s, 712 + unsigned long codepoint,
703 qpdf_offset_t offset, 713 qpdf_offset_t offset,
704 - qpdf_offset_t i,  
705 unsigned long& high_surrogate, 714 unsigned long& high_surrogate,
706 qpdf_offset_t& high_offset, 715 qpdf_offset_t& high_offset,
707 std::string& result) 716 std::string& result)
708 { 717 {
709 - std::string hex = QUtil::hex_decode(std::string(s + i + 1, s + i + 5));  
710 - unsigned char high = static_cast<unsigned char>(hex.at(0));  
711 - unsigned char low = static_cast<unsigned char>(hex.at(1));  
712 - unsigned long codepoint = high;  
713 - codepoint <<= 8;  
714 - codepoint += low;  
715 if ((codepoint & 0xFC00) == 0xD800) { 718 if ((codepoint & 0xFC00) == 0xD800) {
716 // high surrogate 719 // high surrogate
717 - qpdf_offset_t new_high_offset = offset + i; 720 + qpdf_offset_t new_high_offset = offset;
718 if (high_offset) { 721 if (high_offset) {
719 QTC::TC("libtests", "JSON 16 high high"); 722 QTC::TC("libtests", "JSON 16 high high");
720 throw std::runtime_error( 723 throw std::runtime_error(
@@ -727,10 +730,10 @@ JSONParser::handle_u_code( @@ -727,10 +730,10 @@ JSONParser::handle_u_code(
727 high_surrogate = codepoint; 730 high_surrogate = codepoint;
728 } else if ((codepoint & 0xFC00) == 0xDC00) { 731 } else if ((codepoint & 0xFC00) == 0xDC00) {
729 // low surrogate 732 // low surrogate
730 - if (offset + i != (high_offset + 6)) { 733 + if (offset != (high_offset + 6)) {
731 QTC::TC("libtests", "JSON 16 low not after high"); 734 QTC::TC("libtests", "JSON 16 low not after high");
732 throw std::runtime_error( 735 throw std::runtime_error(
733 - "JSON: offset " + std::to_string(offset + i) + 736 + "JSON: offset " + std::to_string(offset) +
734 ": UTF-16 low surrogate found not immediately after high" 737 ": UTF-16 low surrogate found not immediately after high"
735 " surrogate"); 738 " surrogate");
736 } 739 }
@@ -743,88 +746,123 @@ JSONParser::handle_u_code( @@ -743,88 +746,123 @@ JSONParser::handle_u_code(
743 } 746 }
744 } 747 }
745 748
746 -std::string  
747 -JSONParser::decode_string(std::string const& str, qpdf_offset_t offset) 749 +void
  750 +JSONParser::tokenError()
748 { 751 {
749 - // The string has already been validated when this private method  
750 - // is called, so errors are logic errors instead of runtime  
751 - // errors.  
752 - size_t len = str.length();  
753 - if ((len < 2) || (str.at(0) != '"') || (str.at(len - 1) != '"')) {  
754 - throw std::logic_error(  
755 - "JSON Parse: decode_string called with other than \"...\""); 752 + if (done) {
  753 + QTC::TC("libtests", "JSON parse ls premature end of input");
  754 + throw std::runtime_error("JSON: premature end of input");
756 } 755 }
757 - char const* s = str.c_str();  
758 - // Move inside the quotation marks  
759 - ++s;  
760 - len -= 2;  
761 - // Keep track of UTF-16 surrogate pairs.  
762 - unsigned long high_surrogate = 0;  
763 - qpdf_offset_t high_offset = 0;  
764 - std::string result;  
765 - qpdf_offset_t olen = toO(len);  
766 - for (qpdf_offset_t i = 0; i < olen; ++i) {  
767 - if (s[i] == '\\') {  
768 - if (i + 1 >= olen) {  
769 - throw std::logic_error("JSON parse: nothing after \\");  
770 - }  
771 - char ch = s[++i];  
772 - switch (ch) {  
773 - case '\\':  
774 - case '\"':  
775 - case '/':  
776 - // \/ is allowed in json input, but so is /, so we  
777 - // don't map / to \/ in output.  
778 - result.append(1, ch);  
779 - break;  
780 - case 'b':  
781 - result.append(1, '\b');  
782 - break;  
783 - case 'f':  
784 - result.append(1, '\f');  
785 - break;  
786 - case 'n':  
787 - result.append(1, '\n');  
788 - break;  
789 - case 'r':  
790 - result.append(1, '\r');  
791 - break;  
792 - case 't':  
793 - result.append(1, '\t');  
794 - break;  
795 - case 'u':  
796 - if (i + 4 >= olen) {  
797 - throw std::logic_error(  
798 - "JSON parse: not enough characters after \\u");  
799 - }  
800 - handle_u_code(  
801 - s, offset, i, high_surrogate, high_offset, result);  
802 - i += 4;  
803 - break;  
804 - default:  
805 - throw std::logic_error("JSON parse: bad character after \\");  
806 - break;  
807 - } 756 +
  757 + if (lex_state == ls_u4) {
  758 + QTC::TC("libtests", "JSON parse bad hex after u");
  759 + throw std::runtime_error(
  760 + "JSON: offset " + std::to_string(offset - u_count - 1) +
  761 + ": \\u must be followed by four hex digits");
  762 + } else if (lex_state == ls_alpha) {
  763 + QTC::TC("libtests", "JSON parse keyword bad character");
  764 + throw std::runtime_error(
  765 + "JSON: offset " + std::to_string(offset) +
  766 + ": keyword: unexpected character " + std::string(p, 1));
  767 + } else if (lex_state == ls_string) {
  768 + QTC::TC("libtests", "JSON parse control char in string");
  769 + throw std::runtime_error(
  770 + "JSON: offset " + std::to_string(offset) +
  771 + ": control character in string (missing \"?)");
  772 + } else if (lex_state == ls_backslash) {
  773 + QTC::TC("libtests", "JSON parse backslash bad character");
  774 + throw std::runtime_error(
  775 + "JSON: offset " + std::to_string(offset) +
  776 + ": invalid character after backslash: " + std::string(p, 1));
  777 + }
  778 +
  779 + if (*p == '.') {
  780 + if (lex_state == ls_number || lex_state == ls_number_e ||
  781 + lex_state == ls_number_e_sign) {
  782 + QTC::TC("libtests", "JSON parse point after e");
  783 + throw std::runtime_error(
  784 + "JSON: offset " + std::to_string(offset) +
  785 + ": numeric literal: decimal point after e");
808 } else { 786 } else {
809 - result.append(1, s[i]); 787 + QTC::TC("libtests", "JSON parse duplicate point");
  788 + throw std::runtime_error(
  789 + "JSON: offset " + std::to_string(offset) +
  790 + ": numeric literal: decimal point already seen");
810 } 791 }
811 - }  
812 - if (high_offset) {  
813 - QTC::TC("libtests", "JSON 16 dangling high"); 792 + } else if (*p == 'e' || *p == 'E') {
  793 + QTC::TC("libtests", "JSON parse duplicate e");
  794 + throw std::runtime_error(
  795 + "JSON: offset " + std::to_string(offset) +
  796 + ": numeric literal: e already seen");
  797 + } else if ((*p == '+') || (*p == '-')) {
  798 + QTC::TC("libtests", "JSON parse unexpected sign");
  799 + throw std::runtime_error(
  800 + "JSON: offset " + std::to_string(offset) +
  801 + ": numeric literal: unexpected sign");
  802 + } else if (QUtil::is_space(*p) || strchr("{}[]:,", *p)) {
  803 + QTC::TC("libtests", "JSON parse incomplete number");
  804 + throw std::runtime_error(
  805 + "JSON: offset " + std::to_string(offset) +
  806 + ": numeric literal: incomplete number");
  807 +
  808 + } else {
  809 + QTC::TC("libtests", "JSON parse numeric bad character");
814 throw std::runtime_error( 810 throw std::runtime_error(
815 - "JSON: offset " + std::to_string(high_offset) +  
816 - ": UTF-16 high surrogate not followed by low surrogate"); 811 + "JSON: offset " + std::to_string(offset) +
  812 + ": numeric literal: unexpected character " + std::string(p, 1));
817 } 813 }
818 - return result; 814 + throw std::logic_error("JSON::tokenError : unhandled error");
  815 +}
  816 +
  817 +// Append current character to token and advance to next input character.
  818 +inline void
  819 +JSONParser::append()
  820 +{
  821 + token += *p;
  822 + ++p;
  823 + ++offset;
  824 +}
  825 +
  826 +// Append current character to token, advance to next input character and
  827 +// transition to 'next' lexer state.
  828 +inline void
  829 +JSONParser::append(lex_state_e next)
  830 +{
  831 + lex_state = next;
  832 + token += *p;
  833 + ++p;
  834 + ++offset;
  835 +}
  836 +
  837 +// Advance to next input character without appending the current character to
  838 +// token.
  839 +inline void
  840 +JSONParser::ignore()
  841 +{
  842 + ++p;
  843 + ++offset;
  844 +}
  845 +
  846 +// Advance to next input character without appending the current character to
  847 +// token and transition to 'next' lexer state.
  848 +inline void
  849 +JSONParser::ignore(lex_state_e next)
  850 +{
  851 + lex_state = next;
  852 + ++p;
  853 + ++offset;
819 } 854 }
820 855
821 void 856 void
822 JSONParser::getToken() 857 JSONParser::getToken()
823 { 858 {
824 - enum { append, ignore, reread } action = append;  
825 - bool ready = false;  
826 token.clear(); 859 token.clear();
827 - while (!done) { 860 +
  861 + // Keep track of UTF-16 surrogate pairs.
  862 + unsigned long high_surrogate = 0;
  863 + qpdf_offset_t high_offset = 0;
  864 +
  865 + while (true) {
828 if (p == (buf + bytes)) { 866 if (p == (buf + bytes)) {
829 p = buf; 867 p = buf;
830 bytes = is.read(buf, sizeof(buf)); 868 bytes = is.read(buf, sizeof(buf));
@@ -834,210 +872,316 @@ JSONParser::getToken() @@ -834,210 +872,316 @@ JSONParser::getToken()
834 } 872 }
835 } 873 }
836 874
837 - if (*p == 0) {  
838 - QTC::TC("libtests", "JSON parse null character");  
839 - throw std::runtime_error(  
840 - "JSON: null character at offset " + std::to_string(offset));  
841 - }  
842 - action = append;  
843 - switch (lex_state) {  
844 - case ls_top:  
845 - if (*p == '"') {  
846 - lex_state = ls_string;  
847 - } else if (QUtil::is_space(*p)) {  
848 - action = ignore;  
849 - } else if ((*p >= 'a') && (*p <= 'z')) {  
850 - lex_state = ls_alpha;  
851 - } else if (*p == '-') {  
852 - lex_state = ls_number;  
853 - number_before_point = 0;  
854 - number_after_point = 0;  
855 - number_after_e = 0;  
856 - number_saw_point = false;  
857 - number_saw_e = false;  
858 - } else if ((*p >= '0') && (*p <= '9')) {  
859 - lex_state = ls_number;  
860 - number_before_point = 1;  
861 - number_after_point = 0;  
862 - number_after_e = 0;  
863 - number_saw_point = false;  
864 - number_saw_e = false;  
865 - } else if (*p == '.') {  
866 - lex_state = ls_number;  
867 - number_before_point = 0;  
868 - number_after_point = 0;  
869 - number_after_e = 0;  
870 - number_saw_point = true;  
871 - number_saw_e = false;  
872 - } else if (strchr("{}[]:,", *p)) {  
873 - ready = true; 875 + if ((*p < 32 && *p >= 0)) {
  876 + if (*p == '\t' || *p == '\n' || *p == '\r') {
  877 + // Legal white space not permitted in strings. This will always
  878 + // end the current token (unless we are still before the start
  879 + // of the token).
  880 + if (lex_state == ls_top) {
  881 + ignore();
  882 + } else {
  883 + break;
  884 + }
  885 +
874 } else { 886 } else {
875 - QTC::TC("libtests", "JSON parse bad character"); 887 + QTC::TC("libtests", "JSON parse null character");
876 throw std::runtime_error( 888 throw std::runtime_error(
877 - "JSON: offset " + std::to_string(offset) +  
878 - ": unexpected character " + std::string(p, 1)); 889 + "JSON: control or null character at offset " +
  890 + std::to_string(offset));
879 } 891 }
880 - break;  
881 -  
882 - case ls_number:  
883 - if ((*p >= '0') && (*p <= '9')) {  
884 - if (number_saw_e) {  
885 - ++number_after_e;  
886 - } else if (number_saw_point) {  
887 - ++number_after_point; 892 + } else if (*p == ',') {
  893 + if (lex_state == ls_top) {
  894 + ignore(ls_comma);
  895 + return;
  896 + } else if (lex_state == ls_string) {
  897 + append();
  898 + } else {
  899 + break;
  900 + }
  901 + } else if (*p == ':') {
  902 + if (lex_state == ls_top) {
  903 + ignore(ls_colon);
  904 + return;
  905 + } else if (lex_state == ls_string) {
  906 + append();
  907 + } else {
  908 + break;
  909 + }
  910 + } else if (*p == ' ') {
  911 + if (lex_state == ls_top) {
  912 + ignore();
  913 + } else if (lex_state == ls_string) {
  914 + append();
  915 + } else {
  916 + break;
  917 + }
  918 + } else if (*p == '{') {
  919 + if (lex_state == ls_top) {
  920 + token_start = offset;
  921 + ignore(ls_begin_dict);
  922 + return;
  923 + } else if (lex_state == ls_string) {
  924 + append();
  925 + } else {
  926 + break;
  927 + }
  928 + } else if (*p == '}') {
  929 + if (lex_state == ls_top) {
  930 + ignore(ls_end_dict);
  931 + return;
  932 + } else if (lex_state == ls_string) {
  933 + append();
  934 + } else {
  935 + break;
  936 + }
  937 + } else if (*p == '[') {
  938 + if (lex_state == ls_top) {
  939 + token_start = offset;
  940 + ignore(ls_begin_array);
  941 + return;
  942 + } else if (lex_state == ls_string) {
  943 + append();
  944 + } else {
  945 + break;
  946 + }
  947 + } else if (*p == ']') {
  948 + if (lex_state == ls_top) {
  949 + ignore(ls_end_array);
  950 + return;
  951 + } else if (lex_state == ls_string) {
  952 + append();
  953 + } else {
  954 + break;
  955 + }
  956 + } else {
  957 + switch (lex_state) {
  958 + case ls_top:
  959 + token_start = offset;
  960 + if (*p == '"') {
  961 + ignore(ls_string);
  962 + } else if ((*p >= 'a') && (*p <= 'z')) {
  963 + append(ls_alpha);
  964 + } else if (*p == '-') {
  965 + append(ls_number_minus);
  966 + } else if ((*p >= '1') && (*p <= '9')) {
  967 + append(ls_number_before_point);
  968 + } else if (*p == '0') {
  969 + append(ls_number_leading_zero);
888 } else { 970 } else {
889 - ++number_before_point;  
890 - }  
891 - } else if (*p == '.') {  
892 - if (number_saw_e) {  
893 - QTC::TC("libtests", "JSON parse point after e");  
894 - throw std::runtime_error(  
895 - "JSON: offset " + std::to_string(offset) +  
896 - ": numeric literal: decimal point after e");  
897 - } else if (number_saw_point) {  
898 - QTC::TC("libtests", "JSON parse duplicate point"); 971 + QTC::TC("libtests", "JSON parse bad character");
899 throw std::runtime_error( 972 throw std::runtime_error(
900 "JSON: offset " + std::to_string(offset) + 973 "JSON: offset " + std::to_string(offset) +
901 - ": numeric literal: decimal point already seen");  
902 - } else {  
903 - number_saw_point = true; 974 + ": unexpected character " + std::string(p, 1));
904 } 975 }
905 - } else if (*p == 'e') {  
906 - if (number_saw_e) {  
907 - QTC::TC("libtests", "JSON parse duplicate e"); 976 + break;
  977 +
  978 + case ls_number_minus:
  979 + if ((*p >= '1') && (*p <= '9')) {
  980 + append(ls_number_before_point);
  981 + } else if (*p == '0') {
  982 + append(ls_number_leading_zero);
  983 + } else {
  984 + QTC::TC("libtests", "JSON parse number minus no digits");
908 throw std::runtime_error( 985 throw std::runtime_error(
909 "JSON: offset " + std::to_string(offset) + 986 "JSON: offset " + std::to_string(offset) +
910 - ": numeric literal: e already seen");  
911 - } else {  
912 - number_saw_e = true; 987 + ": numeric literal: no digit after minus sign");
913 } 988 }
914 - } else if ((*p == '+') || (*p == '-')) {  
915 - if (number_saw_e && (number_after_e == 0)) {  
916 - // okay 989 + break;
  990 +
  991 + case ls_number_leading_zero:
  992 + if (*p == '.') {
  993 + append(ls_number_point);
  994 + } else if (*p == 'e' || *p == 'E') {
  995 + append(ls_number_e);
917 } else { 996 } else {
918 - QTC::TC("libtests", "JSON parse unexpected sign"); 997 + QTC::TC("libtests", "JSON parse leading zero");
919 throw std::runtime_error( 998 throw std::runtime_error(
920 "JSON: offset " + std::to_string(offset) + 999 "JSON: offset " + std::to_string(offset) +
921 - ": numeric literal: unexpected sign"); 1000 + ": number with leading zero");
922 } 1001 }
923 - } else if (QUtil::is_space(*p)) {  
924 - action = ignore;  
925 - ready = true;  
926 - } else if (strchr("{}[]:,", *p)) {  
927 - action = reread;  
928 - ready = true;  
929 - } else {  
930 - QTC::TC("libtests", "JSON parse numeric bad character");  
931 - throw std::runtime_error(  
932 - "JSON: offset " + std::to_string(offset) +  
933 - ": numeric literal: unexpected character " +  
934 - std::string(p, 1));  
935 - }  
936 - break; 1002 + break;
937 1003
938 - case ls_alpha:  
939 - if ((*p >= 'a') && (*p <= 'z')) {  
940 - // okay  
941 - } else if (QUtil::is_space(*p)) {  
942 - action = ignore;  
943 - ready = true;  
944 - } else if (strchr("{}[]:,", *p)) {  
945 - action = reread;  
946 - ready = true;  
947 - } else {  
948 - QTC::TC("libtests", "JSON parse keyword bad character");  
949 - throw std::runtime_error(  
950 - "JSON: offset " + std::to_string(offset) +  
951 - ": keyword: unexpected character " + std::string(p, 1));  
952 - }  
953 - break; 1004 + case ls_number_before_point:
  1005 + if ((*p >= '0') && (*p <= '9')) {
  1006 + append();
  1007 + } else if (*p == '.') {
  1008 + append(ls_number_point);
  1009 + } else if (*p == 'e' || *p == 'E') {
  1010 + append(ls_number_e);
  1011 + } else {
  1012 + tokenError();
  1013 + }
  1014 + break;
954 1015
955 - case ls_string:  
956 - if (*p == '"') {  
957 - ready = true;  
958 - } else if (*p == '\\') {  
959 - lex_state = ls_backslash;  
960 - }  
961 - break; 1016 + case ls_number_point:
  1017 + if ((*p >= '0') && (*p <= '9')) {
  1018 + append(ls_number_after_point);
  1019 + } else {
  1020 + tokenError();
  1021 + }
  1022 + break;
962 1023
963 - case ls_backslash:  
964 - /* cSpell: ignore bfnrt */  
965 - if (strchr("\\\"/bfnrt", *p)) {  
966 - lex_state = ls_string;  
967 - } else if (*p == 'u') {  
968 - lex_state = ls_u4;  
969 - u_count = 0;  
970 - } else {  
971 - QTC::TC("libtests", "JSON parse backslash bad character");  
972 - throw std::runtime_error(  
973 - "JSON: offset " + std::to_string(offset) +  
974 - ": invalid character after backslash: " +  
975 - std::string(p, 1));  
976 - }  
977 - break; 1024 + case ls_number_after_point:
  1025 + if ((*p >= '0') && (*p <= '9')) {
  1026 + append();
  1027 + } else if (*p == 'e' || *p == 'E') {
  1028 + append(ls_number_e);
  1029 + } else {
  1030 + tokenError();
  1031 + }
  1032 + break;
978 1033
979 - case ls_u4:  
980 - if (!QUtil::is_hex_digit(*p)) {  
981 - QTC::TC("libtests", "JSON parse bad hex after u");  
982 - throw std::runtime_error(  
983 - "JSON: offset " + std::to_string(offset - u_count - 1) +  
984 - ": \\u must be followed by four hex digits");  
985 - }  
986 - if (++u_count == 4) {  
987 - lex_state = ls_string;  
988 - }  
989 - break;  
990 - }  
991 - switch (action) {  
992 - case reread:  
993 - break;  
994 - case append:  
995 - token.append(1, *p);  
996 - // fall through  
997 - case ignore:  
998 - ++p;  
999 - ++offset;  
1000 - break;  
1001 - }  
1002 - if (ready) {  
1003 - break;  
1004 - }  
1005 - }  
1006 - if (done) {  
1007 - if ((!token.empty()) && (!ready)) {  
1008 - switch (lex_state) {  
1009 - case ls_top:  
1010 - // Can't happen  
1011 - throw std::logic_error("tok_start set in ls_top while parsing"); 1034 + case ls_number_e:
  1035 + if ((*p >= '0') && (*p <= '9')) {
  1036 + append(ls_number);
  1037 + } else if ((*p == '+') || (*p == '-')) {
  1038 + append(ls_number_e_sign);
  1039 + } else {
  1040 + tokenError();
  1041 + }
  1042 + break;
  1043 +
  1044 + case ls_number_e_sign:
  1045 + if ((*p >= '0') && (*p <= '9')) {
  1046 + append(ls_number);
  1047 + } else {
  1048 + tokenError();
  1049 + }
1012 break; 1050 break;
1013 1051
1014 case ls_number: 1052 case ls_number:
1015 - case ls_alpha:  
1016 - // okay 1053 + // We only get here after we have seen an exponent.
  1054 + if ((*p >= '0') && (*p <= '9')) {
  1055 + append();
  1056 + } else {
  1057 + tokenError();
  1058 + }
1017 break; 1059 break;
1018 1060
1019 - case ls_u4:  
1020 - QTC::TC("libtests", "JSON parse premature end of u");  
1021 - throw std::runtime_error(  
1022 - "JSON: offset " + std::to_string(offset - u_count - 1) +  
1023 - ": \\u must be followed by four characters"); 1061 + case ls_alpha:
  1062 + if ((*p >= 'a') && (*p <= 'z')) {
  1063 + append();
  1064 + } else {
  1065 + tokenError();
  1066 + }
  1067 + break;
1024 1068
1025 case ls_string: 1069 case ls_string:
  1070 + if (*p == '"') {
  1071 + if (high_offset) {
  1072 + QTC::TC("libtests", "JSON 16 dangling high");
  1073 + throw std::runtime_error(
  1074 + "JSON: offset " + std::to_string(high_offset) +
  1075 + ": UTF-16 high surrogate not followed by low "
  1076 + "surrogate");
  1077 + }
  1078 + ignore();
  1079 + return;
  1080 + } else if (*p == '\\') {
  1081 + ignore(ls_backslash);
  1082 + } else {
  1083 + append();
  1084 + }
  1085 + break;
  1086 +
1026 case ls_backslash: 1087 case ls_backslash:
1027 - QTC::TC("libtests", "JSON parse unterminated string");  
1028 - throw std::runtime_error(  
1029 - "JSON: offset " + std::to_string(offset) +  
1030 - ": unterminated string"); 1088 + lex_state = ls_string;
  1089 + switch (*p) {
  1090 + case '\\':
  1091 + case '\"':
  1092 + case '/':
  1093 + // \/ is allowed in json input, but so is /, so we
  1094 + // don't map / to \/ in output.
  1095 + token += *p;
  1096 + break;
  1097 + case 'b':
  1098 + token += '\b';
  1099 + break;
  1100 + case 'f':
  1101 + token += '\f';
  1102 + break;
  1103 + case 'n':
  1104 + token += '\n';
  1105 + break;
  1106 + case 'r':
  1107 + token += '\r';
  1108 + break;
  1109 + case 't':
  1110 + token += '\t';
  1111 + break;
  1112 + case 'u':
  1113 + lex_state = ls_u4;
  1114 + u_count = 0;
  1115 + u_value = 0;
  1116 + break;
  1117 + default:
  1118 + lex_state = ls_backslash;
  1119 + tokenError();
  1120 + }
  1121 + ignore();
1031 break; 1122 break;
  1123 +
  1124 + case ls_u4:
  1125 + using ui = unsigned int;
  1126 + if ('0' <= *p && *p <= '9') {
  1127 + u_value = 16 * u_value + (ui(*p) - ui('0'));
  1128 + } else if ('a' <= *p && *p <= 'f') {
  1129 + u_value = 16 * u_value + (10 + ui(*p) - ui('a'));
  1130 + } else if ('A' <= *p && *p <= 'F') {
  1131 + u_value = 16 * u_value + (10 + ui(*p) - ui('A'));
  1132 + } else {
  1133 + tokenError();
  1134 + }
  1135 + if (++u_count == 4) {
  1136 + handle_u_code(
  1137 + u_value,
  1138 + offset - 5,
  1139 + high_surrogate,
  1140 + high_offset,
  1141 + token);
  1142 + lex_state = ls_string;
  1143 + }
  1144 + ignore();
  1145 + break;
  1146 +
  1147 + default:
  1148 + throw std::logic_error(
  1149 + "JSONParser::getToken : trying to handle delimiter state");
1032 } 1150 }
1033 } 1151 }
1034 } 1152 }
  1153 +
  1154 + // We only get here if on end of input or if the last character was a
  1155 + // control character or other delimiter.
  1156 +
  1157 + if (!token.empty()) {
  1158 + switch (lex_state) {
  1159 + case ls_top:
  1160 + // Can't happen
  1161 + throw std::logic_error("tok_start set in ls_top while parsing");
  1162 + break;
  1163 +
  1164 + case ls_number_leading_zero:
  1165 + case ls_number_before_point:
  1166 + case ls_number_after_point:
  1167 + lex_state = ls_number;
  1168 + break;
  1169 +
  1170 + case ls_number:
  1171 + case ls_alpha:
  1172 + // terminal state
  1173 + break;
  1174 +
  1175 + default:
  1176 + tokenError();
  1177 + }
  1178 + }
1035 } 1179 }
1036 1180
1037 void 1181 void
1038 JSONParser::handleToken() 1182 JSONParser::handleToken()
1039 { 1183 {
1040 - if (token.empty()) { 1184 + if (lex_state == ls_top) {
1041 return; 1185 return;
1042 } 1186 }
1043 1187
@@ -1048,73 +1192,96 @@ JSONParser::handleToken() @@ -1048,73 +1192,96 @@ JSONParser::handleToken()
1048 ": material follows end of object: " + token); 1192 ": material follows end of object: " + token);
1049 } 1193 }
1050 1194
1051 - // Git string value  
1052 - std::string s_value;  
1053 - if (lex_state == ls_string) {  
1054 - // Token includes the quotation marks  
1055 - if (token.length() < 2) {  
1056 - throw std::logic_error("JSON string length < 2");  
1057 - }  
1058 - s_value = decode_string(token, offset - toO(token.length()));  
1059 - }  
1060 - // Based on the lexical state and value, figure out whether we are  
1061 - // looking at an item or a delimiter. It will always be exactly  
1062 - // one of those two or an error condition.  
1063 -  
1064 - std::shared_ptr<JSON> item;  
1065 - char delimiter = '\0';  
1066 - // Already verified that token is not empty  
1067 - char first_char = token.at(0);  
1068 - switch (lex_state) {  
1069 - case ls_top:  
1070 - switch (first_char) {  
1071 - case '{':  
1072 - item = std::make_shared<JSON>(JSON::makeDictionary());  
1073 - item->setStart(offset - toO(token.length()));  
1074 - break; 1195 + const static JSON null_item = JSON::makeNull();
  1196 + JSON item;
  1197 + auto tos = stack.empty() ? null_item : stack.back().item;
  1198 + auto ls = lex_state;
  1199 + lex_state = ls_top;
1075 1200
1076 - case '[':  
1077 - item = std::make_shared<JSON>(JSON::makeArray());  
1078 - item->setStart(offset - toO(token.length()));  
1079 - break; 1201 + switch (ls) {
  1202 + case ls_begin_dict:
  1203 + item = JSON::makeDictionary();
  1204 + break;
1080 1205
1081 - default:  
1082 - delimiter = first_char;  
1083 - break;  
1084 - } 1206 + case ls_begin_array:
  1207 + item = JSON::makeArray();
1085 break; 1208 break;
1086 1209
1087 - case ls_number:  
1088 - if (number_saw_point && (number_after_point == 0)) {  
1089 - QTC::TC("libtests", "JSON parse decimal with no digits"); 1210 + case ls_colon:
  1211 + if (parser_state != ps_dict_after_key) {
  1212 + QTC::TC("libtests", "JSON parse unexpected :");
1090 throw std::runtime_error( 1213 throw std::runtime_error(
1091 "JSON: offset " + std::to_string(offset) + 1214 "JSON: offset " + std::to_string(offset) +
1092 - ": decimal point with no digits"); 1215 + ": unexpected colon");
1093 } 1216 }
1094 - if ((number_before_point > 1) &&  
1095 - ((first_char == '0') ||  
1096 - ((first_char == '-') && (token.at(1) == '0')))) {  
1097 - QTC::TC("libtests", "JSON parse leading zero"); 1217 + parser_state = ps_dict_after_colon;
  1218 + return;
  1219 +
  1220 + case ls_comma:
  1221 + if (!((parser_state == ps_dict_after_item) ||
  1222 + (parser_state == ps_array_after_item))) {
  1223 + QTC::TC("libtests", "JSON parse unexpected ,");
1098 throw std::runtime_error( 1224 throw std::runtime_error(
1099 "JSON: offset " + std::to_string(offset) + 1225 "JSON: offset " + std::to_string(offset) +
1100 - ": number with leading zero"); 1226 + ": unexpected comma");
1101 } 1227 }
1102 - if ((number_before_point == 0) && (number_after_point == 0)) {  
1103 - QTC::TC("libtests", "JSON parse number no digits"); 1228 + if (parser_state == ps_dict_after_item) {
  1229 + parser_state = ps_dict_after_comma;
  1230 + } else if (parser_state == ps_array_after_item) {
  1231 + parser_state = ps_array_after_comma;
  1232 + } else {
  1233 + throw std::logic_error("JSONParser::handleToken: unexpected parser"
  1234 + " state for comma");
  1235 + }
  1236 + return;
  1237 +
  1238 + case ls_end_array:
  1239 + if (!(parser_state == ps_array_begin ||
  1240 + parser_state == ps_array_after_item)) {
  1241 + QTC::TC("libtests", "JSON parse unexpected ]");
1104 throw std::runtime_error( 1242 throw std::runtime_error(
1105 "JSON: offset " + std::to_string(offset) + 1243 "JSON: offset " + std::to_string(offset) +
1106 - ": number with no digits"); 1244 + ": unexpected array end delimiter");
1107 } 1245 }
1108 - item = std::make_shared<JSON>(JSON::makeNumber(token)); 1246 + parser_state = stack.back().state;
  1247 + tos.setEnd(offset);
  1248 + if (reactor) {
  1249 + reactor->containerEnd(tos);
  1250 + }
  1251 + if (parser_state != ps_done) {
  1252 + stack.pop_back();
  1253 + }
  1254 + return;
  1255 +
  1256 + case ls_end_dict:
  1257 + if (!((parser_state == ps_dict_begin) ||
  1258 + (parser_state == ps_dict_after_item))) {
  1259 + QTC::TC("libtests", "JSON parse unexpected }");
  1260 + throw std::runtime_error(
  1261 + "JSON: offset " + std::to_string(offset) +
  1262 + ": unexpected dictionary end delimiter");
  1263 + }
  1264 + parser_state = stack.back().state;
  1265 + tos.setEnd(offset);
  1266 + if (reactor) {
  1267 + reactor->containerEnd(tos);
  1268 + }
  1269 + if (parser_state != ps_done) {
  1270 + stack.pop_back();
  1271 + }
  1272 + return;
  1273 +
  1274 + case ls_number:
  1275 + item = JSON::makeNumber(token);
1109 break; 1276 break;
1110 1277
1111 case ls_alpha: 1278 case ls_alpha:
1112 if (token == "true") { 1279 if (token == "true") {
1113 - item = std::make_shared<JSON>(JSON::makeBool(true)); 1280 + item = JSON::makeBool(true);
1114 } else if (token == "false") { 1281 } else if (token == "false") {
1115 - item = std::make_shared<JSON>(JSON::makeBool(false)); 1282 + item = JSON::makeBool(false);
1116 } else if (token == "null") { 1283 } else if (token == "null") {
1117 - item = std::make_shared<JSON>(JSON::makeNull()); 1284 + item = JSON::makeNull();
1118 } else { 1285 } else {
1119 QTC::TC("libtests", "JSON parse invalid keyword"); 1286 QTC::TC("libtests", "JSON parse invalid keyword");
1120 throw std::runtime_error( 1287 throw std::runtime_error(
@@ -1124,227 +1291,115 @@ JSONParser::handleToken() @@ -1124,227 +1291,115 @@ JSONParser::handleToken()
1124 break; 1291 break;
1125 1292
1126 case ls_string: 1293 case ls_string:
1127 - item = std::make_shared<JSON>(JSON::makeString(s_value)); 1294 + if (parser_state == ps_dict_begin ||
  1295 + parser_state == ps_dict_after_comma) {
  1296 + dict_key = token;
  1297 + dict_key_offset = token_start;
  1298 + parser_state = ps_dict_after_key;
  1299 + return;
  1300 + } else {
  1301 + item = JSON::makeString(token);
  1302 + }
1128 break; 1303 break;
1129 1304
1130 - case ls_backslash:  
1131 - case ls_u4: 1305 + default:
1132 throw std::logic_error( 1306 throw std::logic_error(
1133 - "tok_end is set while state = ls_backslash or ls_u4"); 1307 + "JSONParser::handleToken : non-terminal lexer state encountered");
1134 break; 1308 break;
1135 } 1309 }
1136 1310
1137 - if ((item == nullptr) == (delimiter == '\0')) {  
1138 - throw std::logic_error(  
1139 - "JSONParser::handleToken: logic error: exactly one of item"  
1140 - " or delimiter must be set");  
1141 - }  
1142 -  
1143 - // See whether what we have is allowed at this point.  
1144 -  
1145 - if (item.get()) {  
1146 - switch (parser_state) {  
1147 - case ps_done:  
1148 - throw std::logic_error("can't happen; ps_done already handled");  
1149 - break;  
1150 -  
1151 - case ps_dict_after_key:  
1152 - QTC::TC("libtests", "JSON parse expected colon");  
1153 - throw std::runtime_error(  
1154 - "JSON: offset " + std::to_string(offset) + ": expected ':'");  
1155 - break;  
1156 -  
1157 - case ps_dict_after_item:  
1158 - QTC::TC("libtests", "JSON parse expected , or }");  
1159 - throw std::runtime_error(  
1160 - "JSON: offset " + std::to_string(offset) +  
1161 - ": expected ',' or '}'");  
1162 - break;  
1163 -  
1164 - case ps_array_after_item:  
1165 - QTC::TC("libtests", "JSON parse expected, or ]");  
1166 - throw std::runtime_error(  
1167 - "JSON: offset " + std::to_string(offset) +  
1168 - ": expected ',' or ']'");  
1169 - break;  
1170 -  
1171 - case ps_dict_begin:  
1172 - case ps_dict_after_comma:  
1173 - if (lex_state != ls_string) {  
1174 - QTC::TC("libtests", "JSON parse string as dict key");  
1175 - throw std::runtime_error(  
1176 - "JSON: offset " + std::to_string(offset) +  
1177 - ": expect string as dictionary key");  
1178 - }  
1179 - break;  
1180 -  
1181 - case ps_top:  
1182 - case ps_dict_after_colon:  
1183 - case ps_array_begin:  
1184 - case ps_array_after_comma:  
1185 - break;  
1186 - // okay  
1187 - }  
1188 - } else if (delimiter == '}') {  
1189 - if (!((parser_state == ps_dict_begin) ||  
1190 - (parser_state == ps_dict_after_item))) 1311 + item.setStart(token_start);
  1312 + item.setEnd(offset);
1191 1313
1192 - {  
1193 - QTC::TC("libtests", "JSON parse unexpected }");  
1194 - throw std::runtime_error(  
1195 - "JSON: offset " + std::to_string(offset) +  
1196 - ": unexpected dictionary end delimiter");  
1197 - }  
1198 - } else if (delimiter == ']') {  
1199 - if (!((parser_state == ps_array_begin) ||  
1200 - (parser_state == ps_array_after_item))) 1314 + switch (parser_state) {
  1315 + case ps_dict_begin:
  1316 + case ps_dict_after_comma:
  1317 + QTC::TC("libtests", "JSON parse string as dict key");
  1318 + throw std::runtime_error(
  1319 + "JSON: offset " + std::to_string(offset) +
  1320 + ": expect string as dictionary key");
  1321 + break;
1201 1322
1202 - {  
1203 - QTC::TC("libtests", "JSON parse unexpected ]"); 1323 + case ps_dict_after_colon:
  1324 + if (tos.checkDictionaryKeySeen(dict_key)) {
  1325 + QTC::TC("libtests", "JSON parse duplicate key");
1204 throw std::runtime_error( 1326 throw std::runtime_error(
1205 - "JSON: offset " + std::to_string(offset) +  
1206 - ": unexpected array end delimiter"); 1327 + "JSON: offset " + std::to_string(dict_key_offset) +
  1328 + ": duplicated dictionary key");
1207 } 1329 }
1208 - } else if (delimiter == ':') {  
1209 - if (parser_state != ps_dict_after_key) {  
1210 - QTC::TC("libtests", "JSON parse unexpected :");  
1211 - throw std::runtime_error(  
1212 - "JSON: offset " + std::to_string(offset) +  
1213 - ": unexpected colon");  
1214 - }  
1215 - } else if (delimiter == ',') {  
1216 - if (!((parser_state == ps_dict_after_item) ||  
1217 - (parser_state == ps_array_after_item))) {  
1218 - QTC::TC("libtests", "JSON parse unexpected ,");  
1219 - throw std::runtime_error(  
1220 - "JSON: offset " + std::to_string(offset) +  
1221 - ": unexpected comma"); 1330 + if (!reactor || !reactor->dictionaryItem(dict_key, item)) {
  1331 + tos.addDictionaryMember(dict_key, item);
1222 } 1332 }
1223 - } else if (delimiter != '\0') {  
1224 - throw std::logic_error("JSONParser::handleToken: bad delimiter");  
1225 - }  
1226 -  
1227 - // Now we know we have a delimiter or item that is allowed. Do  
1228 - // whatever we need to do with it. 1333 + parser_state = ps_dict_after_item;
  1334 + break;
1229 1335
1230 - parser_state_e next_state = ps_top;  
1231 - if (delimiter == ':') {  
1232 - next_state = ps_dict_after_colon;  
1233 - } else if (delimiter == ',') {  
1234 - if (parser_state == ps_dict_after_item) {  
1235 - next_state = ps_dict_after_comma;  
1236 - } else if (parser_state == ps_array_after_item) {  
1237 - next_state = ps_array_after_comma;  
1238 - } else {  
1239 - throw std::logic_error("JSONParser::handleToken: unexpected parser"  
1240 - " state for comma");  
1241 - }  
1242 - } else if ((delimiter == '}') || (delimiter == ']')) {  
1243 - next_state = ps_stack.back();  
1244 - ps_stack.pop_back();  
1245 - auto tos = stack.back();  
1246 - tos->setEnd(offset);  
1247 - if (reactor) {  
1248 - reactor->containerEnd(*tos);  
1249 - }  
1250 - if (next_state != ps_done) {  
1251 - stack.pop_back();  
1252 - }  
1253 - } else if (delimiter != '\0') {  
1254 - throw std::logic_error(  
1255 - "JSONParser::handleToken: unexpected delimiter in transition");  
1256 - } else if (item.get()) {  
1257 - if (!(item->isArray() || item->isDictionary())) {  
1258 - item->setStart(offset - toO(token.length()));  
1259 - item->setEnd(offset); 1336 + case ps_array_begin:
  1337 + case ps_array_after_comma:
  1338 + if (!reactor || !reactor->arrayItem(item)) {
  1339 + tos.addArrayElement(item);
1260 } 1340 }
  1341 + parser_state = ps_array_after_item;
  1342 + break;
1261 1343
1262 - std::shared_ptr<JSON> tos;  
1263 - if (!stack.empty()) {  
1264 - tos = stack.back(); 1344 + case ps_top:
  1345 + if (!(item.isDictionary() || item.isArray())) {
  1346 + stack.push_back({ps_done, item});
  1347 + parser_state = ps_done;
  1348 + return;
1265 } 1349 }
1266 - switch (parser_state) {  
1267 - case ps_dict_begin:  
1268 - case ps_dict_after_comma:  
1269 - this->dict_key = s_value;  
1270 - this->dict_key_offset = item->getStart();  
1271 - item = nullptr;  
1272 - next_state = ps_dict_after_key;  
1273 - break; 1350 + parser_state = ps_done;
  1351 + break;
1274 1352
1275 - case ps_dict_after_colon:  
1276 - if (tos->checkDictionaryKeySeen(dict_key)) {  
1277 - QTC::TC("libtests", "JSON parse duplicate key");  
1278 - throw std::runtime_error(  
1279 - "JSON: offset " + std::to_string(dict_key_offset) +  
1280 - ": duplicated dictionary key");  
1281 - }  
1282 - if (!reactor || !reactor->dictionaryItem(dict_key, *item)) {  
1283 - tos->addDictionaryMember(dict_key, *item);  
1284 - }  
1285 - next_state = ps_dict_after_item;  
1286 - break; 1353 + case ps_dict_after_key:
  1354 + QTC::TC("libtests", "JSON parse expected colon");
  1355 + throw std::runtime_error(
  1356 + "JSON: offset " + std::to_string(offset) + ": expected ':'");
  1357 + break;
1287 1358
1288 - case ps_array_begin:  
1289 - case ps_array_after_comma:  
1290 - if (!reactor || !reactor->arrayItem(*item)) {  
1291 - tos->addArrayElement(*item);  
1292 - }  
1293 - next_state = ps_array_after_item;  
1294 - break; 1359 + case ps_dict_after_item:
  1360 + QTC::TC("libtests", "JSON parse expected , or }");
  1361 + throw std::runtime_error(
  1362 + "JSON: offset " + std::to_string(offset) + ": expected ',' or '}'");
  1363 + break;
1295 1364
1296 - case ps_top:  
1297 - next_state = ps_done;  
1298 - break; 1365 + case ps_array_after_item:
  1366 + QTC::TC("libtests", "JSON parse expected, or ]");
  1367 + throw std::runtime_error(
  1368 + "JSON: offset " + std::to_string(offset) + ": expected ',' or ']'");
  1369 + break;
1299 1370
1300 - case ps_dict_after_key:  
1301 - case ps_dict_after_item:  
1302 - case ps_array_after_item:  
1303 - case ps_done:  
1304 - throw std::logic_error(  
1305 - "JSONParser::handleToken: unexpected parser state");  
1306 - }  
1307 - } else { 1371 + case ps_done:
1308 throw std::logic_error( 1372 throw std::logic_error(
1309 - "JSONParser::handleToken: unexpected null item in transition"); 1373 + "JSONParser::handleToken: unexpected parser state");
1310 } 1374 }
1311 1375
1312 - if (reactor && item.get()) { 1376 + if (item.isDictionary() || item.isArray()) {
  1377 + stack.push_back({parser_state, item});
1313 // Calling container start method is postponed until after 1378 // Calling container start method is postponed until after
1314 // adding the containers to their parent containers, if any. 1379 // adding the containers to their parent containers, if any.
1315 // This makes it much easier to keep track of the current 1380 // This makes it much easier to keep track of the current
1316 // nesting level. 1381 // nesting level.
1317 - if (item->isDictionary()) {  
1318 - reactor->dictionaryStart();  
1319 - } else if (item->isArray()) {  
1320 - reactor->arrayStart(); 1382 + if (item.isDictionary()) {
  1383 + if (reactor) {
  1384 + reactor->dictionaryStart();
  1385 + }
  1386 + parser_state = ps_dict_begin;
  1387 + } else if (item.isArray()) {
  1388 + if (reactor) {
  1389 + reactor->arrayStart();
  1390 + }
  1391 + parser_state = ps_array_begin;
1321 } 1392 }
1322 - }  
1323 1393
1324 - // Prepare for next token  
1325 - if (item.get()) {  
1326 - if (item->isDictionary()) {  
1327 - stack.push_back(item);  
1328 - ps_stack.push_back(next_state);  
1329 - next_state = ps_dict_begin;  
1330 - } else if (item->isArray()) {  
1331 - stack.push_back(item);  
1332 - ps_stack.push_back(next_state);  
1333 - next_state = ps_array_begin;  
1334 - } else if (parser_state == ps_top) {  
1335 - stack.push_back(item); 1394 + if (stack.size() > 500) {
  1395 + throw std::runtime_error(
  1396 + "JSON: offset " + std::to_string(offset) +
  1397 + ": maximum object depth exceeded");
1336 } 1398 }
1337 } 1399 }
1338 - if (ps_stack.size() > 500) {  
1339 - throw std::runtime_error(  
1340 - "JSON: offset " + std::to_string(offset) +  
1341 - ": maximum object depth exceeded");  
1342 - }  
1343 - parser_state = next_state;  
1344 - lex_state = ls_top;  
1345 } 1400 }
1346 1401
1347 -std::shared_ptr<JSON> 1402 +JSON
1348 JSONParser::parse() 1403 JSONParser::parse()
1349 { 1404 {
1350 while (!done) { 1405 while (!done) {
@@ -1355,8 +1410,8 @@ JSONParser::parse() @@ -1355,8 +1410,8 @@ JSONParser::parse()
1355 QTC::TC("libtests", "JSON parse premature EOF"); 1410 QTC::TC("libtests", "JSON parse premature EOF");
1356 throw std::runtime_error("JSON: premature end of input"); 1411 throw std::runtime_error("JSON: premature end of input");
1357 } 1412 }
1358 - auto const& tos = stack.back();  
1359 - if (reactor && tos.get() && !(tos->isArray() || tos->isDictionary())) { 1413 + auto const& tos = stack.back().item;
  1414 + if (reactor && !(tos.isArray() || tos.isDictionary())) {
1360 reactor->topLevelScalar(); 1415 reactor->topLevelScalar();
1361 } 1416 }
1362 return tos; 1417 return tos;
@@ -1366,7 +1421,7 @@ JSON @@ -1366,7 +1421,7 @@ JSON
1366 JSON::parse(InputSource& is, Reactor* reactor) 1421 JSON::parse(InputSource& is, Reactor* reactor)
1367 { 1422 {
1368 JSONParser jp(is, reactor); 1423 JSONParser jp(is, reactor);
1369 - return *jp.parse(); 1424 + return jp.parse();
1370 } 1425 }
1371 1426
1372 JSON 1427 JSON
@@ -1374,7 +1429,7 @@ JSON::parse(std::string const& s) @@ -1374,7 +1429,7 @@ JSON::parse(std::string const& s)
1374 { 1429 {
1375 BufferInputSource bis("json input", s); 1430 BufferInputSource bis("json input", s);
1376 JSONParser jp(bis, nullptr); 1431 JSONParser jp(bis, nullptr);
1377 - return *jp.parse(); 1432 + return jp.parse();
1378 } 1433 }
1379 1434
1380 void 1435 void
libtests/libtests.testcov
@@ -58,7 +58,6 @@ QPDFArgParser bad option for help 0 @@ -58,7 +58,6 @@ QPDFArgParser bad option for help 0
58 QPDFArgParser bad topic for help 0 58 QPDFArgParser bad topic for help 0
59 QPDFArgParser invalid choice handler to unknown 0 59 QPDFArgParser invalid choice handler to unknown 0
60 JSON parse junk after object 0 60 JSON parse junk after object 0
61 -JSON parse decimal with no digits 0  
62 JSON parse invalid keyword 0 61 JSON parse invalid keyword 0
63 JSON parse expected colon 0 62 JSON parse expected colon 0
64 JSON parse expected , or } 0 63 JSON parse expected , or } 0
@@ -76,12 +75,13 @@ JSON parse duplicate point 0 @@ -76,12 +75,13 @@ JSON parse duplicate point 0
76 JSON parse duplicate e 0 75 JSON parse duplicate e 0
77 JSON parse unexpected sign 0 76 JSON parse unexpected sign 0
78 JSON parse numeric bad character 0 77 JSON parse numeric bad character 0
  78 +JSON parse number minus no digits 0
  79 +JSON parse incomplete number 0
79 JSON parse keyword bad character 0 80 JSON parse keyword bad character 0
80 JSON parse backslash bad character 0 81 JSON parse backslash bad character 0
81 -JSON parse unterminated string 0 82 +JSON parse control char in string 0
82 JSON parse leading zero 0 83 JSON parse leading zero 0
83 -JSON parse number no digits 0  
84 -JSON parse premature end of u 0 84 +JSON parse ls premature end of input 0
85 JSON parse bad hex after u 0 85 JSON parse bad hex after u 0
86 JSONHandler unhandled value 0 86 JSONHandler unhandled value 0
87 JSONHandler unexpected key 0 87 JSONHandler unexpected key 0
libtests/qtest/json_parse.test
@@ -121,6 +121,14 @@ my @bad = ( @@ -121,6 +121,14 @@ my @bad = (
121 "high high surrogate", # 38 121 "high high surrogate", # 38
122 "dangling high surrogate", # 39 122 "dangling high surrogate", # 39
123 "duplicate dictionary key", # 40 123 "duplicate dictionary key", # 40
  124 + "decimal point after minus",# 41
  125 + "e after minus", # 42
  126 + "missing digit after e", # 43
  127 + "missing digit after e+/-", # 44
  128 + "tab char in string", # 45
  129 + "cr char in string", # 46
  130 + "lf char in string", # 47
  131 + "bs char in string", # 48
124 ); 132 );
125 133
126 my $i = 0; 134 my $i = 0;
libtests/qtest/json_parse/bad-01.out
1 -exception: bad-01.json: JSON: offset 9: material follows end of object: junk 1 +exception: bad-01.json: JSON: offset 8: material follows end of object: junk
libtests/qtest/json_parse/bad-02.out
1 -exception: bad-02.json: JSON: offset 11: material follows end of object: junk 1 +exception: bad-02.json: JSON: offset 10: material follows end of object: junk
libtests/qtest/json_parse/bad-03.out
1 -exception: bad-03.json: JSON: offset 16: material follows end of object: junk 1 +exception: bad-03.json: JSON: offset 15: material follows end of object: junk
libtests/qtest/json_parse/bad-04.out
1 -exception: bad-04.json: JSON: offset 5: decimal point with no digits 1 +exception: bad-04.json: JSON: offset 4: unexpected character .
libtests/qtest/json_parse/bad-09.out
1 -exception: bad-09.json: JSON: offset 3: expect string as dictionary key 1 +exception: bad-09.json: JSON: offset 2: expect string as dictionary key
libtests/qtest/json_parse/bad-18.out
1 -exception: bad-18.json: JSON: null character at offset 5 1 +exception: bad-18.json: JSON: control or null character at offset 5
libtests/qtest/json_parse/bad-27.out
1 -exception: bad-27.json: JSON: offset 6: unterminated string 1 +exception: bad-27.json: JSON: offset 5: control character in string (missing "?)
libtests/qtest/json_parse/bad-28.out
1 -exception: bad-28.json: JSON: offset 16: unterminated string 1 +exception: bad-28.json: JSON: premature end of input
libtests/qtest/json_parse/bad-30.out
1 -exception: bad-30.json: JSON: offset 5: decimal point with no digits 1 +exception: bad-30.json: JSON: offset 4: numeric literal: incomplete number
libtests/qtest/json_parse/bad-31.json
1 -- 1 +-
libtests/qtest/json_parse/bad-31.out
1 -exception: bad-31.json: JSON: offset 2: number with no digits 1 +exception: bad-31.json: JSON: offset 1: numeric literal: incomplete number
libtests/qtest/json_parse/bad-32.out
1 -exception: bad-32.json: JSON: offset 5: number with leading zero 1 +exception: bad-32.json: JSON: offset 1: number with leading zero
libtests/qtest/json_parse/bad-33.out
1 -exception: bad-33.json: JSON: offset 6: number with leading zero 1 +exception: bad-33.json: JSON: offset 2: number with leading zero
libtests/qtest/json_parse/bad-34.out
1 -exception: bad-34.json: JSON: offset 3: \u must be followed by four characters 1 +exception: bad-34.json: JSON: premature end of input
libtests/qtest/json_parse/bad-41.json 0 → 100644
  1 +-.123
  2 +
libtests/qtest/json_parse/bad-41.out 0 → 100644
  1 +exception: bad-41.json: JSON: offset 1: numeric literal: no digit after minus sign
libtests/qtest/json_parse/bad-42.json 0 → 100644
  1 +-e123
libtests/qtest/json_parse/bad-42.out 0 → 100644
  1 +exception: bad-42.json: JSON: offset 1: numeric literal: no digit after minus sign
libtests/qtest/json_parse/bad-43.json 0 → 100644
  1 +123e
libtests/qtest/json_parse/bad-43.out 0 → 100644
  1 +exception: bad-43.json: JSON: offset 4: numeric literal: incomplete number
libtests/qtest/json_parse/bad-44.json 0 → 100644
  1 +123e+
libtests/qtest/json_parse/bad-44.out 0 → 100644
  1 +exception: bad-44.json: JSON: offset 5: numeric literal: incomplete number
libtests/qtest/json_parse/bad-45.json 0 → 100644
  1 +"Tab in str ing"
libtests/qtest/json_parse/bad-45.out 0 → 100644
  1 +exception: bad-45.json: JSON: offset 11: control character in string (missing "?)
libtests/qtest/json_parse/bad-46.json 0 → 100644
  1 +"cr in str ing"
libtests/qtest/json_parse/bad-46.out 0 → 100644
  1 +exception: bad-46.json: JSON: offset 10: control character in string (missing "?)
libtests/qtest/json_parse/bad-47.json 0 → 100644
  1 +"lf in str
  2 +ing"
libtests/qtest/json_parse/bad-47.out 0 → 100644
  1 +exception: bad-47.json: JSON: offset 10: control character in string (missing "?)
libtests/qtest/json_parse/bad-48.json 0 → 100644
  1 +"bs in string"
0 \ No newline at end of file 2 \ No newline at end of file
libtests/qtest/json_parse/bad-48.out 0 → 100644
  1 +exception: bad-48.json: JSON: control or null character at offset 10
libtests/qtest/json_parse/good-01-react.out
1 dictionary start 1 dictionary start
2 dictionary item: a -> [6, 11): "bcd" 2 dictionary item: a -> [6, 11): "bcd"
3 -dictionary item: e -> [18, 0): [] 3 +dictionary item: e -> [18, 19): []
4 array start 4 array start
5 array item: [19, 20): 1 5 array item: [19, 20): 1
6 array item: [41, 42): 2 6 array item: [41, 42): 2
7 array item: [44, 45): 3 7 array item: [44, 45): 3
8 array item: [46, 47): 4 8 array item: [46, 47): 4
9 array item: [48, 54): "five" 9 array item: [48, 54): "five"
10 -array item: [56, 0): {} 10 +array item: [56, 57): {}
11 dictionary start 11 dictionary start
12 dictionary item: six -> [64, 65): 7 12 dictionary item: six -> [64, 65): 7
13 dictionary item: 8 -> [72, 73): 9 13 dictionary item: 8 -> [72, 73): 9
libtests/qtest/json_parse/good-04-react.out
1 array start 1 array start
2 -array item: [1, 0): [] 2 +array item: [1, 2): []
3 array start 3 array start
4 -array item: [2, 0): [] 4 +array item: [2, 3): []
5 array start 5 array start
6 -array item: [3, 0): {} 6 +array item: [3, 4): {}
7 dictionary start 7 dictionary start
8 container end: [3, 5): {} 8 container end: [3, 5): {}
9 container end: [2, 6): [] 9 container end: [2, 6): []
10 -array item: [8, 0): {} 10 +array item: [8, 9): {}
11 dictionary start 11 dictionary start
12 -dictionary item: -> [13, 0): {} 12 +dictionary item: -> [13, 14): {}
13 dictionary start 13 dictionary start
14 container end: [13, 15): {} 14 container end: [13, 15): {}
15 container end: [8, 16): {} 15 container end: [8, 16): {}
libtests/qtest/json_parse/good-10-react.out
1 dictionary start 1 dictionary start
2 -dictionary item: a -> [9, 0): [] 2 +dictionary item: a -> [9, 10): []
3 array start 3 array start
4 array item: [10, 11): 1 4 array item: [10, 11): 1
5 array item: [13, 14): 2 5 array item: [13, 14): 2
6 -array item: [16, 0): {} 6 +array item: [16, 17): {}
7 dictionary start 7 dictionary start
8 dictionary item: x -> [22, 25): "y" 8 dictionary item: x -> [22, 25): "y"
9 container end: [16, 26): {} 9 container end: [16, 26): {}
10 array item: [28, 29): 3 10 array item: [28, 29): 3
11 -array item: [31, 0): {} 11 +array item: [31, 32): {}
12 dictionary start 12 dictionary start
13 dictionary item: keep -> [40, 61): "not in final output" 13 dictionary item: keep -> [40, 61): "not in final output"
14 container end: [31, 62): { 14 container end: [31, 62): {
15 "keep": "not in final output" 15 "keep": "not in final output"
16 } 16 }
17 container end: [9, 63): [] 17 container end: [9, 63): []
18 -dictionary item: keep -> [75, 0): [] 18 +dictionary item: keep -> [75, 76): []
19 array start 19 array start
20 array item: [76, 77): 1 20 array item: [76, 77): 1
21 array item: [79, 83): null 21 array item: [79, 83): null
@@ -23,7 +23,7 @@ array item: [85, 86): 2 @@ -23,7 +23,7 @@ array item: [85, 86): 2
23 array item: [88, 93): false 23 array item: [88, 93): false
24 array item: [95, 101): "keep" 24 array item: [95, 101): "keep"
25 array item: [103, 104): 3 25 array item: [103, 104): 3
26 -array item: [106, 0): [] 26 +array item: [106, 107): []
27 array start 27 array start
28 array item: [107, 113): "this" 28 array item: [107, 113): "this"
29 array item: [115, 121): "keep" 29 array item: [115, 121): "keep"
libtests/qtest/json_parse/good-11-react.out
1 array start 1 array start
2 -array item: [4, 0): [] 2 +array item: [4, 5): []
3 array start 3 array start
4 array item: [5, 11): "u:π" 4 array item: [5, 11): "u:π"
5 array item: [13, 23): "u:π" 5 array item: [13, 23): "u:π"
6 array item: [25, 39): "b:EFBBBFCF80" 6 array item: [25, 39): "b:EFBBBFCF80"
7 array item: [41, 53): "b:feff03c0" 7 array item: [41, 53): "b:feff03c0"
8 container end: [4, 54): [] 8 container end: [4, 54): []
9 -array item: [58, 0): [] 9 +array item: [58, 59): []
10 array start 10 array start
11 array item: [59, 67): "u:🥔" 11 array item: [59, 67): "u:🥔"
12 array item: [69, 85): "u:🥔" 12 array item: [69, 85): "u:🥔"
qpdf/qtest/qpdf/qjson-objects-not-dict.out
1 -WARNING: qjson-objects-not-dict.json (offset 82): "qpdf[1]" must be a dictionary 1 +WARNING: qjson-objects-not-dict.json (offset 81): "qpdf[1]" must be a dictionary
2 WARNING: qjson-objects-not-dict.json: "qpdf[1].trailer" was not seen 2 WARNING: qjson-objects-not-dict.json: "qpdf[1].trailer" was not seen
3 qpdf: qjson-objects-not-dict.json: errors found in JSON 3 qpdf: qjson-objects-not-dict.json: errors found in JSON
qpdf/qtest/qpdf/qjson-stream-not-dict.out
1 -WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 123): "stream" must be a dictionary 1 +WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 122): "stream" must be a dictionary
2 WARNING: qjson-stream-not-dict.json: "qpdf[1].trailer" was not seen 2 WARNING: qjson-stream-not-dict.json: "qpdf[1].trailer" was not seen
3 qpdf: qjson-stream-not-dict.json: errors found in JSON 3 qpdf: qjson-stream-not-dict.json: errors found in JSON
qpdf/qtest/qpdf/qjson-trailer-not-dict.out
1 -WARNING: qjson-trailer-not-dict.json (trailer, offset 1269): "trailer.value" must be a dictionary 1 +WARNING: qjson-trailer-not-dict.json (trailer, offset 1268): "trailer.value" must be a dictionary
2 qpdf: qjson-trailer-not-dict.json: errors found in JSON 2 qpdf: qjson-trailer-not-dict.json: errors found in JSON