Commit e4e03e9ac10d8ea0c1a8ef8c78f0103068928822

Authored by Jay Berkenbilt
Committed by GitHub
2 parents 76189c44 b6f04854

Merge pull request #890 from m-holger/jpsp

Eliminate the use of shared pointers in JSONParser
Showing 41 changed files with 641 additions and 559 deletions
include/qpdf/JSON.hh
... ... @@ -54,6 +54,7 @@ class JSON
54 54 {
55 55 public:
56 56 static int constexpr LATEST = 2;
  57 + JSON() = default;
57 58  
58 59 QPDF_DLL
59 60 std::string unparse() const;
... ...
libqpdf/JSON.cc
... ... @@ -4,19 +4,11 @@
4 4 #include <qpdf/Pl_Base64.hh>
5 5 #include <qpdf/Pl_Concatenate.hh>
6 6 #include <qpdf/Pl_String.hh>
7   -#include <qpdf/QIntC.hh>
8 7 #include <qpdf/QTC.hh>
9 8 #include <qpdf/QUtil.hh>
10 9 #include <cstring>
11 10 #include <stdexcept>
12 11  
13   -template <typename T>
14   -static qpdf_offset_t
15   -toO(T const& i)
16   -{
17   - return QIntC::to_offset(i);
18   -}
19   -
20 12 JSON::Members::Members(std::shared_ptr<JSON_value> value) :
21 13 value(value),
22 14 start(0),
... ... @@ -622,11 +614,6 @@ namespace
622 614 is(is),
623 615 reactor(reactor),
624 616 lex_state(ls_top),
625   - number_before_point(0),
626   - number_after_point(0),
627   - number_after_e(0),
628   - number_saw_point(false),
629   - number_saw_e(false),
630 617 bytes(0),
631 618 p(buf),
632 619 u_count(0),
... ... @@ -637,21 +624,9 @@ namespace
637 624 {
638 625 }
639 626  
640   - std::shared_ptr<JSON> parse();
  627 + JSON parse();
641 628  
642 629 private:
643   - void getToken();
644   - void handleToken();
645   - static std::string
646   - decode_string(std::string const& json, qpdf_offset_t offset);
647   - static void handle_u_code(
648   - char const* s,
649   - qpdf_offset_t offset,
650   - qpdf_offset_t i,
651   - unsigned long& high_surrogate,
652   - qpdf_offset_t& high_offset,
653   - std::string& result);
654   -
655 630 enum parser_state_e {
656 631 ps_top,
657 632 ps_dict_begin,
... ... @@ -668,30 +643,65 @@ namespace
668 643 enum lex_state_e {
669 644 ls_top,
670 645 ls_number,
  646 + ls_number_minus,
  647 + ls_number_leading_zero,
  648 + ls_number_before_point,
  649 + ls_number_point,
  650 + ls_number_after_point,
  651 + ls_number_e,
  652 + ls_number_e_sign,
671 653 ls_alpha,
672 654 ls_string,
673 655 ls_backslash,
674 656 ls_u4,
  657 + ls_begin_array,
  658 + ls_end_array,
  659 + ls_begin_dict,
  660 + ls_end_dict,
  661 + ls_colon,
  662 + ls_comma,
675 663 };
676 664  
  665 + struct StackFrame
  666 + {
  667 + StackFrame(parser_state_e state, JSON& item) :
  668 + state(state),
  669 + item(item)
  670 + {
  671 + }
  672 +
  673 + parser_state_e state;
  674 + JSON item;
  675 + };
  676 +
  677 + void getToken();
  678 + void handleToken();
  679 + void tokenError();
  680 + static void handle_u_code(
  681 + unsigned long codepoint,
  682 + qpdf_offset_t offset,
  683 + unsigned long& high_surrogate,
  684 + qpdf_offset_t& high_offset,
  685 + std::string& result);
  686 + inline void append();
  687 + inline void append(lex_state_e);
  688 + inline void ignore();
  689 + inline void ignore(lex_state_e);
  690 +
677 691 InputSource& is;
678 692 JSON::Reactor* reactor;
679 693 lex_state_e lex_state;
680   - size_t number_before_point;
681   - size_t number_after_point;
682   - size_t number_after_e;
683   - bool number_saw_point;
684   - bool number_saw_e;
685 694 char buf[16384];
686 695 size_t bytes;
687 696 char const* p;
688 697 qpdf_offset_t u_count;
  698 + unsigned long u_value{0};
689 699 qpdf_offset_t offset;
690 700 bool done;
691 701 std::string token;
  702 + qpdf_offset_t token_start{0};
692 703 parser_state_e parser_state;
693   - std::vector<std::shared_ptr<JSON>> stack;
694   - std::vector<parser_state_e> ps_stack;
  704 + std::vector<StackFrame> stack;
695 705 std::string dict_key;
696 706 qpdf_offset_t dict_key_offset;
697 707 };
... ... @@ -699,22 +709,15 @@ namespace
699 709  
700 710 void
701 711 JSONParser::handle_u_code(
702   - char const* s,
  712 + unsigned long codepoint,
703 713 qpdf_offset_t offset,
704   - qpdf_offset_t i,
705 714 unsigned long& high_surrogate,
706 715 qpdf_offset_t& high_offset,
707 716 std::string& result)
708 717 {
709   - std::string hex = QUtil::hex_decode(std::string(s + i + 1, s + i + 5));
710   - unsigned char high = static_cast<unsigned char>(hex.at(0));
711   - unsigned char low = static_cast<unsigned char>(hex.at(1));
712   - unsigned long codepoint = high;
713   - codepoint <<= 8;
714   - codepoint += low;
715 718 if ((codepoint & 0xFC00) == 0xD800) {
716 719 // high surrogate
717   - qpdf_offset_t new_high_offset = offset + i;
  720 + qpdf_offset_t new_high_offset = offset;
718 721 if (high_offset) {
719 722 QTC::TC("libtests", "JSON 16 high high");
720 723 throw std::runtime_error(
... ... @@ -727,10 +730,10 @@ JSONParser::handle_u_code(
727 730 high_surrogate = codepoint;
728 731 } else if ((codepoint & 0xFC00) == 0xDC00) {
729 732 // low surrogate
730   - if (offset + i != (high_offset + 6)) {
  733 + if (offset != (high_offset + 6)) {
731 734 QTC::TC("libtests", "JSON 16 low not after high");
732 735 throw std::runtime_error(
733   - "JSON: offset " + std::to_string(offset + i) +
  736 + "JSON: offset " + std::to_string(offset) +
734 737 ": UTF-16 low surrogate found not immediately after high"
735 738 " surrogate");
736 739 }
... ... @@ -743,88 +746,123 @@ JSONParser::handle_u_code(
743 746 }
744 747 }
745 748  
746   -std::string
747   -JSONParser::decode_string(std::string const& str, qpdf_offset_t offset)
  749 +void
  750 +JSONParser::tokenError()
748 751 {
749   - // The string has already been validated when this private method
750   - // is called, so errors are logic errors instead of runtime
751   - // errors.
752   - size_t len = str.length();
753   - if ((len < 2) || (str.at(0) != '"') || (str.at(len - 1) != '"')) {
754   - throw std::logic_error(
755   - "JSON Parse: decode_string called with other than \"...\"");
  752 + if (done) {
  753 + QTC::TC("libtests", "JSON parse ls premature end of input");
  754 + throw std::runtime_error("JSON: premature end of input");
756 755 }
757   - char const* s = str.c_str();
758   - // Move inside the quotation marks
759   - ++s;
760   - len -= 2;
761   - // Keep track of UTF-16 surrogate pairs.
762   - unsigned long high_surrogate = 0;
763   - qpdf_offset_t high_offset = 0;
764   - std::string result;
765   - qpdf_offset_t olen = toO(len);
766   - for (qpdf_offset_t i = 0; i < olen; ++i) {
767   - if (s[i] == '\\') {
768   - if (i + 1 >= olen) {
769   - throw std::logic_error("JSON parse: nothing after \\");
770   - }
771   - char ch = s[++i];
772   - switch (ch) {
773   - case '\\':
774   - case '\"':
775   - case '/':
776   - // \/ is allowed in json input, but so is /, so we
777   - // don't map / to \/ in output.
778   - result.append(1, ch);
779   - break;
780   - case 'b':
781   - result.append(1, '\b');
782   - break;
783   - case 'f':
784   - result.append(1, '\f');
785   - break;
786   - case 'n':
787   - result.append(1, '\n');
788   - break;
789   - case 'r':
790   - result.append(1, '\r');
791   - break;
792   - case 't':
793   - result.append(1, '\t');
794   - break;
795   - case 'u':
796   - if (i + 4 >= olen) {
797   - throw std::logic_error(
798   - "JSON parse: not enough characters after \\u");
799   - }
800   - handle_u_code(
801   - s, offset, i, high_surrogate, high_offset, result);
802   - i += 4;
803   - break;
804   - default:
805   - throw std::logic_error("JSON parse: bad character after \\");
806   - break;
807   - }
  756 +
  757 + if (lex_state == ls_u4) {
  758 + QTC::TC("libtests", "JSON parse bad hex after u");
  759 + throw std::runtime_error(
  760 + "JSON: offset " + std::to_string(offset - u_count - 1) +
  761 + ": \\u must be followed by four hex digits");
  762 + } else if (lex_state == ls_alpha) {
  763 + QTC::TC("libtests", "JSON parse keyword bad character");
  764 + throw std::runtime_error(
  765 + "JSON: offset " + std::to_string(offset) +
  766 + ": keyword: unexpected character " + std::string(p, 1));
  767 + } else if (lex_state == ls_string) {
  768 + QTC::TC("libtests", "JSON parse control char in string");
  769 + throw std::runtime_error(
  770 + "JSON: offset " + std::to_string(offset) +
  771 + ": control character in string (missing \"?)");
  772 + } else if (lex_state == ls_backslash) {
  773 + QTC::TC("libtests", "JSON parse backslash bad character");
  774 + throw std::runtime_error(
  775 + "JSON: offset " + std::to_string(offset) +
  776 + ": invalid character after backslash: " + std::string(p, 1));
  777 + }
  778 +
  779 + if (*p == '.') {
  780 + if (lex_state == ls_number || lex_state == ls_number_e ||
  781 + lex_state == ls_number_e_sign) {
  782 + QTC::TC("libtests", "JSON parse point after e");
  783 + throw std::runtime_error(
  784 + "JSON: offset " + std::to_string(offset) +
  785 + ": numeric literal: decimal point after e");
808 786 } else {
809   - result.append(1, s[i]);
  787 + QTC::TC("libtests", "JSON parse duplicate point");
  788 + throw std::runtime_error(
  789 + "JSON: offset " + std::to_string(offset) +
  790 + ": numeric literal: decimal point already seen");
810 791 }
811   - }
812   - if (high_offset) {
813   - QTC::TC("libtests", "JSON 16 dangling high");
  792 + } else if (*p == 'e' || *p == 'E') {
  793 + QTC::TC("libtests", "JSON parse duplicate e");
  794 + throw std::runtime_error(
  795 + "JSON: offset " + std::to_string(offset) +
  796 + ": numeric literal: e already seen");
  797 + } else if ((*p == '+') || (*p == '-')) {
  798 + QTC::TC("libtests", "JSON parse unexpected sign");
  799 + throw std::runtime_error(
  800 + "JSON: offset " + std::to_string(offset) +
  801 + ": numeric literal: unexpected sign");
  802 + } else if (QUtil::is_space(*p) || strchr("{}[]:,", *p)) {
  803 + QTC::TC("libtests", "JSON parse incomplete number");
  804 + throw std::runtime_error(
  805 + "JSON: offset " + std::to_string(offset) +
  806 + ": numeric literal: incomplete number");
  807 +
  808 + } else {
  809 + QTC::TC("libtests", "JSON parse numeric bad character");
814 810 throw std::runtime_error(
815   - "JSON: offset " + std::to_string(high_offset) +
816   - ": UTF-16 high surrogate not followed by low surrogate");
  811 + "JSON: offset " + std::to_string(offset) +
  812 + ": numeric literal: unexpected character " + std::string(p, 1));
817 813 }
818   - return result;
  814 + throw std::logic_error("JSON::tokenError : unhandled error");
  815 +}
  816 +
  817 +// Append current character to token and advance to next input character.
  818 +inline void
  819 +JSONParser::append()
  820 +{
  821 + token += *p;
  822 + ++p;
  823 + ++offset;
  824 +}
  825 +
  826 +// Append current character to token, advance to next input character and
  827 +// transition to 'next' lexer state.
  828 +inline void
  829 +JSONParser::append(lex_state_e next)
  830 +{
  831 + lex_state = next;
  832 + token += *p;
  833 + ++p;
  834 + ++offset;
  835 +}
  836 +
  837 +// Advance to next input character without appending the current character to
  838 +// token.
  839 +inline void
  840 +JSONParser::ignore()
  841 +{
  842 + ++p;
  843 + ++offset;
  844 +}
  845 +
  846 +// Advance to next input character without appending the current character to
  847 +// token and transition to 'next' lexer state.
  848 +inline void
  849 +JSONParser::ignore(lex_state_e next)
  850 +{
  851 + lex_state = next;
  852 + ++p;
  853 + ++offset;
819 854 }
820 855  
821 856 void
822 857 JSONParser::getToken()
823 858 {
824   - enum { append, ignore, reread } action = append;
825   - bool ready = false;
826 859 token.clear();
827   - while (!done) {
  860 +
  861 + // Keep track of UTF-16 surrogate pairs.
  862 + unsigned long high_surrogate = 0;
  863 + qpdf_offset_t high_offset = 0;
  864 +
  865 + while (true) {
828 866 if (p == (buf + bytes)) {
829 867 p = buf;
830 868 bytes = is.read(buf, sizeof(buf));
... ... @@ -834,210 +872,316 @@ JSONParser::getToken()
834 872 }
835 873 }
836 874  
837   - if (*p == 0) {
838   - QTC::TC("libtests", "JSON parse null character");
839   - throw std::runtime_error(
840   - "JSON: null character at offset " + std::to_string(offset));
841   - }
842   - action = append;
843   - switch (lex_state) {
844   - case ls_top:
845   - if (*p == '"') {
846   - lex_state = ls_string;
847   - } else if (QUtil::is_space(*p)) {
848   - action = ignore;
849   - } else if ((*p >= 'a') && (*p <= 'z')) {
850   - lex_state = ls_alpha;
851   - } else if (*p == '-') {
852   - lex_state = ls_number;
853   - number_before_point = 0;
854   - number_after_point = 0;
855   - number_after_e = 0;
856   - number_saw_point = false;
857   - number_saw_e = false;
858   - } else if ((*p >= '0') && (*p <= '9')) {
859   - lex_state = ls_number;
860   - number_before_point = 1;
861   - number_after_point = 0;
862   - number_after_e = 0;
863   - number_saw_point = false;
864   - number_saw_e = false;
865   - } else if (*p == '.') {
866   - lex_state = ls_number;
867   - number_before_point = 0;
868   - number_after_point = 0;
869   - number_after_e = 0;
870   - number_saw_point = true;
871   - number_saw_e = false;
872   - } else if (strchr("{}[]:,", *p)) {
873   - ready = true;
  875 + if ((*p < 32 && *p >= 0)) {
  876 + if (*p == '\t' || *p == '\n' || *p == '\r') {
  877 + // Legal white space not permitted in strings. This will always
  878 + // end the current token (unless we are still before the start
  879 + // of the token).
  880 + if (lex_state == ls_top) {
  881 + ignore();
  882 + } else {
  883 + break;
  884 + }
  885 +
874 886 } else {
875   - QTC::TC("libtests", "JSON parse bad character");
  887 + QTC::TC("libtests", "JSON parse null character");
876 888 throw std::runtime_error(
877   - "JSON: offset " + std::to_string(offset) +
878   - ": unexpected character " + std::string(p, 1));
  889 + "JSON: control or null character at offset " +
  890 + std::to_string(offset));
879 891 }
880   - break;
881   -
882   - case ls_number:
883   - if ((*p >= '0') && (*p <= '9')) {
884   - if (number_saw_e) {
885   - ++number_after_e;
886   - } else if (number_saw_point) {
887   - ++number_after_point;
  892 + } else if (*p == ',') {
  893 + if (lex_state == ls_top) {
  894 + ignore(ls_comma);
  895 + return;
  896 + } else if (lex_state == ls_string) {
  897 + append();
  898 + } else {
  899 + break;
  900 + }
  901 + } else if (*p == ':') {
  902 + if (lex_state == ls_top) {
  903 + ignore(ls_colon);
  904 + return;
  905 + } else if (lex_state == ls_string) {
  906 + append();
  907 + } else {
  908 + break;
  909 + }
  910 + } else if (*p == ' ') {
  911 + if (lex_state == ls_top) {
  912 + ignore();
  913 + } else if (lex_state == ls_string) {
  914 + append();
  915 + } else {
  916 + break;
  917 + }
  918 + } else if (*p == '{') {
  919 + if (lex_state == ls_top) {
  920 + token_start = offset;
  921 + ignore(ls_begin_dict);
  922 + return;
  923 + } else if (lex_state == ls_string) {
  924 + append();
  925 + } else {
  926 + break;
  927 + }
  928 + } else if (*p == '}') {
  929 + if (lex_state == ls_top) {
  930 + ignore(ls_end_dict);
  931 + return;
  932 + } else if (lex_state == ls_string) {
  933 + append();
  934 + } else {
  935 + break;
  936 + }
  937 + } else if (*p == '[') {
  938 + if (lex_state == ls_top) {
  939 + token_start = offset;
  940 + ignore(ls_begin_array);
  941 + return;
  942 + } else if (lex_state == ls_string) {
  943 + append();
  944 + } else {
  945 + break;
  946 + }
  947 + } else if (*p == ']') {
  948 + if (lex_state == ls_top) {
  949 + ignore(ls_end_array);
  950 + return;
  951 + } else if (lex_state == ls_string) {
  952 + append();
  953 + } else {
  954 + break;
  955 + }
  956 + } else {
  957 + switch (lex_state) {
  958 + case ls_top:
  959 + token_start = offset;
  960 + if (*p == '"') {
  961 + ignore(ls_string);
  962 + } else if ((*p >= 'a') && (*p <= 'z')) {
  963 + append(ls_alpha);
  964 + } else if (*p == '-') {
  965 + append(ls_number_minus);
  966 + } else if ((*p >= '1') && (*p <= '9')) {
  967 + append(ls_number_before_point);
  968 + } else if (*p == '0') {
  969 + append(ls_number_leading_zero);
888 970 } else {
889   - ++number_before_point;
890   - }
891   - } else if (*p == '.') {
892   - if (number_saw_e) {
893   - QTC::TC("libtests", "JSON parse point after e");
894   - throw std::runtime_error(
895   - "JSON: offset " + std::to_string(offset) +
896   - ": numeric literal: decimal point after e");
897   - } else if (number_saw_point) {
898   - QTC::TC("libtests", "JSON parse duplicate point");
  971 + QTC::TC("libtests", "JSON parse bad character");
899 972 throw std::runtime_error(
900 973 "JSON: offset " + std::to_string(offset) +
901   - ": numeric literal: decimal point already seen");
902   - } else {
903   - number_saw_point = true;
  974 + ": unexpected character " + std::string(p, 1));
904 975 }
905   - } else if (*p == 'e') {
906   - if (number_saw_e) {
907   - QTC::TC("libtests", "JSON parse duplicate e");
  976 + break;
  977 +
  978 + case ls_number_minus:
  979 + if ((*p >= '1') && (*p <= '9')) {
  980 + append(ls_number_before_point);
  981 + } else if (*p == '0') {
  982 + append(ls_number_leading_zero);
  983 + } else {
  984 + QTC::TC("libtests", "JSON parse number minus no digits");
908 985 throw std::runtime_error(
909 986 "JSON: offset " + std::to_string(offset) +
910   - ": numeric literal: e already seen");
911   - } else {
912   - number_saw_e = true;
  987 + ": numeric literal: no digit after minus sign");
913 988 }
914   - } else if ((*p == '+') || (*p == '-')) {
915   - if (number_saw_e && (number_after_e == 0)) {
916   - // okay
  989 + break;
  990 +
  991 + case ls_number_leading_zero:
  992 + if (*p == '.') {
  993 + append(ls_number_point);
  994 + } else if (*p == 'e' || *p == 'E') {
  995 + append(ls_number_e);
917 996 } else {
918   - QTC::TC("libtests", "JSON parse unexpected sign");
  997 + QTC::TC("libtests", "JSON parse leading zero");
919 998 throw std::runtime_error(
920 999 "JSON: offset " + std::to_string(offset) +
921   - ": numeric literal: unexpected sign");
  1000 + ": number with leading zero");
922 1001 }
923   - } else if (QUtil::is_space(*p)) {
924   - action = ignore;
925   - ready = true;
926   - } else if (strchr("{}[]:,", *p)) {
927   - action = reread;
928   - ready = true;
929   - } else {
930   - QTC::TC("libtests", "JSON parse numeric bad character");
931   - throw std::runtime_error(
932   - "JSON: offset " + std::to_string(offset) +
933   - ": numeric literal: unexpected character " +
934   - std::string(p, 1));
935   - }
936   - break;
  1002 + break;
937 1003  
938   - case ls_alpha:
939   - if ((*p >= 'a') && (*p <= 'z')) {
940   - // okay
941   - } else if (QUtil::is_space(*p)) {
942   - action = ignore;
943   - ready = true;
944   - } else if (strchr("{}[]:,", *p)) {
945   - action = reread;
946   - ready = true;
947   - } else {
948   - QTC::TC("libtests", "JSON parse keyword bad character");
949   - throw std::runtime_error(
950   - "JSON: offset " + std::to_string(offset) +
951   - ": keyword: unexpected character " + std::string(p, 1));
952   - }
953   - break;
  1004 + case ls_number_before_point:
  1005 + if ((*p >= '0') && (*p <= '9')) {
  1006 + append();
  1007 + } else if (*p == '.') {
  1008 + append(ls_number_point);
  1009 + } else if (*p == 'e' || *p == 'E') {
  1010 + append(ls_number_e);
  1011 + } else {
  1012 + tokenError();
  1013 + }
  1014 + break;
954 1015  
955   - case ls_string:
956   - if (*p == '"') {
957   - ready = true;
958   - } else if (*p == '\\') {
959   - lex_state = ls_backslash;
960   - }
961   - break;
  1016 + case ls_number_point:
  1017 + if ((*p >= '0') && (*p <= '9')) {
  1018 + append(ls_number_after_point);
  1019 + } else {
  1020 + tokenError();
  1021 + }
  1022 + break;
962 1023  
963   - case ls_backslash:
964   - /* cSpell: ignore bfnrt */
965   - if (strchr("\\\"/bfnrt", *p)) {
966   - lex_state = ls_string;
967   - } else if (*p == 'u') {
968   - lex_state = ls_u4;
969   - u_count = 0;
970   - } else {
971   - QTC::TC("libtests", "JSON parse backslash bad character");
972   - throw std::runtime_error(
973   - "JSON: offset " + std::to_string(offset) +
974   - ": invalid character after backslash: " +
975   - std::string(p, 1));
976   - }
977   - break;
  1024 + case ls_number_after_point:
  1025 + if ((*p >= '0') && (*p <= '9')) {
  1026 + append();
  1027 + } else if (*p == 'e' || *p == 'E') {
  1028 + append(ls_number_e);
  1029 + } else {
  1030 + tokenError();
  1031 + }
  1032 + break;
978 1033  
979   - case ls_u4:
980   - if (!QUtil::is_hex_digit(*p)) {
981   - QTC::TC("libtests", "JSON parse bad hex after u");
982   - throw std::runtime_error(
983   - "JSON: offset " + std::to_string(offset - u_count - 1) +
984   - ": \\u must be followed by four hex digits");
985   - }
986   - if (++u_count == 4) {
987   - lex_state = ls_string;
988   - }
989   - break;
990   - }
991   - switch (action) {
992   - case reread:
993   - break;
994   - case append:
995   - token.append(1, *p);
996   - // fall through
997   - case ignore:
998   - ++p;
999   - ++offset;
1000   - break;
1001   - }
1002   - if (ready) {
1003   - break;
1004   - }
1005   - }
1006   - if (done) {
1007   - if ((!token.empty()) && (!ready)) {
1008   - switch (lex_state) {
1009   - case ls_top:
1010   - // Can't happen
1011   - throw std::logic_error("tok_start set in ls_top while parsing");
  1034 + case ls_number_e:
  1035 + if ((*p >= '0') && (*p <= '9')) {
  1036 + append(ls_number);
  1037 + } else if ((*p == '+') || (*p == '-')) {
  1038 + append(ls_number_e_sign);
  1039 + } else {
  1040 + tokenError();
  1041 + }
  1042 + break;
  1043 +
  1044 + case ls_number_e_sign:
  1045 + if ((*p >= '0') && (*p <= '9')) {
  1046 + append(ls_number);
  1047 + } else {
  1048 + tokenError();
  1049 + }
1012 1050 break;
1013 1051  
1014 1052 case ls_number:
1015   - case ls_alpha:
1016   - // okay
  1053 + // We only get here after we have seen an exponent.
  1054 + if ((*p >= '0') && (*p <= '9')) {
  1055 + append();
  1056 + } else {
  1057 + tokenError();
  1058 + }
1017 1059 break;
1018 1060  
1019   - case ls_u4:
1020   - QTC::TC("libtests", "JSON parse premature end of u");
1021   - throw std::runtime_error(
1022   - "JSON: offset " + std::to_string(offset - u_count - 1) +
1023   - ": \\u must be followed by four characters");
  1061 + case ls_alpha:
  1062 + if ((*p >= 'a') && (*p <= 'z')) {
  1063 + append();
  1064 + } else {
  1065 + tokenError();
  1066 + }
  1067 + break;
1024 1068  
1025 1069 case ls_string:
  1070 + if (*p == '"') {
  1071 + if (high_offset) {
  1072 + QTC::TC("libtests", "JSON 16 dangling high");
  1073 + throw std::runtime_error(
  1074 + "JSON: offset " + std::to_string(high_offset) +
  1075 + ": UTF-16 high surrogate not followed by low "
  1076 + "surrogate");
  1077 + }
  1078 + ignore();
  1079 + return;
  1080 + } else if (*p == '\\') {
  1081 + ignore(ls_backslash);
  1082 + } else {
  1083 + append();
  1084 + }
  1085 + break;
  1086 +
1026 1087 case ls_backslash:
1027   - QTC::TC("libtests", "JSON parse unterminated string");
1028   - throw std::runtime_error(
1029   - "JSON: offset " + std::to_string(offset) +
1030   - ": unterminated string");
  1088 + lex_state = ls_string;
  1089 + switch (*p) {
  1090 + case '\\':
  1091 + case '\"':
  1092 + case '/':
  1093 + // \/ is allowed in json input, but so is /, so we
  1094 + // don't map / to \/ in output.
  1095 + token += *p;
  1096 + break;
  1097 + case 'b':
  1098 + token += '\b';
  1099 + break;
  1100 + case 'f':
  1101 + token += '\f';
  1102 + break;
  1103 + case 'n':
  1104 + token += '\n';
  1105 + break;
  1106 + case 'r':
  1107 + token += '\r';
  1108 + break;
  1109 + case 't':
  1110 + token += '\t';
  1111 + break;
  1112 + case 'u':
  1113 + lex_state = ls_u4;
  1114 + u_count = 0;
  1115 + u_value = 0;
  1116 + break;
  1117 + default:
  1118 + lex_state = ls_backslash;
  1119 + tokenError();
  1120 + }
  1121 + ignore();
1031 1122 break;
  1123 +
  1124 + case ls_u4:
  1125 + using ui = unsigned int;
  1126 + if ('0' <= *p && *p <= '9') {
  1127 + u_value = 16 * u_value + (ui(*p) - ui('0'));
  1128 + } else if ('a' <= *p && *p <= 'f') {
  1129 + u_value = 16 * u_value + (10 + ui(*p) - ui('a'));
  1130 + } else if ('A' <= *p && *p <= 'F') {
  1131 + u_value = 16 * u_value + (10 + ui(*p) - ui('A'));
  1132 + } else {
  1133 + tokenError();
  1134 + }
  1135 + if (++u_count == 4) {
  1136 + handle_u_code(
  1137 + u_value,
  1138 + offset - 5,
  1139 + high_surrogate,
  1140 + high_offset,
  1141 + token);
  1142 + lex_state = ls_string;
  1143 + }
  1144 + ignore();
  1145 + break;
  1146 +
  1147 + default:
  1148 + throw std::logic_error(
  1149 + "JSONParser::getToken : trying to handle delimiter state");
1032 1150 }
1033 1151 }
1034 1152 }
  1153 +
  1154 + // We only get here if on end of input or if the last character was a
  1155 + // control character or other delimiter.
  1156 +
  1157 + if (!token.empty()) {
  1158 + switch (lex_state) {
  1159 + case ls_top:
  1160 + // Can't happen
  1161 + throw std::logic_error("tok_start set in ls_top while parsing");
  1162 + break;
  1163 +
  1164 + case ls_number_leading_zero:
  1165 + case ls_number_before_point:
  1166 + case ls_number_after_point:
  1167 + lex_state = ls_number;
  1168 + break;
  1169 +
  1170 + case ls_number:
  1171 + case ls_alpha:
  1172 + // terminal state
  1173 + break;
  1174 +
  1175 + default:
  1176 + tokenError();
  1177 + }
  1178 + }
1035 1179 }
1036 1180  
1037 1181 void
1038 1182 JSONParser::handleToken()
1039 1183 {
1040   - if (token.empty()) {
  1184 + if (lex_state == ls_top) {
1041 1185 return;
1042 1186 }
1043 1187  
... ... @@ -1048,73 +1192,96 @@ JSONParser::handleToken()
1048 1192 ": material follows end of object: " + token);
1049 1193 }
1050 1194  
1051   - // Git string value
1052   - std::string s_value;
1053   - if (lex_state == ls_string) {
1054   - // Token includes the quotation marks
1055   - if (token.length() < 2) {
1056   - throw std::logic_error("JSON string length < 2");
1057   - }
1058   - s_value = decode_string(token, offset - toO(token.length()));
1059   - }
1060   - // Based on the lexical state and value, figure out whether we are
1061   - // looking at an item or a delimiter. It will always be exactly
1062   - // one of those two or an error condition.
1063   -
1064   - std::shared_ptr<JSON> item;
1065   - char delimiter = '\0';
1066   - // Already verified that token is not empty
1067   - char first_char = token.at(0);
1068   - switch (lex_state) {
1069   - case ls_top:
1070   - switch (first_char) {
1071   - case '{':
1072   - item = std::make_shared<JSON>(JSON::makeDictionary());
1073   - item->setStart(offset - toO(token.length()));
1074   - break;
  1195 + const static JSON null_item = JSON::makeNull();
  1196 + JSON item;
  1197 + auto tos = stack.empty() ? null_item : stack.back().item;
  1198 + auto ls = lex_state;
  1199 + lex_state = ls_top;
1075 1200  
1076   - case '[':
1077   - item = std::make_shared<JSON>(JSON::makeArray());
1078   - item->setStart(offset - toO(token.length()));
1079   - break;
  1201 + switch (ls) {
  1202 + case ls_begin_dict:
  1203 + item = JSON::makeDictionary();
  1204 + break;
1080 1205  
1081   - default:
1082   - delimiter = first_char;
1083   - break;
1084   - }
  1206 + case ls_begin_array:
  1207 + item = JSON::makeArray();
1085 1208 break;
1086 1209  
1087   - case ls_number:
1088   - if (number_saw_point && (number_after_point == 0)) {
1089   - QTC::TC("libtests", "JSON parse decimal with no digits");
  1210 + case ls_colon:
  1211 + if (parser_state != ps_dict_after_key) {
  1212 + QTC::TC("libtests", "JSON parse unexpected :");
1090 1213 throw std::runtime_error(
1091 1214 "JSON: offset " + std::to_string(offset) +
1092   - ": decimal point with no digits");
  1215 + ": unexpected colon");
1093 1216 }
1094   - if ((number_before_point > 1) &&
1095   - ((first_char == '0') ||
1096   - ((first_char == '-') && (token.at(1) == '0')))) {
1097   - QTC::TC("libtests", "JSON parse leading zero");
  1217 + parser_state = ps_dict_after_colon;
  1218 + return;
  1219 +
  1220 + case ls_comma:
  1221 + if (!((parser_state == ps_dict_after_item) ||
  1222 + (parser_state == ps_array_after_item))) {
  1223 + QTC::TC("libtests", "JSON parse unexpected ,");
1098 1224 throw std::runtime_error(
1099 1225 "JSON: offset " + std::to_string(offset) +
1100   - ": number with leading zero");
  1226 + ": unexpected comma");
1101 1227 }
1102   - if ((number_before_point == 0) && (number_after_point == 0)) {
1103   - QTC::TC("libtests", "JSON parse number no digits");
  1228 + if (parser_state == ps_dict_after_item) {
  1229 + parser_state = ps_dict_after_comma;
  1230 + } else if (parser_state == ps_array_after_item) {
  1231 + parser_state = ps_array_after_comma;
  1232 + } else {
  1233 + throw std::logic_error("JSONParser::handleToken: unexpected parser"
  1234 + " state for comma");
  1235 + }
  1236 + return;
  1237 +
  1238 + case ls_end_array:
  1239 + if (!(parser_state == ps_array_begin ||
  1240 + parser_state == ps_array_after_item)) {
  1241 + QTC::TC("libtests", "JSON parse unexpected ]");
1104 1242 throw std::runtime_error(
1105 1243 "JSON: offset " + std::to_string(offset) +
1106   - ": number with no digits");
  1244 + ": unexpected array end delimiter");
1107 1245 }
1108   - item = std::make_shared<JSON>(JSON::makeNumber(token));
  1246 + parser_state = stack.back().state;
  1247 + tos.setEnd(offset);
  1248 + if (reactor) {
  1249 + reactor->containerEnd(tos);
  1250 + }
  1251 + if (parser_state != ps_done) {
  1252 + stack.pop_back();
  1253 + }
  1254 + return;
  1255 +
  1256 + case ls_end_dict:
  1257 + if (!((parser_state == ps_dict_begin) ||
  1258 + (parser_state == ps_dict_after_item))) {
  1259 + QTC::TC("libtests", "JSON parse unexpected }");
  1260 + throw std::runtime_error(
  1261 + "JSON: offset " + std::to_string(offset) +
  1262 + ": unexpected dictionary end delimiter");
  1263 + }
  1264 + parser_state = stack.back().state;
  1265 + tos.setEnd(offset);
  1266 + if (reactor) {
  1267 + reactor->containerEnd(tos);
  1268 + }
  1269 + if (parser_state != ps_done) {
  1270 + stack.pop_back();
  1271 + }
  1272 + return;
  1273 +
  1274 + case ls_number:
  1275 + item = JSON::makeNumber(token);
1109 1276 break;
1110 1277  
1111 1278 case ls_alpha:
1112 1279 if (token == "true") {
1113   - item = std::make_shared<JSON>(JSON::makeBool(true));
  1280 + item = JSON::makeBool(true);
1114 1281 } else if (token == "false") {
1115   - item = std::make_shared<JSON>(JSON::makeBool(false));
  1282 + item = JSON::makeBool(false);
1116 1283 } else if (token == "null") {
1117   - item = std::make_shared<JSON>(JSON::makeNull());
  1284 + item = JSON::makeNull();
1118 1285 } else {
1119 1286 QTC::TC("libtests", "JSON parse invalid keyword");
1120 1287 throw std::runtime_error(
... ... @@ -1124,227 +1291,115 @@ JSONParser::handleToken()
1124 1291 break;
1125 1292  
1126 1293 case ls_string:
1127   - item = std::make_shared<JSON>(JSON::makeString(s_value));
  1294 + if (parser_state == ps_dict_begin ||
  1295 + parser_state == ps_dict_after_comma) {
  1296 + dict_key = token;
  1297 + dict_key_offset = token_start;
  1298 + parser_state = ps_dict_after_key;
  1299 + return;
  1300 + } else {
  1301 + item = JSON::makeString(token);
  1302 + }
1128 1303 break;
1129 1304  
1130   - case ls_backslash:
1131   - case ls_u4:
  1305 + default:
1132 1306 throw std::logic_error(
1133   - "tok_end is set while state = ls_backslash or ls_u4");
  1307 + "JSONParser::handleToken : non-terminal lexer state encountered");
1134 1308 break;
1135 1309 }
1136 1310  
1137   - if ((item == nullptr) == (delimiter == '\0')) {
1138   - throw std::logic_error(
1139   - "JSONParser::handleToken: logic error: exactly one of item"
1140   - " or delimiter must be set");
1141   - }
1142   -
1143   - // See whether what we have is allowed at this point.
1144   -
1145   - if (item.get()) {
1146   - switch (parser_state) {
1147   - case ps_done:
1148   - throw std::logic_error("can't happen; ps_done already handled");
1149   - break;
1150   -
1151   - case ps_dict_after_key:
1152   - QTC::TC("libtests", "JSON parse expected colon");
1153   - throw std::runtime_error(
1154   - "JSON: offset " + std::to_string(offset) + ": expected ':'");
1155   - break;
1156   -
1157   - case ps_dict_after_item:
1158   - QTC::TC("libtests", "JSON parse expected , or }");
1159   - throw std::runtime_error(
1160   - "JSON: offset " + std::to_string(offset) +
1161   - ": expected ',' or '}'");
1162   - break;
1163   -
1164   - case ps_array_after_item:
1165   - QTC::TC("libtests", "JSON parse expected, or ]");
1166   - throw std::runtime_error(
1167   - "JSON: offset " + std::to_string(offset) +
1168   - ": expected ',' or ']'");
1169   - break;
1170   -
1171   - case ps_dict_begin:
1172   - case ps_dict_after_comma:
1173   - if (lex_state != ls_string) {
1174   - QTC::TC("libtests", "JSON parse string as dict key");
1175   - throw std::runtime_error(
1176   - "JSON: offset " + std::to_string(offset) +
1177   - ": expect string as dictionary key");
1178   - }
1179   - break;
1180   -
1181   - case ps_top:
1182   - case ps_dict_after_colon:
1183   - case ps_array_begin:
1184   - case ps_array_after_comma:
1185   - break;
1186   - // okay
1187   - }
1188   - } else if (delimiter == '}') {
1189   - if (!((parser_state == ps_dict_begin) ||
1190   - (parser_state == ps_dict_after_item)))
  1311 + item.setStart(token_start);
  1312 + item.setEnd(offset);
1191 1313  
1192   - {
1193   - QTC::TC("libtests", "JSON parse unexpected }");
1194   - throw std::runtime_error(
1195   - "JSON: offset " + std::to_string(offset) +
1196   - ": unexpected dictionary end delimiter");
1197   - }
1198   - } else if (delimiter == ']') {
1199   - if (!((parser_state == ps_array_begin) ||
1200   - (parser_state == ps_array_after_item)))
  1314 + switch (parser_state) {
  1315 + case ps_dict_begin:
  1316 + case ps_dict_after_comma:
  1317 + QTC::TC("libtests", "JSON parse string as dict key");
  1318 + throw std::runtime_error(
  1319 + "JSON: offset " + std::to_string(offset) +
  1320 + ": expect string as dictionary key");
  1321 + break;
1201 1322  
1202   - {
1203   - QTC::TC("libtests", "JSON parse unexpected ]");
  1323 + case ps_dict_after_colon:
  1324 + if (tos.checkDictionaryKeySeen(dict_key)) {
  1325 + QTC::TC("libtests", "JSON parse duplicate key");
1204 1326 throw std::runtime_error(
1205   - "JSON: offset " + std::to_string(offset) +
1206   - ": unexpected array end delimiter");
  1327 + "JSON: offset " + std::to_string(dict_key_offset) +
  1328 + ": duplicated dictionary key");
1207 1329 }
1208   - } else if (delimiter == ':') {
1209   - if (parser_state != ps_dict_after_key) {
1210   - QTC::TC("libtests", "JSON parse unexpected :");
1211   - throw std::runtime_error(
1212   - "JSON: offset " + std::to_string(offset) +
1213   - ": unexpected colon");
1214   - }
1215   - } else if (delimiter == ',') {
1216   - if (!((parser_state == ps_dict_after_item) ||
1217   - (parser_state == ps_array_after_item))) {
1218   - QTC::TC("libtests", "JSON parse unexpected ,");
1219   - throw std::runtime_error(
1220   - "JSON: offset " + std::to_string(offset) +
1221   - ": unexpected comma");
  1330 + if (!reactor || !reactor->dictionaryItem(dict_key, item)) {
  1331 + tos.addDictionaryMember(dict_key, item);
1222 1332 }
1223   - } else if (delimiter != '\0') {
1224   - throw std::logic_error("JSONParser::handleToken: bad delimiter");
1225   - }
1226   -
1227   - // Now we know we have a delimiter or item that is allowed. Do
1228   - // whatever we need to do with it.
  1333 + parser_state = ps_dict_after_item;
  1334 + break;
1229 1335  
1230   - parser_state_e next_state = ps_top;
1231   - if (delimiter == ':') {
1232   - next_state = ps_dict_after_colon;
1233   - } else if (delimiter == ',') {
1234   - if (parser_state == ps_dict_after_item) {
1235   - next_state = ps_dict_after_comma;
1236   - } else if (parser_state == ps_array_after_item) {
1237   - next_state = ps_array_after_comma;
1238   - } else {
1239   - throw std::logic_error("JSONParser::handleToken: unexpected parser"
1240   - " state for comma");
1241   - }
1242   - } else if ((delimiter == '}') || (delimiter == ']')) {
1243   - next_state = ps_stack.back();
1244   - ps_stack.pop_back();
1245   - auto tos = stack.back();
1246   - tos->setEnd(offset);
1247   - if (reactor) {
1248   - reactor->containerEnd(*tos);
1249   - }
1250   - if (next_state != ps_done) {
1251   - stack.pop_back();
1252   - }
1253   - } else if (delimiter != '\0') {
1254   - throw std::logic_error(
1255   - "JSONParser::handleToken: unexpected delimiter in transition");
1256   - } else if (item.get()) {
1257   - if (!(item->isArray() || item->isDictionary())) {
1258   - item->setStart(offset - toO(token.length()));
1259   - item->setEnd(offset);
  1336 + case ps_array_begin:
  1337 + case ps_array_after_comma:
  1338 + if (!reactor || !reactor->arrayItem(item)) {
  1339 + tos.addArrayElement(item);
1260 1340 }
  1341 + parser_state = ps_array_after_item;
  1342 + break;
1261 1343  
1262   - std::shared_ptr<JSON> tos;
1263   - if (!stack.empty()) {
1264   - tos = stack.back();
  1344 + case ps_top:
  1345 + if (!(item.isDictionary() || item.isArray())) {
  1346 + stack.push_back({ps_done, item});
  1347 + parser_state = ps_done;
  1348 + return;
1265 1349 }
1266   - switch (parser_state) {
1267   - case ps_dict_begin:
1268   - case ps_dict_after_comma:
1269   - this->dict_key = s_value;
1270   - this->dict_key_offset = item->getStart();
1271   - item = nullptr;
1272   - next_state = ps_dict_after_key;
1273   - break;
  1350 + parser_state = ps_done;
  1351 + break;
1274 1352  
1275   - case ps_dict_after_colon:
1276   - if (tos->checkDictionaryKeySeen(dict_key)) {
1277   - QTC::TC("libtests", "JSON parse duplicate key");
1278   - throw std::runtime_error(
1279   - "JSON: offset " + std::to_string(dict_key_offset) +
1280   - ": duplicated dictionary key");
1281   - }
1282   - if (!reactor || !reactor->dictionaryItem(dict_key, *item)) {
1283   - tos->addDictionaryMember(dict_key, *item);
1284   - }
1285   - next_state = ps_dict_after_item;
1286   - break;
  1353 + case ps_dict_after_key:
  1354 + QTC::TC("libtests", "JSON parse expected colon");
  1355 + throw std::runtime_error(
  1356 + "JSON: offset " + std::to_string(offset) + ": expected ':'");
  1357 + break;
1287 1358  
1288   - case ps_array_begin:
1289   - case ps_array_after_comma:
1290   - if (!reactor || !reactor->arrayItem(*item)) {
1291   - tos->addArrayElement(*item);
1292   - }
1293   - next_state = ps_array_after_item;
1294   - break;
  1359 + case ps_dict_after_item:
  1360 + QTC::TC("libtests", "JSON parse expected , or }");
  1361 + throw std::runtime_error(
  1362 + "JSON: offset " + std::to_string(offset) + ": expected ',' or '}'");
  1363 + break;
1295 1364  
1296   - case ps_top:
1297   - next_state = ps_done;
1298   - break;
  1365 + case ps_array_after_item:
  1366 + QTC::TC("libtests", "JSON parse expected, or ]");
  1367 + throw std::runtime_error(
  1368 + "JSON: offset " + std::to_string(offset) + ": expected ',' or ']'");
  1369 + break;
1299 1370  
1300   - case ps_dict_after_key:
1301   - case ps_dict_after_item:
1302   - case ps_array_after_item:
1303   - case ps_done:
1304   - throw std::logic_error(
1305   - "JSONParser::handleToken: unexpected parser state");
1306   - }
1307   - } else {
  1371 + case ps_done:
1308 1372 throw std::logic_error(
1309   - "JSONParser::handleToken: unexpected null item in transition");
  1373 + "JSONParser::handleToken: unexpected parser state");
1310 1374 }
1311 1375  
1312   - if (reactor && item.get()) {
  1376 + if (item.isDictionary() || item.isArray()) {
  1377 + stack.push_back({parser_state, item});
1313 1378 // Calling container start method is postponed until after
1314 1379 // adding the containers to their parent containers, if any.
1315 1380 // This makes it much easier to keep track of the current
1316 1381 // nesting level.
1317   - if (item->isDictionary()) {
1318   - reactor->dictionaryStart();
1319   - } else if (item->isArray()) {
1320   - reactor->arrayStart();
  1382 + if (item.isDictionary()) {
  1383 + if (reactor) {
  1384 + reactor->dictionaryStart();
  1385 + }
  1386 + parser_state = ps_dict_begin;
  1387 + } else if (item.isArray()) {
  1388 + if (reactor) {
  1389 + reactor->arrayStart();
  1390 + }
  1391 + parser_state = ps_array_begin;
1321 1392 }
1322   - }
1323 1393  
1324   - // Prepare for next token
1325   - if (item.get()) {
1326   - if (item->isDictionary()) {
1327   - stack.push_back(item);
1328   - ps_stack.push_back(next_state);
1329   - next_state = ps_dict_begin;
1330   - } else if (item->isArray()) {
1331   - stack.push_back(item);
1332   - ps_stack.push_back(next_state);
1333   - next_state = ps_array_begin;
1334   - } else if (parser_state == ps_top) {
1335   - stack.push_back(item);
  1394 + if (stack.size() > 500) {
  1395 + throw std::runtime_error(
  1396 + "JSON: offset " + std::to_string(offset) +
  1397 + ": maximum object depth exceeded");
1336 1398 }
1337 1399 }
1338   - if (ps_stack.size() > 500) {
1339   - throw std::runtime_error(
1340   - "JSON: offset " + std::to_string(offset) +
1341   - ": maximum object depth exceeded");
1342   - }
1343   - parser_state = next_state;
1344   - lex_state = ls_top;
1345 1400 }
1346 1401  
1347   -std::shared_ptr<JSON>
  1402 +JSON
1348 1403 JSONParser::parse()
1349 1404 {
1350 1405 while (!done) {
... ... @@ -1355,8 +1410,8 @@ JSONParser::parse()
1355 1410 QTC::TC("libtests", "JSON parse premature EOF");
1356 1411 throw std::runtime_error("JSON: premature end of input");
1357 1412 }
1358   - auto const& tos = stack.back();
1359   - if (reactor && tos.get() && !(tos->isArray() || tos->isDictionary())) {
  1413 + auto const& tos = stack.back().item;
  1414 + if (reactor && !(tos.isArray() || tos.isDictionary())) {
1360 1415 reactor->topLevelScalar();
1361 1416 }
1362 1417 return tos;
... ... @@ -1366,7 +1421,7 @@ JSON
1366 1421 JSON::parse(InputSource& is, Reactor* reactor)
1367 1422 {
1368 1423 JSONParser jp(is, reactor);
1369   - return *jp.parse();
  1424 + return jp.parse();
1370 1425 }
1371 1426  
1372 1427 JSON
... ... @@ -1374,7 +1429,7 @@ JSON::parse(std::string const& s)
1374 1429 {
1375 1430 BufferInputSource bis("json input", s);
1376 1431 JSONParser jp(bis, nullptr);
1377   - return *jp.parse();
  1432 + return jp.parse();
1378 1433 }
1379 1434  
1380 1435 void
... ...
libtests/libtests.testcov
... ... @@ -58,7 +58,6 @@ QPDFArgParser bad option for help 0
58 58 QPDFArgParser bad topic for help 0
59 59 QPDFArgParser invalid choice handler to unknown 0
60 60 JSON parse junk after object 0
61   -JSON parse decimal with no digits 0
62 61 JSON parse invalid keyword 0
63 62 JSON parse expected colon 0
64 63 JSON parse expected , or } 0
... ... @@ -76,12 +75,13 @@ JSON parse duplicate point 0
76 75 JSON parse duplicate e 0
77 76 JSON parse unexpected sign 0
78 77 JSON parse numeric bad character 0
  78 +JSON parse number minus no digits 0
  79 +JSON parse incomplete number 0
79 80 JSON parse keyword bad character 0
80 81 JSON parse backslash bad character 0
81   -JSON parse unterminated string 0
  82 +JSON parse control char in string 0
82 83 JSON parse leading zero 0
83   -JSON parse number no digits 0
84   -JSON parse premature end of u 0
  84 +JSON parse ls premature end of input 0
85 85 JSON parse bad hex after u 0
86 86 JSONHandler unhandled value 0
87 87 JSONHandler unexpected key 0
... ...
libtests/qtest/json_parse.test
... ... @@ -121,6 +121,14 @@ my @bad = (
121 121 "high high surrogate", # 38
122 122 "dangling high surrogate", # 39
123 123 "duplicate dictionary key", # 40
  124 + "decimal point after minus",# 41
  125 + "e after minus", # 42
  126 + "missing digit after e", # 43
  127 + "missing digit after e+/-", # 44
  128 + "tab char in string", # 45
  129 + "cr char in string", # 46
  130 + "lf char in string", # 47
  131 + "bs char in string", # 48
124 132 );
125 133  
126 134 my $i = 0;
... ...
libtests/qtest/json_parse/bad-01.out
1   -exception: bad-01.json: JSON: offset 9: material follows end of object: junk
  1 +exception: bad-01.json: JSON: offset 8: material follows end of object: junk
... ...
libtests/qtest/json_parse/bad-02.out
1   -exception: bad-02.json: JSON: offset 11: material follows end of object: junk
  1 +exception: bad-02.json: JSON: offset 10: material follows end of object: junk
... ...
libtests/qtest/json_parse/bad-03.out
1   -exception: bad-03.json: JSON: offset 16: material follows end of object: junk
  1 +exception: bad-03.json: JSON: offset 15: material follows end of object: junk
... ...
libtests/qtest/json_parse/bad-04.out
1   -exception: bad-04.json: JSON: offset 5: decimal point with no digits
  1 +exception: bad-04.json: JSON: offset 4: unexpected character .
... ...
libtests/qtest/json_parse/bad-09.out
1   -exception: bad-09.json: JSON: offset 3: expect string as dictionary key
  1 +exception: bad-09.json: JSON: offset 2: expect string as dictionary key
... ...
libtests/qtest/json_parse/bad-18.out
1   -exception: bad-18.json: JSON: null character at offset 5
  1 +exception: bad-18.json: JSON: control or null character at offset 5
... ...
libtests/qtest/json_parse/bad-27.out
1   -exception: bad-27.json: JSON: offset 6: unterminated string
  1 +exception: bad-27.json: JSON: offset 5: control character in string (missing "?)
... ...
libtests/qtest/json_parse/bad-28.out
1   -exception: bad-28.json: JSON: offset 16: unterminated string
  1 +exception: bad-28.json: JSON: premature end of input
... ...
libtests/qtest/json_parse/bad-30.out
1   -exception: bad-30.json: JSON: offset 5: decimal point with no digits
  1 +exception: bad-30.json: JSON: offset 4: numeric literal: incomplete number
... ...
libtests/qtest/json_parse/bad-31.json
1   --
  1 +-
... ...
libtests/qtest/json_parse/bad-31.out
1   -exception: bad-31.json: JSON: offset 2: number with no digits
  1 +exception: bad-31.json: JSON: offset 1: numeric literal: incomplete number
... ...
libtests/qtest/json_parse/bad-32.out
1   -exception: bad-32.json: JSON: offset 5: number with leading zero
  1 +exception: bad-32.json: JSON: offset 1: number with leading zero
... ...
libtests/qtest/json_parse/bad-33.out
1   -exception: bad-33.json: JSON: offset 6: number with leading zero
  1 +exception: bad-33.json: JSON: offset 2: number with leading zero
... ...
libtests/qtest/json_parse/bad-34.out
1   -exception: bad-34.json: JSON: offset 3: \u must be followed by four characters
  1 +exception: bad-34.json: JSON: premature end of input
... ...
libtests/qtest/json_parse/bad-41.json 0 → 100644
  1 +-.123
  2 +
... ...
libtests/qtest/json_parse/bad-41.out 0 → 100644
  1 +exception: bad-41.json: JSON: offset 1: numeric literal: no digit after minus sign
... ...
libtests/qtest/json_parse/bad-42.json 0 → 100644
  1 +-e123
... ...
libtests/qtest/json_parse/bad-42.out 0 → 100644
  1 +exception: bad-42.json: JSON: offset 1: numeric literal: no digit after minus sign
... ...
libtests/qtest/json_parse/bad-43.json 0 → 100644
  1 +123e
... ...
libtests/qtest/json_parse/bad-43.out 0 → 100644
  1 +exception: bad-43.json: JSON: offset 4: numeric literal: incomplete number
... ...
libtests/qtest/json_parse/bad-44.json 0 → 100644
  1 +123e+
... ...
libtests/qtest/json_parse/bad-44.out 0 → 100644
  1 +exception: bad-44.json: JSON: offset 5: numeric literal: incomplete number
... ...
libtests/qtest/json_parse/bad-45.json 0 → 100644
  1 +"Tab in str ing"
... ...
libtests/qtest/json_parse/bad-45.out 0 → 100644
  1 +exception: bad-45.json: JSON: offset 11: control character in string (missing "?)
... ...
libtests/qtest/json_parse/bad-46.json 0 → 100644
  1 +"cr in str ing"
... ...
libtests/qtest/json_parse/bad-46.out 0 → 100644
  1 +exception: bad-46.json: JSON: offset 10: control character in string (missing "?)
... ...
libtests/qtest/json_parse/bad-47.json 0 → 100644
  1 +"lf in str
  2 +ing"
... ...
libtests/qtest/json_parse/bad-47.out 0 → 100644
  1 +exception: bad-47.json: JSON: offset 10: control character in string (missing "?)
... ...
libtests/qtest/json_parse/bad-48.json 0 → 100644
  1 +"bs in string"
0 2 \ No newline at end of file
... ...
libtests/qtest/json_parse/bad-48.out 0 → 100644
  1 +exception: bad-48.json: JSON: control or null character at offset 10
... ...
libtests/qtest/json_parse/good-01-react.out
1 1 dictionary start
2 2 dictionary item: a -> [6, 11): "bcd"
3   -dictionary item: e -> [18, 0): []
  3 +dictionary item: e -> [18, 19): []
4 4 array start
5 5 array item: [19, 20): 1
6 6 array item: [41, 42): 2
7 7 array item: [44, 45): 3
8 8 array item: [46, 47): 4
9 9 array item: [48, 54): "five"
10   -array item: [56, 0): {}
  10 +array item: [56, 57): {}
11 11 dictionary start
12 12 dictionary item: six -> [64, 65): 7
13 13 dictionary item: 8 -> [72, 73): 9
... ...
libtests/qtest/json_parse/good-04-react.out
1 1 array start
2   -array item: [1, 0): []
  2 +array item: [1, 2): []
3 3 array start
4   -array item: [2, 0): []
  4 +array item: [2, 3): []
5 5 array start
6   -array item: [3, 0): {}
  6 +array item: [3, 4): {}
7 7 dictionary start
8 8 container end: [3, 5): {}
9 9 container end: [2, 6): []
10   -array item: [8, 0): {}
  10 +array item: [8, 9): {}
11 11 dictionary start
12   -dictionary item: -> [13, 0): {}
  12 +dictionary item: -> [13, 14): {}
13 13 dictionary start
14 14 container end: [13, 15): {}
15 15 container end: [8, 16): {}
... ...
libtests/qtest/json_parse/good-10-react.out
1 1 dictionary start
2   -dictionary item: a -> [9, 0): []
  2 +dictionary item: a -> [9, 10): []
3 3 array start
4 4 array item: [10, 11): 1
5 5 array item: [13, 14): 2
6   -array item: [16, 0): {}
  6 +array item: [16, 17): {}
7 7 dictionary start
8 8 dictionary item: x -> [22, 25): "y"
9 9 container end: [16, 26): {}
10 10 array item: [28, 29): 3
11   -array item: [31, 0): {}
  11 +array item: [31, 32): {}
12 12 dictionary start
13 13 dictionary item: keep -> [40, 61): "not in final output"
14 14 container end: [31, 62): {
15 15 "keep": "not in final output"
16 16 }
17 17 container end: [9, 63): []
18   -dictionary item: keep -> [75, 0): []
  18 +dictionary item: keep -> [75, 76): []
19 19 array start
20 20 array item: [76, 77): 1
21 21 array item: [79, 83): null
... ... @@ -23,7 +23,7 @@ array item: [85, 86): 2
23 23 array item: [88, 93): false
24 24 array item: [95, 101): "keep"
25 25 array item: [103, 104): 3
26   -array item: [106, 0): []
  26 +array item: [106, 107): []
27 27 array start
28 28 array item: [107, 113): "this"
29 29 array item: [115, 121): "keep"
... ...
libtests/qtest/json_parse/good-11-react.out
1 1 array start
2   -array item: [4, 0): []
  2 +array item: [4, 5): []
3 3 array start
4 4 array item: [5, 11): "u:π"
5 5 array item: [13, 23): "u:π"
6 6 array item: [25, 39): "b:EFBBBFCF80"
7 7 array item: [41, 53): "b:feff03c0"
8 8 container end: [4, 54): []
9   -array item: [58, 0): []
  9 +array item: [58, 59): []
10 10 array start
11 11 array item: [59, 67): "u:🥔"
12 12 array item: [69, 85): "u:🥔"
... ...
qpdf/qtest/qpdf/qjson-objects-not-dict.out
1   -WARNING: qjson-objects-not-dict.json (offset 82): "qpdf[1]" must be a dictionary
  1 +WARNING: qjson-objects-not-dict.json (offset 81): "qpdf[1]" must be a dictionary
2 2 WARNING: qjson-objects-not-dict.json: "qpdf[1].trailer" was not seen
3 3 qpdf: qjson-objects-not-dict.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-stream-not-dict.out
1   -WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 123): "stream" must be a dictionary
  1 +WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 122): "stream" must be a dictionary
2 2 WARNING: qjson-stream-not-dict.json: "qpdf[1].trailer" was not seen
3 3 qpdf: qjson-stream-not-dict.json: errors found in JSON
... ...
qpdf/qtest/qpdf/qjson-trailer-not-dict.out
1   -WARNING: qjson-trailer-not-dict.json (trailer, offset 1269): "trailer.value" must be a dictionary
  1 +WARNING: qjson-trailer-not-dict.json (trailer, offset 1268): "trailer.value" must be a dictionary
2 2 qpdf: qjson-trailer-not-dict.json: errors found in JSON
... ...