Commit 05fda4afa289ef248804865d7648c9ac3ae75fbd

Authored by Jay Berkenbilt
1 parent e5f3910c

Change JSON parser to parse from an InputSource

@@ -51,9 +51,6 @@ library, when context is available, to have a pipeline rather than a @@ -51,9 +51,6 @@ library, when context is available, to have a pipeline rather than a
51 FILE* or std::ostream. This makes it possible for people to capture 51 FILE* or std::ostream. This makes it possible for people to capture
52 output more flexibly. 52 output more flexibly.
53 53
54 -JSON::parse should work from an InputSource. BufferInputSource can  
55 -already start with a std::string.  
56 -  
57 Have a json blob defined by a function that takes a pipeline and 54 Have a json blob defined by a function that takes a pipeline and
58 writes data to the pipeline. Its writer should create a Pl_Base64 -> 55 writes data to the pipeline. Its writer should create a Pl_Base64 ->
59 Pl_Concatenate in front of the pipeline passed to write and call the 56 Pl_Concatenate in front of the pipeline passed to write and call the
include/qpdf/JSON.hh
@@ -46,6 +46,7 @@ @@ -46,6 +46,7 @@
46 #include <vector> 46 #include <vector>
47 47
48 class Pipeline; 48 class Pipeline;
  49 +class InputSource;
49 50
50 class JSON 51 class JSON
51 { 52 {
@@ -249,10 +250,13 @@ class JSON @@ -249,10 +250,13 @@ class JSON
249 virtual bool arrayItem(JSON const& value) = 0; 250 virtual bool arrayItem(JSON const& value) = 0;
250 }; 251 };
251 252
252 - // Create a JSON object from a string. See above for information  
253 - // about how to use the Reactor. 253 + // Create a JSON object from a string.
254 QPDF_DLL 254 QPDF_DLL
255 - static JSON parse(std::string const&, Reactor* reactor = nullptr); 255 + static JSON parse(std::string const&);
  256 + // Create a JSON object from an input source. See above for
  257 + // information about how to use the Reactor.
  258 + QPDF_DLL
  259 + static JSON parse(InputSource&, Reactor* reactor = nullptr);
256 260
257 // parse calls setOffsets to set the inclusive start and 261 // parse calls setOffsets to set the inclusive start and
258 // non-inclusive end offsets of an object relative to its input 262 // non-inclusive end offsets of an object relative to its input
libqpdf/JSON.cc
1 #include <qpdf/JSON.hh> 1 #include <qpdf/JSON.hh>
2 2
3 -#include <qpdf/Pipeline.hh> 3 +#include <qpdf/BufferInputSource.hh>
4 #include <qpdf/Pl_String.hh> 4 #include <qpdf/Pl_String.hh>
5 -#include <qpdf/QIntC.hh>  
6 #include <qpdf/QTC.hh> 5 #include <qpdf/QTC.hh>
7 #include <qpdf/QUtil.hh> 6 #include <qpdf/QUtil.hh>
8 #include <cstring> 7 #include <cstring>
@@ -521,7 +520,8 @@ namespace @@ -521,7 +520,8 @@ namespace
521 class JSONParser 520 class JSONParser
522 { 521 {
523 public: 522 public:
524 - JSONParser(JSON::Reactor* reactor) : 523 + JSONParser(InputSource& is, JSON::Reactor* reactor) :
  524 + is(is),
525 reactor(reactor), 525 reactor(reactor),
526 lex_state(ls_top), 526 lex_state(ls_top),
527 number_before_point(0), 527 number_before_point(0),
@@ -529,16 +529,16 @@ namespace @@ -529,16 +529,16 @@ namespace
529 number_after_e(0), 529 number_after_e(0),
530 number_saw_point(false), 530 number_saw_point(false),
531 number_saw_e(false), 531 number_saw_e(false),
532 - cstr(nullptr),  
533 - end(nullptr),  
534 - tok_start(nullptr),  
535 - tok_end(nullptr),  
536 - p(nullptr), 532 + bytes(0),
  533 + p(buf),
  534 + u_count(0),
  535 + offset(0),
  536 + done(false),
537 parser_state(ps_top) 537 parser_state(ps_top)
538 { 538 {
539 } 539 }
540 540
541 - std::shared_ptr<JSON> parse(std::string const& s); 541 + std::shared_ptr<JSON> parse();
542 542
543 private: 543 private:
544 void getToken(); 544 void getToken();
@@ -564,8 +564,10 @@ namespace @@ -564,8 +564,10 @@ namespace
564 ls_alpha, 564 ls_alpha,
565 ls_string, 565 ls_string,
566 ls_backslash, 566 ls_backslash,
  567 + ls_u4,
567 }; 568 };
568 569
  570 + InputSource& is;
569 JSON::Reactor* reactor; 571 JSON::Reactor* reactor;
570 lex_state_e lex_state; 572 lex_state_e lex_state;
571 size_t number_before_point; 573 size_t number_before_point;
@@ -573,11 +575,13 @@ namespace @@ -573,11 +575,13 @@ namespace
573 size_t number_after_e; 575 size_t number_after_e;
574 bool number_saw_point; 576 bool number_saw_point;
575 bool number_saw_e; 577 bool number_saw_e;
576 - char const* cstr;  
577 - char const* end;  
578 - char const* tok_start;  
579 - char const* tok_end; 578 + char buf[16384];
  579 + size_t bytes;
580 char const* p; 580 char const* p;
  581 + size_t u_count;
  582 + size_t offset;
  583 + bool done;
  584 + std::string token;
581 parser_state_e parser_state; 585 parser_state_e parser_state;
582 std::vector<std::shared_ptr<JSON>> stack; 586 std::vector<std::shared_ptr<JSON>> stack;
583 std::vector<parser_state_e> ps_stack; 587 std::vector<parser_state_e> ps_stack;
@@ -661,28 +665,35 @@ JSONParser::decode_string(std::string const& str) @@ -661,28 +665,35 @@ JSONParser::decode_string(std::string const& str)
661 void 665 void
662 JSONParser::getToken() 666 JSONParser::getToken()
663 { 667 {
664 - while (p < end) { 668 + enum { append, ignore, reread } action = append;
  669 + bool ready = false;
  670 + token.clear();
  671 + while (!done) {
  672 + if (p == (buf + bytes)) {
  673 + p = buf;
  674 + bytes = is.read(buf, sizeof(buf));
  675 + if (bytes == 0) {
  676 + done = true;
  677 + break;
  678 + }
  679 + }
  680 +
665 if (*p == 0) { 681 if (*p == 0) {
666 QTC::TC("libtests", "JSON parse null character"); 682 QTC::TC("libtests", "JSON parse null character");
667 throw std::runtime_error( 683 throw std::runtime_error(
668 "JSON: null character at offset " + 684 "JSON: null character at offset " +
669 - QUtil::int_to_string(p - cstr)); 685 + QUtil::uint_to_string(offset));
670 } 686 }
  687 + action = append;
671 switch (lex_state) { 688 switch (lex_state) {
672 case ls_top: 689 case ls_top:
673 if (*p == '"') { 690 if (*p == '"') {
674 - tok_start = p;  
675 - tok_end = nullptr;  
676 lex_state = ls_string; 691 lex_state = ls_string;
677 } else if (QUtil::is_space(*p)) { 692 } else if (QUtil::is_space(*p)) {
678 - // ignore 693 + action = ignore;
679 } else if ((*p >= 'a') && (*p <= 'z')) { 694 } else if ((*p >= 'a') && (*p <= 'z')) {
680 - tok_start = p;  
681 - tok_end = nullptr;  
682 lex_state = ls_alpha; 695 lex_state = ls_alpha;
683 } else if (*p == '-') { 696 } else if (*p == '-') {
684 - tok_start = p;  
685 - tok_end = nullptr;  
686 lex_state = ls_number; 697 lex_state = ls_number;
687 number_before_point = 0; 698 number_before_point = 0;
688 number_after_point = 0; 699 number_after_point = 0;
@@ -690,8 +701,6 @@ JSONParser::getToken() @@ -690,8 +701,6 @@ JSONParser::getToken()
690 number_saw_point = false; 701 number_saw_point = false;
691 number_saw_e = false; 702 number_saw_e = false;
692 } else if ((*p >= '0') && (*p <= '9')) { 703 } else if ((*p >= '0') && (*p <= '9')) {
693 - tok_start = p;  
694 - tok_end = nullptr;  
695 lex_state = ls_number; 704 lex_state = ls_number;
696 number_before_point = 1; 705 number_before_point = 1;
697 number_after_point = 0; 706 number_after_point = 0;
@@ -699,8 +708,6 @@ JSONParser::getToken() @@ -699,8 +708,6 @@ JSONParser::getToken()
699 number_saw_point = false; 708 number_saw_point = false;
700 number_saw_e = false; 709 number_saw_e = false;
701 } else if (*p == '.') { 710 } else if (*p == '.') {
702 - tok_start = p;  
703 - tok_end = nullptr;  
704 lex_state = ls_number; 711 lex_state = ls_number;
705 number_before_point = 0; 712 number_before_point = 0;
706 number_after_point = 0; 713 number_after_point = 0;
@@ -708,12 +715,11 @@ JSONParser::getToken() @@ -708,12 +715,11 @@ JSONParser::getToken()
708 number_saw_point = true; 715 number_saw_point = true;
709 number_saw_e = false; 716 number_saw_e = false;
710 } else if (strchr("{}[]:,", *p)) { 717 } else if (strchr("{}[]:,", *p)) {
711 - tok_start = p;  
712 - tok_end = p + 1; 718 + ready = true;
713 } else { 719 } else {
714 QTC::TC("libtests", "JSON parse bad character"); 720 QTC::TC("libtests", "JSON parse bad character");
715 throw std::runtime_error( 721 throw std::runtime_error(
716 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 722 + "JSON: offset " + QUtil::uint_to_string(offset) +
717 ": unexpected character " + std::string(p, 1)); 723 ": unexpected character " + std::string(p, 1));
718 } 724 }
719 break; 725 break;
@@ -731,12 +737,12 @@ JSONParser::getToken() @@ -731,12 +737,12 @@ JSONParser::getToken()
731 if (number_saw_e) { 737 if (number_saw_e) {
732 QTC::TC("libtests", "JSON parse point after e"); 738 QTC::TC("libtests", "JSON parse point after e");
733 throw std::runtime_error( 739 throw std::runtime_error(
734 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 740 + "JSON: offset " + QUtil::uint_to_string(offset) +
735 ": numeric literal: decimal point after e"); 741 ": numeric literal: decimal point after e");
736 } else if (number_saw_point) { 742 } else if (number_saw_point) {
737 QTC::TC("libtests", "JSON parse duplicate point"); 743 QTC::TC("libtests", "JSON parse duplicate point");
738 throw std::runtime_error( 744 throw std::runtime_error(
739 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 745 + "JSON: offset " + QUtil::uint_to_string(offset) +
740 ": numeric literal: decimal point already seen"); 746 ": numeric literal: decimal point already seen");
741 } else { 747 } else {
742 number_saw_point = true; 748 number_saw_point = true;
@@ -745,7 +751,7 @@ JSONParser::getToken() @@ -745,7 +751,7 @@ JSONParser::getToken()
745 if (number_saw_e) { 751 if (number_saw_e) {
746 QTC::TC("libtests", "JSON parse duplicate e"); 752 QTC::TC("libtests", "JSON parse duplicate e");
747 throw std::runtime_error( 753 throw std::runtime_error(
748 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 754 + "JSON: offset " + QUtil::uint_to_string(offset) +
749 ": numeric literal: e already seen"); 755 ": numeric literal: e already seen");
750 } else { 756 } else {
751 number_saw_e = true; 757 number_saw_e = true;
@@ -756,18 +762,19 @@ JSONParser::getToken() @@ -756,18 +762,19 @@ JSONParser::getToken()
756 } else { 762 } else {
757 QTC::TC("libtests", "JSON parse unexpected sign"); 763 QTC::TC("libtests", "JSON parse unexpected sign");
758 throw std::runtime_error( 764 throw std::runtime_error(
759 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 765 + "JSON: offset " + QUtil::uint_to_string(offset) +
760 ": numeric literal: unexpected sign"); 766 ": numeric literal: unexpected sign");
761 } 767 }
762 } else if (QUtil::is_space(*p)) { 768 } else if (QUtil::is_space(*p)) {
763 - tok_end = p; 769 + action = ignore;
  770 + ready = true;
764 } else if (strchr("{}[]:,", *p)) { 771 } else if (strchr("{}[]:,", *p)) {
765 - tok_end = p;  
766 - --p; 772 + action = reread;
  773 + ready = true;
767 } else { 774 } else {
768 QTC::TC("libtests", "JSON parse numeric bad character"); 775 QTC::TC("libtests", "JSON parse numeric bad character");
769 throw std::runtime_error( 776 throw std::runtime_error(
770 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 777 + "JSON: offset " + QUtil::uint_to_string(offset) +
771 ": numeric literal: unexpected character " + 778 ": numeric literal: unexpected character " +
772 std::string(p, 1)); 779 std::string(p, 1));
773 } 780 }
@@ -777,21 +784,22 @@ JSONParser::getToken() @@ -777,21 +784,22 @@ JSONParser::getToken()
777 if ((*p >= 'a') && (*p <= 'z')) { 784 if ((*p >= 'a') && (*p <= 'z')) {
778 // okay 785 // okay
779 } else if (QUtil::is_space(*p)) { 786 } else if (QUtil::is_space(*p)) {
780 - tok_end = p; 787 + action = ignore;
  788 + ready = true;
781 } else if (strchr("{}[]:,", *p)) { 789 } else if (strchr("{}[]:,", *p)) {
782 - tok_end = p;  
783 - --p; 790 + action = reread;
  791 + ready = true;
784 } else { 792 } else {
785 QTC::TC("libtests", "JSON parse keyword bad character"); 793 QTC::TC("libtests", "JSON parse keyword bad character");
786 throw std::runtime_error( 794 throw std::runtime_error(
787 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 795 + "JSON: offset " + QUtil::uint_to_string(offset) +
788 ": keyword: unexpected character " + std::string(p, 1)); 796 ": keyword: unexpected character " + std::string(p, 1));
789 } 797 }
790 break; 798 break;
791 799
792 case ls_string: 800 case ls_string:
793 if (*p == '"') { 801 if (*p == '"') {
794 - tok_end = p + 1; 802 + ready = true;
795 } else if (*p == '\\') { 803 } else if (*p == '\\') {
796 lex_state = ls_backslash; 804 lex_state = ls_backslash;
797 } 805 }
@@ -802,56 +810,70 @@ JSONParser::getToken() @@ -802,56 +810,70 @@ JSONParser::getToken()
802 if (strchr("\\\"/bfnrt", *p)) { 810 if (strchr("\\\"/bfnrt", *p)) {
803 lex_state = ls_string; 811 lex_state = ls_string;
804 } else if (*p == 'u') { 812 } else if (*p == 'u') {
805 - if (p + 4 >= end) {  
806 - QTC::TC("libtests", "JSON parse premature end of u");  
807 - throw std::runtime_error(  
808 - "JSON: offset " + QUtil::int_to_string(p - cstr) +  
809 - ": \\u must be followed by four characters");  
810 - }  
811 - for (size_t i = 1; i <= 4; ++i) {  
812 - if (!QUtil::is_hex_digit(p[i])) {  
813 - QTC::TC("libtests", "JSON parse bad hex after u");  
814 - throw std::runtime_error(  
815 - "JSON: offset " + QUtil::int_to_string(p - cstr) +  
816 - ": \\u must be followed by four hex digits");  
817 - }  
818 - }  
819 - p += 4;  
820 - lex_state = ls_string; 813 + lex_state = ls_u4;
  814 + u_count = 0;
821 } else { 815 } else {
822 QTC::TC("libtests", "JSON parse backslash bad character"); 816 QTC::TC("libtests", "JSON parse backslash bad character");
823 throw std::runtime_error( 817 throw std::runtime_error(
824 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 818 + "JSON: offset " + QUtil::uint_to_string(offset) +
825 ": invalid character after backslash: " + 819 ": invalid character after backslash: " +
826 std::string(p, 1)); 820 std::string(p, 1));
827 } 821 }
828 break; 822 break;
  823 +
  824 + case ls_u4:
  825 + if (!QUtil::is_hex_digit(*p)) {
  826 + QTC::TC("libtests", "JSON parse bad hex after u");
  827 + throw std::runtime_error(
  828 + "JSON: offset " +
  829 + QUtil::uint_to_string(offset - u_count - 1) +
  830 + ": \\u must be followed by four hex digits");
  831 + }
  832 + if (++u_count == 4) {
  833 + lex_state = ls_string;
  834 + }
  835 + break;
829 } 836 }
830 - ++p;  
831 - if (tok_start && tok_end) { 837 + switch (action) {
  838 + case reread:
  839 + break;
  840 + case append:
  841 + token.append(1, *p);
  842 + // fall through
  843 + case ignore:
  844 + ++p;
  845 + ++offset;
  846 + break;
  847 + }
  848 + if (ready) {
832 break; 849 break;
833 } 850 }
834 } 851 }
835 - if (p == end) {  
836 - if (tok_start && (!tok_end)) { 852 + if (done) {
  853 + if ((!token.empty()) && (!ready)) {
837 switch (lex_state) { 854 switch (lex_state) {
838 case ls_top: 855 case ls_top:
839 // Can't happen 856 // Can't happen
840 - throw std::logic_error(  
841 - "tok_start set in ls_top while parsing " +  
842 - std::string(cstr)); 857 + throw std::logic_error("tok_start set in ls_top while parsing");
843 break; 858 break;
844 859
845 case ls_number: 860 case ls_number:
846 case ls_alpha: 861 case ls_alpha:
847 - tok_end = p; 862 + // okay
848 break; 863 break;
849 864
  865 + case ls_u4:
  866 + QTC::TC("libtests", "JSON parse premature end of u");
  867 + throw std::runtime_error(
  868 + "JSON: offset " +
  869 + QUtil::uint_to_string(offset - u_count - 1) +
  870 + ": \\u must be followed by four characters");
  871 +
850 case ls_string: 872 case ls_string:
851 case ls_backslash: 873 case ls_backslash:
852 QTC::TC("libtests", "JSON parse unterminated string"); 874 QTC::TC("libtests", "JSON parse unterminated string");
853 throw std::runtime_error( 875 throw std::runtime_error(
854 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 876 + "JSON: offset " + QUtil::uint_to_string(offset) +
855 ": unterminated string"); 877 ": unterminated string");
856 break; 878 break;
857 } 879 }
@@ -862,28 +884,25 @@ JSONParser::getToken() @@ -862,28 +884,25 @@ JSONParser::getToken()
862 void 884 void
863 JSONParser::handleToken() 885 JSONParser::handleToken()
864 { 886 {
865 - if (!(tok_start && tok_end)) { 887 + if (token.empty()) {
866 return; 888 return;
867 } 889 }
868 890
869 - // Get token value.  
870 - std::string value(tok_start, tok_end);  
871 -  
872 if (parser_state == ps_done) { 891 if (parser_state == ps_done) {
873 QTC::TC("libtests", "JSON parse junk after object"); 892 QTC::TC("libtests", "JSON parse junk after object");
874 throw std::runtime_error( 893 throw std::runtime_error(
875 - "JSON: offset " + QUtil::int_to_string(p - cstr) +  
876 - ": material follows end of object: " + value); 894 + "JSON: offset " + QUtil::uint_to_string(offset) +
  895 + ": material follows end of object: " + token);
877 } 896 }
878 897
879 // Get string value 898 // Get string value
880 std::string s_value; 899 std::string s_value;
881 if (lex_state == ls_string) { 900 if (lex_state == ls_string) {
882 // Token includes the quotation marks 901 // Token includes the quotation marks
883 - if (tok_end - tok_start < 2) { 902 + if (token.length() < 2) {
884 throw std::logic_error("JSON string length < 2"); 903 throw std::logic_error("JSON string length < 2");
885 } 904 }
886 - s_value = decode_string(value); 905 + s_value = decode_string(token);
887 } 906 }
888 // Based on the lexical state and value, figure out whether we are 907 // Based on the lexical state and value, figure out whether we are
889 // looking at an item or a delimiter. It will always be exactly 908 // looking at an item or a delimiter. It will always be exactly
@@ -891,12 +910,14 @@ JSONParser::handleToken() @@ -891,12 +910,14 @@ JSONParser::handleToken()
891 910
892 std::shared_ptr<JSON> item; 911 std::shared_ptr<JSON> item;
893 char delimiter = '\0'; 912 char delimiter = '\0';
  913 + // Already verified that token is not empty
  914 + char first_char = token.at(0);
894 switch (lex_state) { 915 switch (lex_state) {
895 case ls_top: 916 case ls_top:
896 - switch (*tok_start) { 917 + switch (first_char) {
897 case '{': 918 case '{':
898 item = std::make_shared<JSON>(JSON::makeDictionary()); 919 item = std::make_shared<JSON>(JSON::makeDictionary());
899 - item->setStart(QIntC::to_size(tok_start - cstr)); 920 + item->setStart(offset - token.length());
900 if (reactor) { 921 if (reactor) {
901 reactor->dictionaryStart(); 922 reactor->dictionaryStart();
902 } 923 }
@@ -904,14 +925,14 @@ JSONParser::handleToken() @@ -904,14 +925,14 @@ JSONParser::handleToken()
904 925
905 case '[': 926 case '[':
906 item = std::make_shared<JSON>(JSON::makeArray()); 927 item = std::make_shared<JSON>(JSON::makeArray());
907 - item->setStart(QIntC::to_size(tok_start - cstr)); 928 + item->setStart(offset - token.length());
908 if (reactor) { 929 if (reactor) {
909 reactor->arrayStart(); 930 reactor->arrayStart();
910 } 931 }
911 break; 932 break;
912 933
913 default: 934 default:
914 - delimiter = *tok_start; 935 + delimiter = first_char;
915 break; 936 break;
916 } 937 }
917 break; 938 break;
@@ -920,38 +941,38 @@ JSONParser::handleToken() @@ -920,38 +941,38 @@ JSONParser::handleToken()
920 if (number_saw_point && (number_after_point == 0)) { 941 if (number_saw_point && (number_after_point == 0)) {
921 QTC::TC("libtests", "JSON parse decimal with no digits"); 942 QTC::TC("libtests", "JSON parse decimal with no digits");
922 throw std::runtime_error( 943 throw std::runtime_error(
923 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 944 + "JSON: offset " + QUtil::uint_to_string(offset) +
924 ": decimal point with no digits"); 945 ": decimal point with no digits");
925 } 946 }
926 if ((number_before_point > 1) && 947 if ((number_before_point > 1) &&
927 - ((tok_start[0] == '0') ||  
928 - ((tok_start[0] == '-') && (tok_start[1] == '0')))) { 948 + ((first_char == '0') ||
  949 + ((first_char == '-') && (token.at(1) == '0')))) {
929 QTC::TC("libtests", "JSON parse leading zero"); 950 QTC::TC("libtests", "JSON parse leading zero");
930 throw std::runtime_error( 951 throw std::runtime_error(
931 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 952 + "JSON: offset " + QUtil::uint_to_string(offset) +
932 ": number with leading zero"); 953 ": number with leading zero");
933 } 954 }
934 if ((number_before_point == 0) && (number_after_point == 0)) { 955 if ((number_before_point == 0) && (number_after_point == 0)) {
935 QTC::TC("libtests", "JSON parse number no digits"); 956 QTC::TC("libtests", "JSON parse number no digits");
936 throw std::runtime_error( 957 throw std::runtime_error(
937 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 958 + "JSON: offset " + QUtil::uint_to_string(offset) +
938 ": number with no digits"); 959 ": number with no digits");
939 } 960 }
940 - item = std::make_shared<JSON>(JSON::makeNumber(value)); 961 + item = std::make_shared<JSON>(JSON::makeNumber(token));
941 break; 962 break;
942 963
943 case ls_alpha: 964 case ls_alpha:
944 - if (value == "true") { 965 + if (token == "true") {
945 item = std::make_shared<JSON>(JSON::makeBool(true)); 966 item = std::make_shared<JSON>(JSON::makeBool(true));
946 - } else if (value == "false") { 967 + } else if (token == "false") {
947 item = std::make_shared<JSON>(JSON::makeBool(false)); 968 item = std::make_shared<JSON>(JSON::makeBool(false));
948 - } else if (value == "null") { 969 + } else if (token == "null") {
949 item = std::make_shared<JSON>(JSON::makeNull()); 970 item = std::make_shared<JSON>(JSON::makeNull());
950 } else { 971 } else {
951 QTC::TC("libtests", "JSON parse invalid keyword"); 972 QTC::TC("libtests", "JSON parse invalid keyword");
952 throw std::runtime_error( 973 throw std::runtime_error(
953 - "JSON: offset " + QUtil::int_to_string(p - cstr) +  
954 - ": invalid keyword " + value); 974 + "JSON: offset " + QUtil::uint_to_string(offset) +
  975 + ": invalid keyword " + token);
955 } 976 }
956 break; 977 break;
957 978
@@ -960,7 +981,9 @@ JSONParser::handleToken() @@ -960,7 +981,9 @@ JSONParser::handleToken()
960 break; 981 break;
961 982
962 case ls_backslash: 983 case ls_backslash:
963 - throw std::logic_error("tok_end is set while state = ls_backslash"); 984 + case ls_u4:
  985 + throw std::logic_error(
  986 + "tok_end is set while state = ls_backslash or ls_u4");
964 break; 987 break;
965 } 988 }
966 989
@@ -981,21 +1004,21 @@ JSONParser::handleToken() @@ -981,21 +1004,21 @@ JSONParser::handleToken()
981 case ps_dict_after_key: 1004 case ps_dict_after_key:
982 QTC::TC("libtests", "JSON parse expected colon"); 1005 QTC::TC("libtests", "JSON parse expected colon");
983 throw std::runtime_error( 1006 throw std::runtime_error(
984 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 1007 + "JSON: offset " + QUtil::uint_to_string(offset) +
985 ": expected ':'"); 1008 ": expected ':'");
986 break; 1009 break;
987 1010
988 case ps_dict_after_item: 1011 case ps_dict_after_item:
989 QTC::TC("libtests", "JSON parse expected , or }"); 1012 QTC::TC("libtests", "JSON parse expected , or }");
990 throw std::runtime_error( 1013 throw std::runtime_error(
991 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 1014 + "JSON: offset " + QUtil::uint_to_string(offset) +
992 ": expected ',' or '}'"); 1015 ": expected ',' or '}'");
993 break; 1016 break;
994 1017
995 case ps_array_after_item: 1018 case ps_array_after_item:
996 QTC::TC("libtests", "JSON parse expected, or ]"); 1019 QTC::TC("libtests", "JSON parse expected, or ]");
997 throw std::runtime_error( 1020 throw std::runtime_error(
998 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 1021 + "JSON: offset " + QUtil::uint_to_string(offset) +
999 ": expected ',' or ']'"); 1022 ": expected ',' or ']'");
1000 break; 1023 break;
1001 1024
@@ -1004,7 +1027,7 @@ JSONParser::handleToken() @@ -1004,7 +1027,7 @@ JSONParser::handleToken()
1004 if (lex_state != ls_string) { 1027 if (lex_state != ls_string) {
1005 QTC::TC("libtests", "JSON parse string as dict key"); 1028 QTC::TC("libtests", "JSON parse string as dict key");
1006 throw std::runtime_error( 1029 throw std::runtime_error(
1007 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 1030 + "JSON: offset " + QUtil::uint_to_string(offset) +
1008 ": expect string as dictionary key"); 1031 ": expect string as dictionary key");
1009 } 1032 }
1010 break; 1033 break;
@@ -1023,7 +1046,7 @@ JSONParser::handleToken() @@ -1023,7 +1046,7 @@ JSONParser::handleToken()
1023 { 1046 {
1024 QTC::TC("libtests", "JSON parse unexpected }"); 1047 QTC::TC("libtests", "JSON parse unexpected }");
1025 throw std::runtime_error( 1048 throw std::runtime_error(
1026 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 1049 + "JSON: offset " + QUtil::uint_to_string(offset) +
1027 ": unexpected dictionary end delimiter"); 1050 ": unexpected dictionary end delimiter");
1028 } 1051 }
1029 } else if (delimiter == ']') { 1052 } else if (delimiter == ']') {
@@ -1033,14 +1056,14 @@ JSONParser::handleToken() @@ -1033,14 +1056,14 @@ JSONParser::handleToken()
1033 { 1056 {
1034 QTC::TC("libtests", "JSON parse unexpected ]"); 1057 QTC::TC("libtests", "JSON parse unexpected ]");
1035 throw std::runtime_error( 1058 throw std::runtime_error(
1036 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 1059 + "JSON: offset " + QUtil::uint_to_string(offset) +
1037 ": unexpected array end delimiter"); 1060 ": unexpected array end delimiter");
1038 } 1061 }
1039 } else if (delimiter == ':') { 1062 } else if (delimiter == ':') {
1040 if (parser_state != ps_dict_after_key) { 1063 if (parser_state != ps_dict_after_key) {
1041 QTC::TC("libtests", "JSON parse unexpected :"); 1064 QTC::TC("libtests", "JSON parse unexpected :");
1042 throw std::runtime_error( 1065 throw std::runtime_error(
1043 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 1066 + "JSON: offset " + QUtil::uint_to_string(offset) +
1044 ": unexpected colon"); 1067 ": unexpected colon");
1045 } 1068 }
1046 } else if (delimiter == ',') { 1069 } else if (delimiter == ',') {
@@ -1048,7 +1071,7 @@ JSONParser::handleToken() @@ -1048,7 +1071,7 @@ JSONParser::handleToken()
1048 (parser_state == ps_array_after_item))) { 1071 (parser_state == ps_array_after_item))) {
1049 QTC::TC("libtests", "JSON parse unexpected ,"); 1072 QTC::TC("libtests", "JSON parse unexpected ,");
1050 throw std::runtime_error( 1073 throw std::runtime_error(
1051 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 1074 + "JSON: offset " + QUtil::uint_to_string(offset) +
1052 ": unexpected comma"); 1075 ": unexpected comma");
1053 } 1076 }
1054 } else if (delimiter != '\0') { 1077 } else if (delimiter != '\0') {
@@ -1074,7 +1097,7 @@ JSONParser::handleToken() @@ -1074,7 +1097,7 @@ JSONParser::handleToken()
1074 next_state = ps_stack.back(); 1097 next_state = ps_stack.back();
1075 ps_stack.pop_back(); 1098 ps_stack.pop_back();
1076 auto tos = stack.back(); 1099 auto tos = stack.back();
1077 - tos->setEnd(QIntC::to_size(tok_end - cstr)); 1100 + tos->setEnd(offset);
1078 if (reactor) { 1101 if (reactor) {
1079 reactor->containerEnd(*tos); 1102 reactor->containerEnd(*tos);
1080 } 1103 }
@@ -1086,8 +1109,8 @@ JSONParser::handleToken() @@ -1086,8 +1109,8 @@ JSONParser::handleToken()
1086 "JSONParser::handleToken: unexpected delimiter in transition"); 1109 "JSONParser::handleToken: unexpected delimiter in transition");
1087 } else if (item.get()) { 1110 } else if (item.get()) {
1088 if (!(item->isArray() || item->isDictionary())) { 1111 if (!(item->isArray() || item->isDictionary())) {
1089 - item->setStart(QIntC::to_size(tok_start - cstr));  
1090 - item->setEnd(QIntC::to_size(tok_end - cstr)); 1112 + item->setStart(offset - token.length());
  1113 + item->setEnd(offset);
1091 } 1114 }
1092 1115
1093 std::shared_ptr<JSON> tos; 1116 std::shared_ptr<JSON> tos;
@@ -1149,23 +1172,17 @@ JSONParser::handleToken() @@ -1149,23 +1172,17 @@ JSONParser::handleToken()
1149 } 1172 }
1150 if (ps_stack.size() > 500) { 1173 if (ps_stack.size() > 500) {
1151 throw std::runtime_error( 1174 throw std::runtime_error(
1152 - "JSON: offset " + QUtil::int_to_string(p - cstr) + 1175 + "JSON: offset " + QUtil::uint_to_string(offset) +
1153 ": maximum object depth exceeded"); 1176 ": maximum object depth exceeded");
1154 } 1177 }
1155 parser_state = next_state; 1178 parser_state = next_state;
1156 - tok_start = nullptr;  
1157 - tok_end = nullptr;  
1158 lex_state = ls_top; 1179 lex_state = ls_top;
1159 } 1180 }
1160 1181
1161 std::shared_ptr<JSON> 1182 std::shared_ptr<JSON>
1162 -JSONParser::parse(std::string const& s) 1183 +JSONParser::parse()
1163 { 1184 {
1164 - cstr = s.c_str();  
1165 - end = cstr + s.length();  
1166 - p = cstr;  
1167 -  
1168 - while (p < end) { 1185 + while (!done) {
1169 getToken(); 1186 getToken();
1170 handleToken(); 1187 handleToken();
1171 } 1188 }
@@ -1181,10 +1198,18 @@ JSONParser::parse(std::string const& s) @@ -1181,10 +1198,18 @@ JSONParser::parse(std::string const& s)
1181 } 1198 }
1182 1199
1183 JSON 1200 JSON
1184 -JSON::parse(std::string const& s, Reactor* reactor) 1201 +JSON::parse(InputSource& is, Reactor* reactor)
  1202 +{
  1203 + JSONParser jp(is, reactor);
  1204 + return *jp.parse();
  1205 +}
  1206 +
  1207 +JSON
  1208 +JSON::parse(std::string const& s)
1185 { 1209 {
1186 - JSONParser jp(reactor);  
1187 - return *jp.parse(s); 1210 + BufferInputSource bis("json input", s);
  1211 + JSONParser jp(bis, nullptr);
  1212 + return *jp.parse();
1188 } 1213 }
1189 1214
1190 void 1215 void
libtests/json_parse.cc
  1 +#include <qpdf/FileInputSource.hh>
1 #include <qpdf/JSON.hh> 2 #include <qpdf/JSON.hh>
2 #include <qpdf/QUtil.hh> 3 #include <qpdf/QUtil.hh>
3 #include <cstdlib> 4 #include <cstdlib>
@@ -103,11 +104,8 @@ main(int argc, char* argv[]) @@ -103,11 +104,8 @@ main(int argc, char* argv[])
103 } 104 }
104 } 105 }
105 try { 106 try {
106 - std::shared_ptr<char> buf;  
107 - size_t size;  
108 - QUtil::read_file_into_memory(filename, buf, size);  
109 - std::string s(buf.get(), size);  
110 - std::cout << JSON::parse(s, reactor.get()).unparse() << std::endl; 107 + FileInputSource is(filename);
  108 + std::cout << JSON::parse(is, reactor.get()).unparse() << std::endl;
111 } catch (std::exception& e) { 109 } catch (std::exception& e) {
112 std::cerr << "exception: " << filename << ": " << e.what() << std::endl; 110 std::cerr << "exception: " << filename << ": " << e.what() << std::endl;
113 return 2; 111 return 2;