Commit 05fda4afa289ef248804865d7648c9ac3ae75fbd
1 parent
e5f3910c
Change JSON parser to parse from an InputSource
Showing 4 changed files with 150 additions and 126 deletions
TODO
| ... | ... | @@ -51,9 +51,6 @@ library, when context is available, to have a pipeline rather than a |
| 51 | 51 | FILE* or std::ostream. This makes it possible for people to capture |
| 52 | 52 | output more flexibly. |
| 53 | 53 | |
| 54 | -JSON::parse should work from an InputSource. BufferInputSource can | |
| 55 | -already start with a std::string. | |
| 56 | - | |
| 57 | 54 | Have a json blob defined by a function that takes a pipeline and |
| 58 | 55 | writes data to the pipeline. Its writer should create a Pl_Base64 -> |
| 59 | 56 | Pl_Concatenate in front of the pipeline passed to write and call the | ... | ... |
include/qpdf/JSON.hh
| ... | ... | @@ -46,6 +46,7 @@ |
| 46 | 46 | #include <vector> |
| 47 | 47 | |
| 48 | 48 | class Pipeline; |
| 49 | +class InputSource; | |
| 49 | 50 | |
| 50 | 51 | class JSON |
| 51 | 52 | { |
| ... | ... | @@ -249,10 +250,13 @@ class JSON |
| 249 | 250 | virtual bool arrayItem(JSON const& value) = 0; |
| 250 | 251 | }; |
| 251 | 252 | |
| 252 | - // Create a JSON object from a string. See above for information | |
| 253 | - // about how to use the Reactor. | |
| 253 | + // Create a JSON object from a string. | |
| 254 | 254 | QPDF_DLL |
| 255 | - static JSON parse(std::string const&, Reactor* reactor = nullptr); | |
| 255 | + static JSON parse(std::string const&); | |
| 256 | + // Create a JSON object from an input source. See above for | |
| 257 | + // information about how to use the Reactor. | |
| 258 | + QPDF_DLL | |
| 259 | + static JSON parse(InputSource&, Reactor* reactor = nullptr); | |
| 256 | 260 | |
| 257 | 261 | // parse calls setOffsets to set the inclusive start and |
| 258 | 262 | // non-inclusive end offsets of an object relative to its input | ... | ... |
libqpdf/JSON.cc
| 1 | 1 | #include <qpdf/JSON.hh> |
| 2 | 2 | |
| 3 | -#include <qpdf/Pipeline.hh> | |
| 3 | +#include <qpdf/BufferInputSource.hh> | |
| 4 | 4 | #include <qpdf/Pl_String.hh> |
| 5 | -#include <qpdf/QIntC.hh> | |
| 6 | 5 | #include <qpdf/QTC.hh> |
| 7 | 6 | #include <qpdf/QUtil.hh> |
| 8 | 7 | #include <cstring> |
| ... | ... | @@ -521,7 +520,8 @@ namespace |
| 521 | 520 | class JSONParser |
| 522 | 521 | { |
| 523 | 522 | public: |
| 524 | - JSONParser(JSON::Reactor* reactor) : | |
| 523 | + JSONParser(InputSource& is, JSON::Reactor* reactor) : | |
| 524 | + is(is), | |
| 525 | 525 | reactor(reactor), |
| 526 | 526 | lex_state(ls_top), |
| 527 | 527 | number_before_point(0), |
| ... | ... | @@ -529,16 +529,16 @@ namespace |
| 529 | 529 | number_after_e(0), |
| 530 | 530 | number_saw_point(false), |
| 531 | 531 | number_saw_e(false), |
| 532 | - cstr(nullptr), | |
| 533 | - end(nullptr), | |
| 534 | - tok_start(nullptr), | |
| 535 | - tok_end(nullptr), | |
| 536 | - p(nullptr), | |
| 532 | + bytes(0), | |
| 533 | + p(buf), | |
| 534 | + u_count(0), | |
| 535 | + offset(0), | |
| 536 | + done(false), | |
| 537 | 537 | parser_state(ps_top) |
| 538 | 538 | { |
| 539 | 539 | } |
| 540 | 540 | |
| 541 | - std::shared_ptr<JSON> parse(std::string const& s); | |
| 541 | + std::shared_ptr<JSON> parse(); | |
| 542 | 542 | |
| 543 | 543 | private: |
| 544 | 544 | void getToken(); |
| ... | ... | @@ -564,8 +564,10 @@ namespace |
| 564 | 564 | ls_alpha, |
| 565 | 565 | ls_string, |
| 566 | 566 | ls_backslash, |
| 567 | + ls_u4, | |
| 567 | 568 | }; |
| 568 | 569 | |
| 570 | + InputSource& is; | |
| 569 | 571 | JSON::Reactor* reactor; |
| 570 | 572 | lex_state_e lex_state; |
| 571 | 573 | size_t number_before_point; |
| ... | ... | @@ -573,11 +575,13 @@ namespace |
| 573 | 575 | size_t number_after_e; |
| 574 | 576 | bool number_saw_point; |
| 575 | 577 | bool number_saw_e; |
| 576 | - char const* cstr; | |
| 577 | - char const* end; | |
| 578 | - char const* tok_start; | |
| 579 | - char const* tok_end; | |
| 578 | + char buf[16384]; | |
| 579 | + size_t bytes; | |
| 580 | 580 | char const* p; |
| 581 | + size_t u_count; | |
| 582 | + size_t offset; | |
| 583 | + bool done; | |
| 584 | + std::string token; | |
| 581 | 585 | parser_state_e parser_state; |
| 582 | 586 | std::vector<std::shared_ptr<JSON>> stack; |
| 583 | 587 | std::vector<parser_state_e> ps_stack; |
| ... | ... | @@ -661,28 +665,35 @@ JSONParser::decode_string(std::string const& str) |
| 661 | 665 | void |
| 662 | 666 | JSONParser::getToken() |
| 663 | 667 | { |
| 664 | - while (p < end) { | |
| 668 | + enum { append, ignore, reread } action = append; | |
| 669 | + bool ready = false; | |
| 670 | + token.clear(); | |
| 671 | + while (!done) { | |
| 672 | + if (p == (buf + bytes)) { | |
| 673 | + p = buf; | |
| 674 | + bytes = is.read(buf, sizeof(buf)); | |
| 675 | + if (bytes == 0) { | |
| 676 | + done = true; | |
| 677 | + break; | |
| 678 | + } | |
| 679 | + } | |
| 680 | + | |
| 665 | 681 | if (*p == 0) { |
| 666 | 682 | QTC::TC("libtests", "JSON parse null character"); |
| 667 | 683 | throw std::runtime_error( |
| 668 | 684 | "JSON: null character at offset " + |
| 669 | - QUtil::int_to_string(p - cstr)); | |
| 685 | + QUtil::uint_to_string(offset)); | |
| 670 | 686 | } |
| 687 | + action = append; | |
| 671 | 688 | switch (lex_state) { |
| 672 | 689 | case ls_top: |
| 673 | 690 | if (*p == '"') { |
| 674 | - tok_start = p; | |
| 675 | - tok_end = nullptr; | |
| 676 | 691 | lex_state = ls_string; |
| 677 | 692 | } else if (QUtil::is_space(*p)) { |
| 678 | - // ignore | |
| 693 | + action = ignore; | |
| 679 | 694 | } else if ((*p >= 'a') && (*p <= 'z')) { |
| 680 | - tok_start = p; | |
| 681 | - tok_end = nullptr; | |
| 682 | 695 | lex_state = ls_alpha; |
| 683 | 696 | } else if (*p == '-') { |
| 684 | - tok_start = p; | |
| 685 | - tok_end = nullptr; | |
| 686 | 697 | lex_state = ls_number; |
| 687 | 698 | number_before_point = 0; |
| 688 | 699 | number_after_point = 0; |
| ... | ... | @@ -690,8 +701,6 @@ JSONParser::getToken() |
| 690 | 701 | number_saw_point = false; |
| 691 | 702 | number_saw_e = false; |
| 692 | 703 | } else if ((*p >= '0') && (*p <= '9')) { |
| 693 | - tok_start = p; | |
| 694 | - tok_end = nullptr; | |
| 695 | 704 | lex_state = ls_number; |
| 696 | 705 | number_before_point = 1; |
| 697 | 706 | number_after_point = 0; |
| ... | ... | @@ -699,8 +708,6 @@ JSONParser::getToken() |
| 699 | 708 | number_saw_point = false; |
| 700 | 709 | number_saw_e = false; |
| 701 | 710 | } else if (*p == '.') { |
| 702 | - tok_start = p; | |
| 703 | - tok_end = nullptr; | |
| 704 | 711 | lex_state = ls_number; |
| 705 | 712 | number_before_point = 0; |
| 706 | 713 | number_after_point = 0; |
| ... | ... | @@ -708,12 +715,11 @@ JSONParser::getToken() |
| 708 | 715 | number_saw_point = true; |
| 709 | 716 | number_saw_e = false; |
| 710 | 717 | } else if (strchr("{}[]:,", *p)) { |
| 711 | - tok_start = p; | |
| 712 | - tok_end = p + 1; | |
| 718 | + ready = true; | |
| 713 | 719 | } else { |
| 714 | 720 | QTC::TC("libtests", "JSON parse bad character"); |
| 715 | 721 | throw std::runtime_error( |
| 716 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 722 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 717 | 723 | ": unexpected character " + std::string(p, 1)); |
| 718 | 724 | } |
| 719 | 725 | break; |
| ... | ... | @@ -731,12 +737,12 @@ JSONParser::getToken() |
| 731 | 737 | if (number_saw_e) { |
| 732 | 738 | QTC::TC("libtests", "JSON parse point after e"); |
| 733 | 739 | throw std::runtime_error( |
| 734 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 740 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 735 | 741 | ": numeric literal: decimal point after e"); |
| 736 | 742 | } else if (number_saw_point) { |
| 737 | 743 | QTC::TC("libtests", "JSON parse duplicate point"); |
| 738 | 744 | throw std::runtime_error( |
| 739 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 745 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 740 | 746 | ": numeric literal: decimal point already seen"); |
| 741 | 747 | } else { |
| 742 | 748 | number_saw_point = true; |
| ... | ... | @@ -745,7 +751,7 @@ JSONParser::getToken() |
| 745 | 751 | if (number_saw_e) { |
| 746 | 752 | QTC::TC("libtests", "JSON parse duplicate e"); |
| 747 | 753 | throw std::runtime_error( |
| 748 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 754 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 749 | 755 | ": numeric literal: e already seen"); |
| 750 | 756 | } else { |
| 751 | 757 | number_saw_e = true; |
| ... | ... | @@ -756,18 +762,19 @@ JSONParser::getToken() |
| 756 | 762 | } else { |
| 757 | 763 | QTC::TC("libtests", "JSON parse unexpected sign"); |
| 758 | 764 | throw std::runtime_error( |
| 759 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 765 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 760 | 766 | ": numeric literal: unexpected sign"); |
| 761 | 767 | } |
| 762 | 768 | } else if (QUtil::is_space(*p)) { |
| 763 | - tok_end = p; | |
| 769 | + action = ignore; | |
| 770 | + ready = true; | |
| 764 | 771 | } else if (strchr("{}[]:,", *p)) { |
| 765 | - tok_end = p; | |
| 766 | - --p; | |
| 772 | + action = reread; | |
| 773 | + ready = true; | |
| 767 | 774 | } else { |
| 768 | 775 | QTC::TC("libtests", "JSON parse numeric bad character"); |
| 769 | 776 | throw std::runtime_error( |
| 770 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 777 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 771 | 778 | ": numeric literal: unexpected character " + |
| 772 | 779 | std::string(p, 1)); |
| 773 | 780 | } |
| ... | ... | @@ -777,21 +784,22 @@ JSONParser::getToken() |
| 777 | 784 | if ((*p >= 'a') && (*p <= 'z')) { |
| 778 | 785 | // okay |
| 779 | 786 | } else if (QUtil::is_space(*p)) { |
| 780 | - tok_end = p; | |
| 787 | + action = ignore; | |
| 788 | + ready = true; | |
| 781 | 789 | } else if (strchr("{}[]:,", *p)) { |
| 782 | - tok_end = p; | |
| 783 | - --p; | |
| 790 | + action = reread; | |
| 791 | + ready = true; | |
| 784 | 792 | } else { |
| 785 | 793 | QTC::TC("libtests", "JSON parse keyword bad character"); |
| 786 | 794 | throw std::runtime_error( |
| 787 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 795 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 788 | 796 | ": keyword: unexpected character " + std::string(p, 1)); |
| 789 | 797 | } |
| 790 | 798 | break; |
| 791 | 799 | |
| 792 | 800 | case ls_string: |
| 793 | 801 | if (*p == '"') { |
| 794 | - tok_end = p + 1; | |
| 802 | + ready = true; | |
| 795 | 803 | } else if (*p == '\\') { |
| 796 | 804 | lex_state = ls_backslash; |
| 797 | 805 | } |
| ... | ... | @@ -802,56 +810,70 @@ JSONParser::getToken() |
| 802 | 810 | if (strchr("\\\"/bfnrt", *p)) { |
| 803 | 811 | lex_state = ls_string; |
| 804 | 812 | } else if (*p == 'u') { |
| 805 | - if (p + 4 >= end) { | |
| 806 | - QTC::TC("libtests", "JSON parse premature end of u"); | |
| 807 | - throw std::runtime_error( | |
| 808 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 809 | - ": \\u must be followed by four characters"); | |
| 810 | - } | |
| 811 | - for (size_t i = 1; i <= 4; ++i) { | |
| 812 | - if (!QUtil::is_hex_digit(p[i])) { | |
| 813 | - QTC::TC("libtests", "JSON parse bad hex after u"); | |
| 814 | - throw std::runtime_error( | |
| 815 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 816 | - ": \\u must be followed by four hex digits"); | |
| 817 | - } | |
| 818 | - } | |
| 819 | - p += 4; | |
| 820 | - lex_state = ls_string; | |
| 813 | + lex_state = ls_u4; | |
| 814 | + u_count = 0; | |
| 821 | 815 | } else { |
| 822 | 816 | QTC::TC("libtests", "JSON parse backslash bad character"); |
| 823 | 817 | throw std::runtime_error( |
| 824 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 818 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 825 | 819 | ": invalid character after backslash: " + |
| 826 | 820 | std::string(p, 1)); |
| 827 | 821 | } |
| 828 | 822 | break; |
| 823 | + | |
| 824 | + case ls_u4: | |
| 825 | + if (!QUtil::is_hex_digit(*p)) { | |
| 826 | + QTC::TC("libtests", "JSON parse bad hex after u"); | |
| 827 | + throw std::runtime_error( | |
| 828 | + "JSON: offset " + | |
| 829 | + QUtil::uint_to_string(offset - u_count - 1) + | |
| 830 | + ": \\u must be followed by four hex digits"); | |
| 831 | + } | |
| 832 | + if (++u_count == 4) { | |
| 833 | + lex_state = ls_string; | |
| 834 | + } | |
| 835 | + break; | |
| 829 | 836 | } |
| 830 | - ++p; | |
| 831 | - if (tok_start && tok_end) { | |
| 837 | + switch (action) { | |
| 838 | + case reread: | |
| 839 | + break; | |
| 840 | + case append: | |
| 841 | + token.append(1, *p); | |
| 842 | + // fall through | |
| 843 | + case ignore: | |
| 844 | + ++p; | |
| 845 | + ++offset; | |
| 846 | + break; | |
| 847 | + } | |
| 848 | + if (ready) { | |
| 832 | 849 | break; |
| 833 | 850 | } |
| 834 | 851 | } |
| 835 | - if (p == end) { | |
| 836 | - if (tok_start && (!tok_end)) { | |
| 852 | + if (done) { | |
| 853 | + if ((!token.empty()) && (!ready)) { | |
| 837 | 854 | switch (lex_state) { |
| 838 | 855 | case ls_top: |
| 839 | 856 | // Can't happen |
| 840 | - throw std::logic_error( | |
| 841 | - "tok_start set in ls_top while parsing " + | |
| 842 | - std::string(cstr)); | |
| 857 | + throw std::logic_error("tok_start set in ls_top while parsing"); | |
| 843 | 858 | break; |
| 844 | 859 | |
| 845 | 860 | case ls_number: |
| 846 | 861 | case ls_alpha: |
| 847 | - tok_end = p; | |
| 862 | + // okay | |
| 848 | 863 | break; |
| 849 | 864 | |
| 865 | + case ls_u4: | |
| 866 | + QTC::TC("libtests", "JSON parse premature end of u"); | |
| 867 | + throw std::runtime_error( | |
| 868 | + "JSON: offset " + | |
| 869 | + QUtil::uint_to_string(offset - u_count - 1) + | |
| 870 | + ": \\u must be followed by four characters"); | |
| 871 | + | |
| 850 | 872 | case ls_string: |
| 851 | 873 | case ls_backslash: |
| 852 | 874 | QTC::TC("libtests", "JSON parse unterminated string"); |
| 853 | 875 | throw std::runtime_error( |
| 854 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 876 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 855 | 877 | ": unterminated string"); |
| 856 | 878 | break; |
| 857 | 879 | } |
| ... | ... | @@ -862,28 +884,25 @@ JSONParser::getToken() |
| 862 | 884 | void |
| 863 | 885 | JSONParser::handleToken() |
| 864 | 886 | { |
| 865 | - if (!(tok_start && tok_end)) { | |
| 887 | + if (token.empty()) { | |
| 866 | 888 | return; |
| 867 | 889 | } |
| 868 | 890 | |
| 869 | - // Get token value. | |
| 870 | - std::string value(tok_start, tok_end); | |
| 871 | - | |
| 872 | 891 | if (parser_state == ps_done) { |
| 873 | 892 | QTC::TC("libtests", "JSON parse junk after object"); |
| 874 | 893 | throw std::runtime_error( |
| 875 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 876 | - ": material follows end of object: " + value); | |
| 894 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 895 | + ": material follows end of object: " + token); | |
| 877 | 896 | } |
| 878 | 897 | |
| 879 | 898 | // Get string value |
| 880 | 899 | std::string s_value; |
| 881 | 900 | if (lex_state == ls_string) { |
| 882 | 901 | // Token includes the quotation marks |
| 883 | - if (tok_end - tok_start < 2) { | |
| 902 | + if (token.length() < 2) { | |
| 884 | 903 | throw std::logic_error("JSON string length < 2"); |
| 885 | 904 | } |
| 886 | - s_value = decode_string(value); | |
| 905 | + s_value = decode_string(token); | |
| 887 | 906 | } |
| 888 | 907 | // Based on the lexical state and value, figure out whether we are |
| 889 | 908 | // looking at an item or a delimiter. It will always be exactly |
| ... | ... | @@ -891,12 +910,14 @@ JSONParser::handleToken() |
| 891 | 910 | |
| 892 | 911 | std::shared_ptr<JSON> item; |
| 893 | 912 | char delimiter = '\0'; |
| 913 | + // Already verified that token is not empty | |
| 914 | + char first_char = token.at(0); | |
| 894 | 915 | switch (lex_state) { |
| 895 | 916 | case ls_top: |
| 896 | - switch (*tok_start) { | |
| 917 | + switch (first_char) { | |
| 897 | 918 | case '{': |
| 898 | 919 | item = std::make_shared<JSON>(JSON::makeDictionary()); |
| 899 | - item->setStart(QIntC::to_size(tok_start - cstr)); | |
| 920 | + item->setStart(offset - token.length()); | |
| 900 | 921 | if (reactor) { |
| 901 | 922 | reactor->dictionaryStart(); |
| 902 | 923 | } |
| ... | ... | @@ -904,14 +925,14 @@ JSONParser::handleToken() |
| 904 | 925 | |
| 905 | 926 | case '[': |
| 906 | 927 | item = std::make_shared<JSON>(JSON::makeArray()); |
| 907 | - item->setStart(QIntC::to_size(tok_start - cstr)); | |
| 928 | + item->setStart(offset - token.length()); | |
| 908 | 929 | if (reactor) { |
| 909 | 930 | reactor->arrayStart(); |
| 910 | 931 | } |
| 911 | 932 | break; |
| 912 | 933 | |
| 913 | 934 | default: |
| 914 | - delimiter = *tok_start; | |
| 935 | + delimiter = first_char; | |
| 915 | 936 | break; |
| 916 | 937 | } |
| 917 | 938 | break; |
| ... | ... | @@ -920,38 +941,38 @@ JSONParser::handleToken() |
| 920 | 941 | if (number_saw_point && (number_after_point == 0)) { |
| 921 | 942 | QTC::TC("libtests", "JSON parse decimal with no digits"); |
| 922 | 943 | throw std::runtime_error( |
| 923 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 944 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 924 | 945 | ": decimal point with no digits"); |
| 925 | 946 | } |
| 926 | 947 | if ((number_before_point > 1) && |
| 927 | - ((tok_start[0] == '0') || | |
| 928 | - ((tok_start[0] == '-') && (tok_start[1] == '0')))) { | |
| 948 | + ((first_char == '0') || | |
| 949 | + ((first_char == '-') && (token.at(1) == '0')))) { | |
| 929 | 950 | QTC::TC("libtests", "JSON parse leading zero"); |
| 930 | 951 | throw std::runtime_error( |
| 931 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 952 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 932 | 953 | ": number with leading zero"); |
| 933 | 954 | } |
| 934 | 955 | if ((number_before_point == 0) && (number_after_point == 0)) { |
| 935 | 956 | QTC::TC("libtests", "JSON parse number no digits"); |
| 936 | 957 | throw std::runtime_error( |
| 937 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 958 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 938 | 959 | ": number with no digits"); |
| 939 | 960 | } |
| 940 | - item = std::make_shared<JSON>(JSON::makeNumber(value)); | |
| 961 | + item = std::make_shared<JSON>(JSON::makeNumber(token)); | |
| 941 | 962 | break; |
| 942 | 963 | |
| 943 | 964 | case ls_alpha: |
| 944 | - if (value == "true") { | |
| 965 | + if (token == "true") { | |
| 945 | 966 | item = std::make_shared<JSON>(JSON::makeBool(true)); |
| 946 | - } else if (value == "false") { | |
| 967 | + } else if (token == "false") { | |
| 947 | 968 | item = std::make_shared<JSON>(JSON::makeBool(false)); |
| 948 | - } else if (value == "null") { | |
| 969 | + } else if (token == "null") { | |
| 949 | 970 | item = std::make_shared<JSON>(JSON::makeNull()); |
| 950 | 971 | } else { |
| 951 | 972 | QTC::TC("libtests", "JSON parse invalid keyword"); |
| 952 | 973 | throw std::runtime_error( |
| 953 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 954 | - ": invalid keyword " + value); | |
| 974 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 975 | + ": invalid keyword " + token); | |
| 955 | 976 | } |
| 956 | 977 | break; |
| 957 | 978 | |
| ... | ... | @@ -960,7 +981,9 @@ JSONParser::handleToken() |
| 960 | 981 | break; |
| 961 | 982 | |
| 962 | 983 | case ls_backslash: |
| 963 | - throw std::logic_error("tok_end is set while state = ls_backslash"); | |
| 984 | + case ls_u4: | |
| 985 | + throw std::logic_error( | |
| 986 | + "tok_end is set while state = ls_backslash or ls_u4"); | |
| 964 | 987 | break; |
| 965 | 988 | } |
| 966 | 989 | |
| ... | ... | @@ -981,21 +1004,21 @@ JSONParser::handleToken() |
| 981 | 1004 | case ps_dict_after_key: |
| 982 | 1005 | QTC::TC("libtests", "JSON parse expected colon"); |
| 983 | 1006 | throw std::runtime_error( |
| 984 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 1007 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 985 | 1008 | ": expected ':'"); |
| 986 | 1009 | break; |
| 987 | 1010 | |
| 988 | 1011 | case ps_dict_after_item: |
| 989 | 1012 | QTC::TC("libtests", "JSON parse expected , or }"); |
| 990 | 1013 | throw std::runtime_error( |
| 991 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 1014 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 992 | 1015 | ": expected ',' or '}'"); |
| 993 | 1016 | break; |
| 994 | 1017 | |
| 995 | 1018 | case ps_array_after_item: |
| 996 | 1019 | QTC::TC("libtests", "JSON parse expected, or ]"); |
| 997 | 1020 | throw std::runtime_error( |
| 998 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 1021 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 999 | 1022 | ": expected ',' or ']'"); |
| 1000 | 1023 | break; |
| 1001 | 1024 | |
| ... | ... | @@ -1004,7 +1027,7 @@ JSONParser::handleToken() |
| 1004 | 1027 | if (lex_state != ls_string) { |
| 1005 | 1028 | QTC::TC("libtests", "JSON parse string as dict key"); |
| 1006 | 1029 | throw std::runtime_error( |
| 1007 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 1030 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 1008 | 1031 | ": expect string as dictionary key"); |
| 1009 | 1032 | } |
| 1010 | 1033 | break; |
| ... | ... | @@ -1023,7 +1046,7 @@ JSONParser::handleToken() |
| 1023 | 1046 | { |
| 1024 | 1047 | QTC::TC("libtests", "JSON parse unexpected }"); |
| 1025 | 1048 | throw std::runtime_error( |
| 1026 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 1049 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 1027 | 1050 | ": unexpected dictionary end delimiter"); |
| 1028 | 1051 | } |
| 1029 | 1052 | } else if (delimiter == ']') { |
| ... | ... | @@ -1033,14 +1056,14 @@ JSONParser::handleToken() |
| 1033 | 1056 | { |
| 1034 | 1057 | QTC::TC("libtests", "JSON parse unexpected ]"); |
| 1035 | 1058 | throw std::runtime_error( |
| 1036 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 1059 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 1037 | 1060 | ": unexpected array end delimiter"); |
| 1038 | 1061 | } |
| 1039 | 1062 | } else if (delimiter == ':') { |
| 1040 | 1063 | if (parser_state != ps_dict_after_key) { |
| 1041 | 1064 | QTC::TC("libtests", "JSON parse unexpected :"); |
| 1042 | 1065 | throw std::runtime_error( |
| 1043 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 1066 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 1044 | 1067 | ": unexpected colon"); |
| 1045 | 1068 | } |
| 1046 | 1069 | } else if (delimiter == ',') { |
| ... | ... | @@ -1048,7 +1071,7 @@ JSONParser::handleToken() |
| 1048 | 1071 | (parser_state == ps_array_after_item))) { |
| 1049 | 1072 | QTC::TC("libtests", "JSON parse unexpected ,"); |
| 1050 | 1073 | throw std::runtime_error( |
| 1051 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 1074 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 1052 | 1075 | ": unexpected comma"); |
| 1053 | 1076 | } |
| 1054 | 1077 | } else if (delimiter != '\0') { |
| ... | ... | @@ -1074,7 +1097,7 @@ JSONParser::handleToken() |
| 1074 | 1097 | next_state = ps_stack.back(); |
| 1075 | 1098 | ps_stack.pop_back(); |
| 1076 | 1099 | auto tos = stack.back(); |
| 1077 | - tos->setEnd(QIntC::to_size(tok_end - cstr)); | |
| 1100 | + tos->setEnd(offset); | |
| 1078 | 1101 | if (reactor) { |
| 1079 | 1102 | reactor->containerEnd(*tos); |
| 1080 | 1103 | } |
| ... | ... | @@ -1086,8 +1109,8 @@ JSONParser::handleToken() |
| 1086 | 1109 | "JSONParser::handleToken: unexpected delimiter in transition"); |
| 1087 | 1110 | } else if (item.get()) { |
| 1088 | 1111 | if (!(item->isArray() || item->isDictionary())) { |
| 1089 | - item->setStart(QIntC::to_size(tok_start - cstr)); | |
| 1090 | - item->setEnd(QIntC::to_size(tok_end - cstr)); | |
| 1112 | + item->setStart(offset - token.length()); | |
| 1113 | + item->setEnd(offset); | |
| 1091 | 1114 | } |
| 1092 | 1115 | |
| 1093 | 1116 | std::shared_ptr<JSON> tos; |
| ... | ... | @@ -1149,23 +1172,17 @@ JSONParser::handleToken() |
| 1149 | 1172 | } |
| 1150 | 1173 | if (ps_stack.size() > 500) { |
| 1151 | 1174 | throw std::runtime_error( |
| 1152 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | |
| 1175 | + "JSON: offset " + QUtil::uint_to_string(offset) + | |
| 1153 | 1176 | ": maximum object depth exceeded"); |
| 1154 | 1177 | } |
| 1155 | 1178 | parser_state = next_state; |
| 1156 | - tok_start = nullptr; | |
| 1157 | - tok_end = nullptr; | |
| 1158 | 1179 | lex_state = ls_top; |
| 1159 | 1180 | } |
| 1160 | 1181 | |
| 1161 | 1182 | std::shared_ptr<JSON> |
| 1162 | -JSONParser::parse(std::string const& s) | |
| 1183 | +JSONParser::parse() | |
| 1163 | 1184 | { |
| 1164 | - cstr = s.c_str(); | |
| 1165 | - end = cstr + s.length(); | |
| 1166 | - p = cstr; | |
| 1167 | - | |
| 1168 | - while (p < end) { | |
| 1185 | + while (!done) { | |
| 1169 | 1186 | getToken(); |
| 1170 | 1187 | handleToken(); |
| 1171 | 1188 | } |
| ... | ... | @@ -1181,10 +1198,18 @@ JSONParser::parse(std::string const& s) |
| 1181 | 1198 | } |
| 1182 | 1199 | |
| 1183 | 1200 | JSON |
| 1184 | -JSON::parse(std::string const& s, Reactor* reactor) | |
| 1201 | +JSON::parse(InputSource& is, Reactor* reactor) | |
| 1202 | +{ | |
| 1203 | + JSONParser jp(is, reactor); | |
| 1204 | + return *jp.parse(); | |
| 1205 | +} | |
| 1206 | + | |
| 1207 | +JSON | |
| 1208 | +JSON::parse(std::string const& s) | |
| 1185 | 1209 | { |
| 1186 | - JSONParser jp(reactor); | |
| 1187 | - return *jp.parse(s); | |
| 1210 | + BufferInputSource bis("json input", s); | |
| 1211 | + JSONParser jp(bis, nullptr); | |
| 1212 | + return *jp.parse(); | |
| 1188 | 1213 | } |
| 1189 | 1214 | |
| 1190 | 1215 | void | ... | ... |
libtests/json_parse.cc
| 1 | +#include <qpdf/FileInputSource.hh> | |
| 1 | 2 | #include <qpdf/JSON.hh> |
| 2 | 3 | #include <qpdf/QUtil.hh> |
| 3 | 4 | #include <cstdlib> |
| ... | ... | @@ -103,11 +104,8 @@ main(int argc, char* argv[]) |
| 103 | 104 | } |
| 104 | 105 | } |
| 105 | 106 | try { |
| 106 | - std::shared_ptr<char> buf; | |
| 107 | - size_t size; | |
| 108 | - QUtil::read_file_into_memory(filename, buf, size); | |
| 109 | - std::string s(buf.get(), size); | |
| 110 | - std::cout << JSON::parse(s, reactor.get()).unparse() << std::endl; | |
| 107 | + FileInputSource is(filename); | |
| 108 | + std::cout << JSON::parse(is, reactor.get()).unparse() << std::endl; | |
| 111 | 109 | } catch (std::exception& e) { |
| 112 | 110 | std::cerr << "exception: " << filename << ": " << e.what() << std::endl; |
| 113 | 111 | return 2; | ... | ... |