Commit 05fda4afa289ef248804865d7648c9ac3ae75fbd
1 parent
e5f3910c
Change JSON parser to parse from an InputSource
Showing
4 changed files
with
150 additions
and
126 deletions
TODO
| @@ -51,9 +51,6 @@ library, when context is available, to have a pipeline rather than a | @@ -51,9 +51,6 @@ library, when context is available, to have a pipeline rather than a | ||
| 51 | FILE* or std::ostream. This makes it possible for people to capture | 51 | FILE* or std::ostream. This makes it possible for people to capture |
| 52 | output more flexibly. | 52 | output more flexibly. |
| 53 | 53 | ||
| 54 | -JSON::parse should work from an InputSource. BufferInputSource can | ||
| 55 | -already start with a std::string. | ||
| 56 | - | ||
| 57 | Have a json blob defined by a function that takes a pipeline and | 54 | Have a json blob defined by a function that takes a pipeline and |
| 58 | writes data to the pipeline. Its writer should create a Pl_Base64 -> | 55 | writes data to the pipeline. Its writer should create a Pl_Base64 -> |
| 59 | Pl_Concatenate in front of the pipeline passed to write and call the | 56 | Pl_Concatenate in front of the pipeline passed to write and call the |
include/qpdf/JSON.hh
| @@ -46,6 +46,7 @@ | @@ -46,6 +46,7 @@ | ||
| 46 | #include <vector> | 46 | #include <vector> |
| 47 | 47 | ||
| 48 | class Pipeline; | 48 | class Pipeline; |
| 49 | +class InputSource; | ||
| 49 | 50 | ||
| 50 | class JSON | 51 | class JSON |
| 51 | { | 52 | { |
| @@ -249,10 +250,13 @@ class JSON | @@ -249,10 +250,13 @@ class JSON | ||
| 249 | virtual bool arrayItem(JSON const& value) = 0; | 250 | virtual bool arrayItem(JSON const& value) = 0; |
| 250 | }; | 251 | }; |
| 251 | 252 | ||
| 252 | - // Create a JSON object from a string. See above for information | ||
| 253 | - // about how to use the Reactor. | 253 | + // Create a JSON object from a string. |
| 254 | QPDF_DLL | 254 | QPDF_DLL |
| 255 | - static JSON parse(std::string const&, Reactor* reactor = nullptr); | 255 | + static JSON parse(std::string const&); |
| 256 | + // Create a JSON object from an input source. See above for | ||
| 257 | + // information about how to use the Reactor. | ||
| 258 | + QPDF_DLL | ||
| 259 | + static JSON parse(InputSource&, Reactor* reactor = nullptr); | ||
| 256 | 260 | ||
| 257 | // parse calls setOffsets to set the inclusive start and | 261 | // parse calls setOffsets to set the inclusive start and |
| 258 | // non-inclusive end offsets of an object relative to its input | 262 | // non-inclusive end offsets of an object relative to its input |
libqpdf/JSON.cc
| 1 | #include <qpdf/JSON.hh> | 1 | #include <qpdf/JSON.hh> |
| 2 | 2 | ||
| 3 | -#include <qpdf/Pipeline.hh> | 3 | +#include <qpdf/BufferInputSource.hh> |
| 4 | #include <qpdf/Pl_String.hh> | 4 | #include <qpdf/Pl_String.hh> |
| 5 | -#include <qpdf/QIntC.hh> | ||
| 6 | #include <qpdf/QTC.hh> | 5 | #include <qpdf/QTC.hh> |
| 7 | #include <qpdf/QUtil.hh> | 6 | #include <qpdf/QUtil.hh> |
| 8 | #include <cstring> | 7 | #include <cstring> |
| @@ -521,7 +520,8 @@ namespace | @@ -521,7 +520,8 @@ namespace | ||
| 521 | class JSONParser | 520 | class JSONParser |
| 522 | { | 521 | { |
| 523 | public: | 522 | public: |
| 524 | - JSONParser(JSON::Reactor* reactor) : | 523 | + JSONParser(InputSource& is, JSON::Reactor* reactor) : |
| 524 | + is(is), | ||
| 525 | reactor(reactor), | 525 | reactor(reactor), |
| 526 | lex_state(ls_top), | 526 | lex_state(ls_top), |
| 527 | number_before_point(0), | 527 | number_before_point(0), |
| @@ -529,16 +529,16 @@ namespace | @@ -529,16 +529,16 @@ namespace | ||
| 529 | number_after_e(0), | 529 | number_after_e(0), |
| 530 | number_saw_point(false), | 530 | number_saw_point(false), |
| 531 | number_saw_e(false), | 531 | number_saw_e(false), |
| 532 | - cstr(nullptr), | ||
| 533 | - end(nullptr), | ||
| 534 | - tok_start(nullptr), | ||
| 535 | - tok_end(nullptr), | ||
| 536 | - p(nullptr), | 532 | + bytes(0), |
| 533 | + p(buf), | ||
| 534 | + u_count(0), | ||
| 535 | + offset(0), | ||
| 536 | + done(false), | ||
| 537 | parser_state(ps_top) | 537 | parser_state(ps_top) |
| 538 | { | 538 | { |
| 539 | } | 539 | } |
| 540 | 540 | ||
| 541 | - std::shared_ptr<JSON> parse(std::string const& s); | 541 | + std::shared_ptr<JSON> parse(); |
| 542 | 542 | ||
| 543 | private: | 543 | private: |
| 544 | void getToken(); | 544 | void getToken(); |
| @@ -564,8 +564,10 @@ namespace | @@ -564,8 +564,10 @@ namespace | ||
| 564 | ls_alpha, | 564 | ls_alpha, |
| 565 | ls_string, | 565 | ls_string, |
| 566 | ls_backslash, | 566 | ls_backslash, |
| 567 | + ls_u4, | ||
| 567 | }; | 568 | }; |
| 568 | 569 | ||
| 570 | + InputSource& is; | ||
| 569 | JSON::Reactor* reactor; | 571 | JSON::Reactor* reactor; |
| 570 | lex_state_e lex_state; | 572 | lex_state_e lex_state; |
| 571 | size_t number_before_point; | 573 | size_t number_before_point; |
| @@ -573,11 +575,13 @@ namespace | @@ -573,11 +575,13 @@ namespace | ||
| 573 | size_t number_after_e; | 575 | size_t number_after_e; |
| 574 | bool number_saw_point; | 576 | bool number_saw_point; |
| 575 | bool number_saw_e; | 577 | bool number_saw_e; |
| 576 | - char const* cstr; | ||
| 577 | - char const* end; | ||
| 578 | - char const* tok_start; | ||
| 579 | - char const* tok_end; | 578 | + char buf[16384]; |
| 579 | + size_t bytes; | ||
| 580 | char const* p; | 580 | char const* p; |
| 581 | + size_t u_count; | ||
| 582 | + size_t offset; | ||
| 583 | + bool done; | ||
| 584 | + std::string token; | ||
| 581 | parser_state_e parser_state; | 585 | parser_state_e parser_state; |
| 582 | std::vector<std::shared_ptr<JSON>> stack; | 586 | std::vector<std::shared_ptr<JSON>> stack; |
| 583 | std::vector<parser_state_e> ps_stack; | 587 | std::vector<parser_state_e> ps_stack; |
| @@ -661,28 +665,35 @@ JSONParser::decode_string(std::string const& str) | @@ -661,28 +665,35 @@ JSONParser::decode_string(std::string const& str) | ||
| 661 | void | 665 | void |
| 662 | JSONParser::getToken() | 666 | JSONParser::getToken() |
| 663 | { | 667 | { |
| 664 | - while (p < end) { | 668 | + enum { append, ignore, reread } action = append; |
| 669 | + bool ready = false; | ||
| 670 | + token.clear(); | ||
| 671 | + while (!done) { | ||
| 672 | + if (p == (buf + bytes)) { | ||
| 673 | + p = buf; | ||
| 674 | + bytes = is.read(buf, sizeof(buf)); | ||
| 675 | + if (bytes == 0) { | ||
| 676 | + done = true; | ||
| 677 | + break; | ||
| 678 | + } | ||
| 679 | + } | ||
| 680 | + | ||
| 665 | if (*p == 0) { | 681 | if (*p == 0) { |
| 666 | QTC::TC("libtests", "JSON parse null character"); | 682 | QTC::TC("libtests", "JSON parse null character"); |
| 667 | throw std::runtime_error( | 683 | throw std::runtime_error( |
| 668 | "JSON: null character at offset " + | 684 | "JSON: null character at offset " + |
| 669 | - QUtil::int_to_string(p - cstr)); | 685 | + QUtil::uint_to_string(offset)); |
| 670 | } | 686 | } |
| 687 | + action = append; | ||
| 671 | switch (lex_state) { | 688 | switch (lex_state) { |
| 672 | case ls_top: | 689 | case ls_top: |
| 673 | if (*p == '"') { | 690 | if (*p == '"') { |
| 674 | - tok_start = p; | ||
| 675 | - tok_end = nullptr; | ||
| 676 | lex_state = ls_string; | 691 | lex_state = ls_string; |
| 677 | } else if (QUtil::is_space(*p)) { | 692 | } else if (QUtil::is_space(*p)) { |
| 678 | - // ignore | 693 | + action = ignore; |
| 679 | } else if ((*p >= 'a') && (*p <= 'z')) { | 694 | } else if ((*p >= 'a') && (*p <= 'z')) { |
| 680 | - tok_start = p; | ||
| 681 | - tok_end = nullptr; | ||
| 682 | lex_state = ls_alpha; | 695 | lex_state = ls_alpha; |
| 683 | } else if (*p == '-') { | 696 | } else if (*p == '-') { |
| 684 | - tok_start = p; | ||
| 685 | - tok_end = nullptr; | ||
| 686 | lex_state = ls_number; | 697 | lex_state = ls_number; |
| 687 | number_before_point = 0; | 698 | number_before_point = 0; |
| 688 | number_after_point = 0; | 699 | number_after_point = 0; |
| @@ -690,8 +701,6 @@ JSONParser::getToken() | @@ -690,8 +701,6 @@ JSONParser::getToken() | ||
| 690 | number_saw_point = false; | 701 | number_saw_point = false; |
| 691 | number_saw_e = false; | 702 | number_saw_e = false; |
| 692 | } else if ((*p >= '0') && (*p <= '9')) { | 703 | } else if ((*p >= '0') && (*p <= '9')) { |
| 693 | - tok_start = p; | ||
| 694 | - tok_end = nullptr; | ||
| 695 | lex_state = ls_number; | 704 | lex_state = ls_number; |
| 696 | number_before_point = 1; | 705 | number_before_point = 1; |
| 697 | number_after_point = 0; | 706 | number_after_point = 0; |
| @@ -699,8 +708,6 @@ JSONParser::getToken() | @@ -699,8 +708,6 @@ JSONParser::getToken() | ||
| 699 | number_saw_point = false; | 708 | number_saw_point = false; |
| 700 | number_saw_e = false; | 709 | number_saw_e = false; |
| 701 | } else if (*p == '.') { | 710 | } else if (*p == '.') { |
| 702 | - tok_start = p; | ||
| 703 | - tok_end = nullptr; | ||
| 704 | lex_state = ls_number; | 711 | lex_state = ls_number; |
| 705 | number_before_point = 0; | 712 | number_before_point = 0; |
| 706 | number_after_point = 0; | 713 | number_after_point = 0; |
| @@ -708,12 +715,11 @@ JSONParser::getToken() | @@ -708,12 +715,11 @@ JSONParser::getToken() | ||
| 708 | number_saw_point = true; | 715 | number_saw_point = true; |
| 709 | number_saw_e = false; | 716 | number_saw_e = false; |
| 710 | } else if (strchr("{}[]:,", *p)) { | 717 | } else if (strchr("{}[]:,", *p)) { |
| 711 | - tok_start = p; | ||
| 712 | - tok_end = p + 1; | 718 | + ready = true; |
| 713 | } else { | 719 | } else { |
| 714 | QTC::TC("libtests", "JSON parse bad character"); | 720 | QTC::TC("libtests", "JSON parse bad character"); |
| 715 | throw std::runtime_error( | 721 | throw std::runtime_error( |
| 716 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 722 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 717 | ": unexpected character " + std::string(p, 1)); | 723 | ": unexpected character " + std::string(p, 1)); |
| 718 | } | 724 | } |
| 719 | break; | 725 | break; |
| @@ -731,12 +737,12 @@ JSONParser::getToken() | @@ -731,12 +737,12 @@ JSONParser::getToken() | ||
| 731 | if (number_saw_e) { | 737 | if (number_saw_e) { |
| 732 | QTC::TC("libtests", "JSON parse point after e"); | 738 | QTC::TC("libtests", "JSON parse point after e"); |
| 733 | throw std::runtime_error( | 739 | throw std::runtime_error( |
| 734 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 740 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 735 | ": numeric literal: decimal point after e"); | 741 | ": numeric literal: decimal point after e"); |
| 736 | } else if (number_saw_point) { | 742 | } else if (number_saw_point) { |
| 737 | QTC::TC("libtests", "JSON parse duplicate point"); | 743 | QTC::TC("libtests", "JSON parse duplicate point"); |
| 738 | throw std::runtime_error( | 744 | throw std::runtime_error( |
| 739 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 745 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 740 | ": numeric literal: decimal point already seen"); | 746 | ": numeric literal: decimal point already seen"); |
| 741 | } else { | 747 | } else { |
| 742 | number_saw_point = true; | 748 | number_saw_point = true; |
| @@ -745,7 +751,7 @@ JSONParser::getToken() | @@ -745,7 +751,7 @@ JSONParser::getToken() | ||
| 745 | if (number_saw_e) { | 751 | if (number_saw_e) { |
| 746 | QTC::TC("libtests", "JSON parse duplicate e"); | 752 | QTC::TC("libtests", "JSON parse duplicate e"); |
| 747 | throw std::runtime_error( | 753 | throw std::runtime_error( |
| 748 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 754 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 749 | ": numeric literal: e already seen"); | 755 | ": numeric literal: e already seen"); |
| 750 | } else { | 756 | } else { |
| 751 | number_saw_e = true; | 757 | number_saw_e = true; |
| @@ -756,18 +762,19 @@ JSONParser::getToken() | @@ -756,18 +762,19 @@ JSONParser::getToken() | ||
| 756 | } else { | 762 | } else { |
| 757 | QTC::TC("libtests", "JSON parse unexpected sign"); | 763 | QTC::TC("libtests", "JSON parse unexpected sign"); |
| 758 | throw std::runtime_error( | 764 | throw std::runtime_error( |
| 759 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 765 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 760 | ": numeric literal: unexpected sign"); | 766 | ": numeric literal: unexpected sign"); |
| 761 | } | 767 | } |
| 762 | } else if (QUtil::is_space(*p)) { | 768 | } else if (QUtil::is_space(*p)) { |
| 763 | - tok_end = p; | 769 | + action = ignore; |
| 770 | + ready = true; | ||
| 764 | } else if (strchr("{}[]:,", *p)) { | 771 | } else if (strchr("{}[]:,", *p)) { |
| 765 | - tok_end = p; | ||
| 766 | - --p; | 772 | + action = reread; |
| 773 | + ready = true; | ||
| 767 | } else { | 774 | } else { |
| 768 | QTC::TC("libtests", "JSON parse numeric bad character"); | 775 | QTC::TC("libtests", "JSON parse numeric bad character"); |
| 769 | throw std::runtime_error( | 776 | throw std::runtime_error( |
| 770 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 777 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 771 | ": numeric literal: unexpected character " + | 778 | ": numeric literal: unexpected character " + |
| 772 | std::string(p, 1)); | 779 | std::string(p, 1)); |
| 773 | } | 780 | } |
| @@ -777,21 +784,22 @@ JSONParser::getToken() | @@ -777,21 +784,22 @@ JSONParser::getToken() | ||
| 777 | if ((*p >= 'a') && (*p <= 'z')) { | 784 | if ((*p >= 'a') && (*p <= 'z')) { |
| 778 | // okay | 785 | // okay |
| 779 | } else if (QUtil::is_space(*p)) { | 786 | } else if (QUtil::is_space(*p)) { |
| 780 | - tok_end = p; | 787 | + action = ignore; |
| 788 | + ready = true; | ||
| 781 | } else if (strchr("{}[]:,", *p)) { | 789 | } else if (strchr("{}[]:,", *p)) { |
| 782 | - tok_end = p; | ||
| 783 | - --p; | 790 | + action = reread; |
| 791 | + ready = true; | ||
| 784 | } else { | 792 | } else { |
| 785 | QTC::TC("libtests", "JSON parse keyword bad character"); | 793 | QTC::TC("libtests", "JSON parse keyword bad character"); |
| 786 | throw std::runtime_error( | 794 | throw std::runtime_error( |
| 787 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 795 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 788 | ": keyword: unexpected character " + std::string(p, 1)); | 796 | ": keyword: unexpected character " + std::string(p, 1)); |
| 789 | } | 797 | } |
| 790 | break; | 798 | break; |
| 791 | 799 | ||
| 792 | case ls_string: | 800 | case ls_string: |
| 793 | if (*p == '"') { | 801 | if (*p == '"') { |
| 794 | - tok_end = p + 1; | 802 | + ready = true; |
| 795 | } else if (*p == '\\') { | 803 | } else if (*p == '\\') { |
| 796 | lex_state = ls_backslash; | 804 | lex_state = ls_backslash; |
| 797 | } | 805 | } |
| @@ -802,56 +810,70 @@ JSONParser::getToken() | @@ -802,56 +810,70 @@ JSONParser::getToken() | ||
| 802 | if (strchr("\\\"/bfnrt", *p)) { | 810 | if (strchr("\\\"/bfnrt", *p)) { |
| 803 | lex_state = ls_string; | 811 | lex_state = ls_string; |
| 804 | } else if (*p == 'u') { | 812 | } else if (*p == 'u') { |
| 805 | - if (p + 4 >= end) { | ||
| 806 | - QTC::TC("libtests", "JSON parse premature end of u"); | ||
| 807 | - throw std::runtime_error( | ||
| 808 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | ||
| 809 | - ": \\u must be followed by four characters"); | ||
| 810 | - } | ||
| 811 | - for (size_t i = 1; i <= 4; ++i) { | ||
| 812 | - if (!QUtil::is_hex_digit(p[i])) { | ||
| 813 | - QTC::TC("libtests", "JSON parse bad hex after u"); | ||
| 814 | - throw std::runtime_error( | ||
| 815 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | ||
| 816 | - ": \\u must be followed by four hex digits"); | ||
| 817 | - } | ||
| 818 | - } | ||
| 819 | - p += 4; | ||
| 820 | - lex_state = ls_string; | 813 | + lex_state = ls_u4; |
| 814 | + u_count = 0; | ||
| 821 | } else { | 815 | } else { |
| 822 | QTC::TC("libtests", "JSON parse backslash bad character"); | 816 | QTC::TC("libtests", "JSON parse backslash bad character"); |
| 823 | throw std::runtime_error( | 817 | throw std::runtime_error( |
| 824 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 818 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 825 | ": invalid character after backslash: " + | 819 | ": invalid character after backslash: " + |
| 826 | std::string(p, 1)); | 820 | std::string(p, 1)); |
| 827 | } | 821 | } |
| 828 | break; | 822 | break; |
| 823 | + | ||
| 824 | + case ls_u4: | ||
| 825 | + if (!QUtil::is_hex_digit(*p)) { | ||
| 826 | + QTC::TC("libtests", "JSON parse bad hex after u"); | ||
| 827 | + throw std::runtime_error( | ||
| 828 | + "JSON: offset " + | ||
| 829 | + QUtil::uint_to_string(offset - u_count - 1) + | ||
| 830 | + ": \\u must be followed by four hex digits"); | ||
| 831 | + } | ||
| 832 | + if (++u_count == 4) { | ||
| 833 | + lex_state = ls_string; | ||
| 834 | + } | ||
| 835 | + break; | ||
| 829 | } | 836 | } |
| 830 | - ++p; | ||
| 831 | - if (tok_start && tok_end) { | 837 | + switch (action) { |
| 838 | + case reread: | ||
| 839 | + break; | ||
| 840 | + case append: | ||
| 841 | + token.append(1, *p); | ||
| 842 | + // fall through | ||
| 843 | + case ignore: | ||
| 844 | + ++p; | ||
| 845 | + ++offset; | ||
| 846 | + break; | ||
| 847 | + } | ||
| 848 | + if (ready) { | ||
| 832 | break; | 849 | break; |
| 833 | } | 850 | } |
| 834 | } | 851 | } |
| 835 | - if (p == end) { | ||
| 836 | - if (tok_start && (!tok_end)) { | 852 | + if (done) { |
| 853 | + if ((!token.empty()) && (!ready)) { | ||
| 837 | switch (lex_state) { | 854 | switch (lex_state) { |
| 838 | case ls_top: | 855 | case ls_top: |
| 839 | // Can't happen | 856 | // Can't happen |
| 840 | - throw std::logic_error( | ||
| 841 | - "tok_start set in ls_top while parsing " + | ||
| 842 | - std::string(cstr)); | 857 | + throw std::logic_error("tok_start set in ls_top while parsing"); |
| 843 | break; | 858 | break; |
| 844 | 859 | ||
| 845 | case ls_number: | 860 | case ls_number: |
| 846 | case ls_alpha: | 861 | case ls_alpha: |
| 847 | - tok_end = p; | 862 | + // okay |
| 848 | break; | 863 | break; |
| 849 | 864 | ||
| 865 | + case ls_u4: | ||
| 866 | + QTC::TC("libtests", "JSON parse premature end of u"); | ||
| 867 | + throw std::runtime_error( | ||
| 868 | + "JSON: offset " + | ||
| 869 | + QUtil::uint_to_string(offset - u_count - 1) + | ||
| 870 | + ": \\u must be followed by four characters"); | ||
| 871 | + | ||
| 850 | case ls_string: | 872 | case ls_string: |
| 851 | case ls_backslash: | 873 | case ls_backslash: |
| 852 | QTC::TC("libtests", "JSON parse unterminated string"); | 874 | QTC::TC("libtests", "JSON parse unterminated string"); |
| 853 | throw std::runtime_error( | 875 | throw std::runtime_error( |
| 854 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 876 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 855 | ": unterminated string"); | 877 | ": unterminated string"); |
| 856 | break; | 878 | break; |
| 857 | } | 879 | } |
| @@ -862,28 +884,25 @@ JSONParser::getToken() | @@ -862,28 +884,25 @@ JSONParser::getToken() | ||
| 862 | void | 884 | void |
| 863 | JSONParser::handleToken() | 885 | JSONParser::handleToken() |
| 864 | { | 886 | { |
| 865 | - if (!(tok_start && tok_end)) { | 887 | + if (token.empty()) { |
| 866 | return; | 888 | return; |
| 867 | } | 889 | } |
| 868 | 890 | ||
| 869 | - // Get token value. | ||
| 870 | - std::string value(tok_start, tok_end); | ||
| 871 | - | ||
| 872 | if (parser_state == ps_done) { | 891 | if (parser_state == ps_done) { |
| 873 | QTC::TC("libtests", "JSON parse junk after object"); | 892 | QTC::TC("libtests", "JSON parse junk after object"); |
| 874 | throw std::runtime_error( | 893 | throw std::runtime_error( |
| 875 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | ||
| 876 | - ": material follows end of object: " + value); | 894 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 895 | + ": material follows end of object: " + token); | ||
| 877 | } | 896 | } |
| 878 | 897 | ||
| 879 | // Get string value | 898 | // Get string value |
| 880 | std::string s_value; | 899 | std::string s_value; |
| 881 | if (lex_state == ls_string) { | 900 | if (lex_state == ls_string) { |
| 882 | // Token includes the quotation marks | 901 | // Token includes the quotation marks |
| 883 | - if (tok_end - tok_start < 2) { | 902 | + if (token.length() < 2) { |
| 884 | throw std::logic_error("JSON string length < 2"); | 903 | throw std::logic_error("JSON string length < 2"); |
| 885 | } | 904 | } |
| 886 | - s_value = decode_string(value); | 905 | + s_value = decode_string(token); |
| 887 | } | 906 | } |
| 888 | // Based on the lexical state and value, figure out whether we are | 907 | // Based on the lexical state and value, figure out whether we are |
| 889 | // looking at an item or a delimiter. It will always be exactly | 908 | // looking at an item or a delimiter. It will always be exactly |
| @@ -891,12 +910,14 @@ JSONParser::handleToken() | @@ -891,12 +910,14 @@ JSONParser::handleToken() | ||
| 891 | 910 | ||
| 892 | std::shared_ptr<JSON> item; | 911 | std::shared_ptr<JSON> item; |
| 893 | char delimiter = '\0'; | 912 | char delimiter = '\0'; |
| 913 | + // Already verified that token is not empty | ||
| 914 | + char first_char = token.at(0); | ||
| 894 | switch (lex_state) { | 915 | switch (lex_state) { |
| 895 | case ls_top: | 916 | case ls_top: |
| 896 | - switch (*tok_start) { | 917 | + switch (first_char) { |
| 897 | case '{': | 918 | case '{': |
| 898 | item = std::make_shared<JSON>(JSON::makeDictionary()); | 919 | item = std::make_shared<JSON>(JSON::makeDictionary()); |
| 899 | - item->setStart(QIntC::to_size(tok_start - cstr)); | 920 | + item->setStart(offset - token.length()); |
| 900 | if (reactor) { | 921 | if (reactor) { |
| 901 | reactor->dictionaryStart(); | 922 | reactor->dictionaryStart(); |
| 902 | } | 923 | } |
| @@ -904,14 +925,14 @@ JSONParser::handleToken() | @@ -904,14 +925,14 @@ JSONParser::handleToken() | ||
| 904 | 925 | ||
| 905 | case '[': | 926 | case '[': |
| 906 | item = std::make_shared<JSON>(JSON::makeArray()); | 927 | item = std::make_shared<JSON>(JSON::makeArray()); |
| 907 | - item->setStart(QIntC::to_size(tok_start - cstr)); | 928 | + item->setStart(offset - token.length()); |
| 908 | if (reactor) { | 929 | if (reactor) { |
| 909 | reactor->arrayStart(); | 930 | reactor->arrayStart(); |
| 910 | } | 931 | } |
| 911 | break; | 932 | break; |
| 912 | 933 | ||
| 913 | default: | 934 | default: |
| 914 | - delimiter = *tok_start; | 935 | + delimiter = first_char; |
| 915 | break; | 936 | break; |
| 916 | } | 937 | } |
| 917 | break; | 938 | break; |
| @@ -920,38 +941,38 @@ JSONParser::handleToken() | @@ -920,38 +941,38 @@ JSONParser::handleToken() | ||
| 920 | if (number_saw_point && (number_after_point == 0)) { | 941 | if (number_saw_point && (number_after_point == 0)) { |
| 921 | QTC::TC("libtests", "JSON parse decimal with no digits"); | 942 | QTC::TC("libtests", "JSON parse decimal with no digits"); |
| 922 | throw std::runtime_error( | 943 | throw std::runtime_error( |
| 923 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 944 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 924 | ": decimal point with no digits"); | 945 | ": decimal point with no digits"); |
| 925 | } | 946 | } |
| 926 | if ((number_before_point > 1) && | 947 | if ((number_before_point > 1) && |
| 927 | - ((tok_start[0] == '0') || | ||
| 928 | - ((tok_start[0] == '-') && (tok_start[1] == '0')))) { | 948 | + ((first_char == '0') || |
| 949 | + ((first_char == '-') && (token.at(1) == '0')))) { | ||
| 929 | QTC::TC("libtests", "JSON parse leading zero"); | 950 | QTC::TC("libtests", "JSON parse leading zero"); |
| 930 | throw std::runtime_error( | 951 | throw std::runtime_error( |
| 931 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 952 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 932 | ": number with leading zero"); | 953 | ": number with leading zero"); |
| 933 | } | 954 | } |
| 934 | if ((number_before_point == 0) && (number_after_point == 0)) { | 955 | if ((number_before_point == 0) && (number_after_point == 0)) { |
| 935 | QTC::TC("libtests", "JSON parse number no digits"); | 956 | QTC::TC("libtests", "JSON parse number no digits"); |
| 936 | throw std::runtime_error( | 957 | throw std::runtime_error( |
| 937 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 958 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 938 | ": number with no digits"); | 959 | ": number with no digits"); |
| 939 | } | 960 | } |
| 940 | - item = std::make_shared<JSON>(JSON::makeNumber(value)); | 961 | + item = std::make_shared<JSON>(JSON::makeNumber(token)); |
| 941 | break; | 962 | break; |
| 942 | 963 | ||
| 943 | case ls_alpha: | 964 | case ls_alpha: |
| 944 | - if (value == "true") { | 965 | + if (token == "true") { |
| 945 | item = std::make_shared<JSON>(JSON::makeBool(true)); | 966 | item = std::make_shared<JSON>(JSON::makeBool(true)); |
| 946 | - } else if (value == "false") { | 967 | + } else if (token == "false") { |
| 947 | item = std::make_shared<JSON>(JSON::makeBool(false)); | 968 | item = std::make_shared<JSON>(JSON::makeBool(false)); |
| 948 | - } else if (value == "null") { | 969 | + } else if (token == "null") { |
| 949 | item = std::make_shared<JSON>(JSON::makeNull()); | 970 | item = std::make_shared<JSON>(JSON::makeNull()); |
| 950 | } else { | 971 | } else { |
| 951 | QTC::TC("libtests", "JSON parse invalid keyword"); | 972 | QTC::TC("libtests", "JSON parse invalid keyword"); |
| 952 | throw std::runtime_error( | 973 | throw std::runtime_error( |
| 953 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | ||
| 954 | - ": invalid keyword " + value); | 974 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 975 | + ": invalid keyword " + token); | ||
| 955 | } | 976 | } |
| 956 | break; | 977 | break; |
| 957 | 978 | ||
| @@ -960,7 +981,9 @@ JSONParser::handleToken() | @@ -960,7 +981,9 @@ JSONParser::handleToken() | ||
| 960 | break; | 981 | break; |
| 961 | 982 | ||
| 962 | case ls_backslash: | 983 | case ls_backslash: |
| 963 | - throw std::logic_error("tok_end is set while state = ls_backslash"); | 984 | + case ls_u4: |
| 985 | + throw std::logic_error( | ||
| 986 | + "tok_end is set while state = ls_backslash or ls_u4"); | ||
| 964 | break; | 987 | break; |
| 965 | } | 988 | } |
| 966 | 989 | ||
| @@ -981,21 +1004,21 @@ JSONParser::handleToken() | @@ -981,21 +1004,21 @@ JSONParser::handleToken() | ||
| 981 | case ps_dict_after_key: | 1004 | case ps_dict_after_key: |
| 982 | QTC::TC("libtests", "JSON parse expected colon"); | 1005 | QTC::TC("libtests", "JSON parse expected colon"); |
| 983 | throw std::runtime_error( | 1006 | throw std::runtime_error( |
| 984 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 1007 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 985 | ": expected ':'"); | 1008 | ": expected ':'"); |
| 986 | break; | 1009 | break; |
| 987 | 1010 | ||
| 988 | case ps_dict_after_item: | 1011 | case ps_dict_after_item: |
| 989 | QTC::TC("libtests", "JSON parse expected , or }"); | 1012 | QTC::TC("libtests", "JSON parse expected , or }"); |
| 990 | throw std::runtime_error( | 1013 | throw std::runtime_error( |
| 991 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 1014 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 992 | ": expected ',' or '}'"); | 1015 | ": expected ',' or '}'"); |
| 993 | break; | 1016 | break; |
| 994 | 1017 | ||
| 995 | case ps_array_after_item: | 1018 | case ps_array_after_item: |
| 996 | QTC::TC("libtests", "JSON parse expected, or ]"); | 1019 | QTC::TC("libtests", "JSON parse expected, or ]"); |
| 997 | throw std::runtime_error( | 1020 | throw std::runtime_error( |
| 998 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 1021 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 999 | ": expected ',' or ']'"); | 1022 | ": expected ',' or ']'"); |
| 1000 | break; | 1023 | break; |
| 1001 | 1024 | ||
| @@ -1004,7 +1027,7 @@ JSONParser::handleToken() | @@ -1004,7 +1027,7 @@ JSONParser::handleToken() | ||
| 1004 | if (lex_state != ls_string) { | 1027 | if (lex_state != ls_string) { |
| 1005 | QTC::TC("libtests", "JSON parse string as dict key"); | 1028 | QTC::TC("libtests", "JSON parse string as dict key"); |
| 1006 | throw std::runtime_error( | 1029 | throw std::runtime_error( |
| 1007 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 1030 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 1008 | ": expect string as dictionary key"); | 1031 | ": expect string as dictionary key"); |
| 1009 | } | 1032 | } |
| 1010 | break; | 1033 | break; |
| @@ -1023,7 +1046,7 @@ JSONParser::handleToken() | @@ -1023,7 +1046,7 @@ JSONParser::handleToken() | ||
| 1023 | { | 1046 | { |
| 1024 | QTC::TC("libtests", "JSON parse unexpected }"); | 1047 | QTC::TC("libtests", "JSON parse unexpected }"); |
| 1025 | throw std::runtime_error( | 1048 | throw std::runtime_error( |
| 1026 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 1049 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 1027 | ": unexpected dictionary end delimiter"); | 1050 | ": unexpected dictionary end delimiter"); |
| 1028 | } | 1051 | } |
| 1029 | } else if (delimiter == ']') { | 1052 | } else if (delimiter == ']') { |
| @@ -1033,14 +1056,14 @@ JSONParser::handleToken() | @@ -1033,14 +1056,14 @@ JSONParser::handleToken() | ||
| 1033 | { | 1056 | { |
| 1034 | QTC::TC("libtests", "JSON parse unexpected ]"); | 1057 | QTC::TC("libtests", "JSON parse unexpected ]"); |
| 1035 | throw std::runtime_error( | 1058 | throw std::runtime_error( |
| 1036 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 1059 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 1037 | ": unexpected array end delimiter"); | 1060 | ": unexpected array end delimiter"); |
| 1038 | } | 1061 | } |
| 1039 | } else if (delimiter == ':') { | 1062 | } else if (delimiter == ':') { |
| 1040 | if (parser_state != ps_dict_after_key) { | 1063 | if (parser_state != ps_dict_after_key) { |
| 1041 | QTC::TC("libtests", "JSON parse unexpected :"); | 1064 | QTC::TC("libtests", "JSON parse unexpected :"); |
| 1042 | throw std::runtime_error( | 1065 | throw std::runtime_error( |
| 1043 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 1066 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 1044 | ": unexpected colon"); | 1067 | ": unexpected colon"); |
| 1045 | } | 1068 | } |
| 1046 | } else if (delimiter == ',') { | 1069 | } else if (delimiter == ',') { |
| @@ -1048,7 +1071,7 @@ JSONParser::handleToken() | @@ -1048,7 +1071,7 @@ JSONParser::handleToken() | ||
| 1048 | (parser_state == ps_array_after_item))) { | 1071 | (parser_state == ps_array_after_item))) { |
| 1049 | QTC::TC("libtests", "JSON parse unexpected ,"); | 1072 | QTC::TC("libtests", "JSON parse unexpected ,"); |
| 1050 | throw std::runtime_error( | 1073 | throw std::runtime_error( |
| 1051 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 1074 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 1052 | ": unexpected comma"); | 1075 | ": unexpected comma"); |
| 1053 | } | 1076 | } |
| 1054 | } else if (delimiter != '\0') { | 1077 | } else if (delimiter != '\0') { |
| @@ -1074,7 +1097,7 @@ JSONParser::handleToken() | @@ -1074,7 +1097,7 @@ JSONParser::handleToken() | ||
| 1074 | next_state = ps_stack.back(); | 1097 | next_state = ps_stack.back(); |
| 1075 | ps_stack.pop_back(); | 1098 | ps_stack.pop_back(); |
| 1076 | auto tos = stack.back(); | 1099 | auto tos = stack.back(); |
| 1077 | - tos->setEnd(QIntC::to_size(tok_end - cstr)); | 1100 | + tos->setEnd(offset); |
| 1078 | if (reactor) { | 1101 | if (reactor) { |
| 1079 | reactor->containerEnd(*tos); | 1102 | reactor->containerEnd(*tos); |
| 1080 | } | 1103 | } |
| @@ -1086,8 +1109,8 @@ JSONParser::handleToken() | @@ -1086,8 +1109,8 @@ JSONParser::handleToken() | ||
| 1086 | "JSONParser::handleToken: unexpected delimiter in transition"); | 1109 | "JSONParser::handleToken: unexpected delimiter in transition"); |
| 1087 | } else if (item.get()) { | 1110 | } else if (item.get()) { |
| 1088 | if (!(item->isArray() || item->isDictionary())) { | 1111 | if (!(item->isArray() || item->isDictionary())) { |
| 1089 | - item->setStart(QIntC::to_size(tok_start - cstr)); | ||
| 1090 | - item->setEnd(QIntC::to_size(tok_end - cstr)); | 1112 | + item->setStart(offset - token.length()); |
| 1113 | + item->setEnd(offset); | ||
| 1091 | } | 1114 | } |
| 1092 | 1115 | ||
| 1093 | std::shared_ptr<JSON> tos; | 1116 | std::shared_ptr<JSON> tos; |
| @@ -1149,23 +1172,17 @@ JSONParser::handleToken() | @@ -1149,23 +1172,17 @@ JSONParser::handleToken() | ||
| 1149 | } | 1172 | } |
| 1150 | if (ps_stack.size() > 500) { | 1173 | if (ps_stack.size() > 500) { |
| 1151 | throw std::runtime_error( | 1174 | throw std::runtime_error( |
| 1152 | - "JSON: offset " + QUtil::int_to_string(p - cstr) + | 1175 | + "JSON: offset " + QUtil::uint_to_string(offset) + |
| 1153 | ": maximum object depth exceeded"); | 1176 | ": maximum object depth exceeded"); |
| 1154 | } | 1177 | } |
| 1155 | parser_state = next_state; | 1178 | parser_state = next_state; |
| 1156 | - tok_start = nullptr; | ||
| 1157 | - tok_end = nullptr; | ||
| 1158 | lex_state = ls_top; | 1179 | lex_state = ls_top; |
| 1159 | } | 1180 | } |
| 1160 | 1181 | ||
| 1161 | std::shared_ptr<JSON> | 1182 | std::shared_ptr<JSON> |
| 1162 | -JSONParser::parse(std::string const& s) | 1183 | +JSONParser::parse() |
| 1163 | { | 1184 | { |
| 1164 | - cstr = s.c_str(); | ||
| 1165 | - end = cstr + s.length(); | ||
| 1166 | - p = cstr; | ||
| 1167 | - | ||
| 1168 | - while (p < end) { | 1185 | + while (!done) { |
| 1169 | getToken(); | 1186 | getToken(); |
| 1170 | handleToken(); | 1187 | handleToken(); |
| 1171 | } | 1188 | } |
| @@ -1181,10 +1198,18 @@ JSONParser::parse(std::string const& s) | @@ -1181,10 +1198,18 @@ JSONParser::parse(std::string const& s) | ||
| 1181 | } | 1198 | } |
| 1182 | 1199 | ||
| 1183 | JSON | 1200 | JSON |
| 1184 | -JSON::parse(std::string const& s, Reactor* reactor) | 1201 | +JSON::parse(InputSource& is, Reactor* reactor) |
| 1202 | +{ | ||
| 1203 | + JSONParser jp(is, reactor); | ||
| 1204 | + return *jp.parse(); | ||
| 1205 | +} | ||
| 1206 | + | ||
| 1207 | +JSON | ||
| 1208 | +JSON::parse(std::string const& s) | ||
| 1185 | { | 1209 | { |
| 1186 | - JSONParser jp(reactor); | ||
| 1187 | - return *jp.parse(s); | 1210 | + BufferInputSource bis("json input", s); |
| 1211 | + JSONParser jp(bis, nullptr); | ||
| 1212 | + return *jp.parse(); | ||
| 1188 | } | 1213 | } |
| 1189 | 1214 | ||
| 1190 | void | 1215 | void |
libtests/json_parse.cc
| 1 | +#include <qpdf/FileInputSource.hh> | ||
| 1 | #include <qpdf/JSON.hh> | 2 | #include <qpdf/JSON.hh> |
| 2 | #include <qpdf/QUtil.hh> | 3 | #include <qpdf/QUtil.hh> |
| 3 | #include <cstdlib> | 4 | #include <cstdlib> |
| @@ -103,11 +104,8 @@ main(int argc, char* argv[]) | @@ -103,11 +104,8 @@ main(int argc, char* argv[]) | ||
| 103 | } | 104 | } |
| 104 | } | 105 | } |
| 105 | try { | 106 | try { |
| 106 | - std::shared_ptr<char> buf; | ||
| 107 | - size_t size; | ||
| 108 | - QUtil::read_file_into_memory(filename, buf, size); | ||
| 109 | - std::string s(buf.get(), size); | ||
| 110 | - std::cout << JSON::parse(s, reactor.get()).unparse() << std::endl; | 107 | + FileInputSource is(filename); |
| 108 | + std::cout << JSON::parse(is, reactor.get()).unparse() << std::endl; | ||
| 111 | } catch (std::exception& e) { | 109 | } catch (std::exception& e) { |
| 112 | std::cerr << "exception: " << filename << ": " << e.what() << std::endl; | 110 | std::cerr << "exception: " << filename << ": " << e.what() << std::endl; |
| 113 | return 2; | 111 | return 2; |