Commit f2ccff52883adde3f093b1fd6cd4c66d3a5f6cdf
Committed by m-holger
1 parent 3d83638d
Rename QPDFParser to qpdf::impl::Parser
Co-authored-by: m-holger <34626170+m-holger@users.noreply.github.com>
Showing 7 changed files with 195 additions and 180 deletions
include/qpdf/QPDFObjectHandle.hh
| @@ -61,11 +61,14 @@ class QPDFTokenizer; | @@ -61,11 +61,14 @@ class QPDFTokenizer; | ||
| 61 | class QPDFExc; | 61 | class QPDFExc; |
| 62 | class Pl_QPDFTokenizer; | 62 | class Pl_QPDFTokenizer; |
| 63 | class QPDFMatrix; | 63 | class QPDFMatrix; |
| 64 | -class QPDFParser; | 64 | +namespace qpdf::impl |
| 65 | +{ | ||
| 66 | + class Parser; | ||
| 67 | +} | ||
| 65 | 68 | ||
| 66 | class QPDFObjectHandle: public qpdf::BaseHandle | 69 | class QPDFObjectHandle: public qpdf::BaseHandle |
| 67 | { | 70 | { |
| 68 | - friend class QPDFParser; | 71 | + friend class qpdf::impl::Parser; |
| 69 | 72 | ||
| 70 | public: | 73 | public: |
| 71 | // This class is used by replaceStreamData. It provides an alternative way of associating | 74 | // This class is used by replaceStreamData. It provides an alternative way of associating |
include/qpdf/QPDFTokenizer.hh
| @@ -31,6 +31,10 @@ | @@ -31,6 +31,10 @@ | ||
| 31 | namespace qpdf | 31 | namespace qpdf |
| 32 | { | 32 | { |
| 33 | class Tokenizer; | 33 | class Tokenizer; |
| 34 | + namespace impl | ||
| 35 | + { | ||
| 36 | + class Parser; | ||
| 37 | + } | ||
| 34 | } // namespace qpdf | 38 | } // namespace qpdf |
| 35 | 39 | ||
| 36 | class QPDFTokenizer | 40 | class QPDFTokenizer |
| @@ -203,7 +207,7 @@ class QPDFTokenizer | @@ -203,7 +207,7 @@ class QPDFTokenizer | ||
| 203 | void expectInlineImage(InputSource& input); | 207 | void expectInlineImage(InputSource& input); |
| 204 | 208 | ||
| 205 | private: | 209 | private: |
| 206 | - friend class QPDFParser; | 210 | + friend class qpdf::impl::Parser; |
| 207 | 211 | ||
| 208 | QPDFTokenizer(QPDFTokenizer const&) = delete; | 212 | QPDFTokenizer(QPDFTokenizer const&) = delete; |
| 209 | QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; | 213 | QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; |
libqpdf/QPDFObjectHandle.cc
| @@ -25,6 +25,8 @@ | @@ -25,6 +25,8 @@ | ||
| 25 | using namespace std::literals; | 25 | using namespace std::literals; |
| 26 | using namespace qpdf; | 26 | using namespace qpdf; |
| 27 | 27 | ||
| 28 | +using Parser = impl::Parser; | ||
| 29 | + | ||
| 28 | const Null Null::temp_; | 30 | const Null Null::temp_; |
| 29 | 31 | ||
| 30 | BaseHandle:: | 32 | BaseHandle:: |
| @@ -1540,7 +1542,7 @@ QPDFObjectHandle::parse( | @@ -1540,7 +1542,7 @@ QPDFObjectHandle::parse( | ||
| 1540 | QPDF* context, std::string const& object_str, std::string const& object_description) | 1542 | QPDF* context, std::string const& object_str, std::string const& object_description) |
| 1541 | { | 1543 | { |
| 1542 | auto input = is::OffsetBuffer("parsed object", object_str); | 1544 | auto input = is::OffsetBuffer("parsed object", object_str); |
| 1543 | - auto result = QPDFParser::parse(input, object_description, context); | 1545 | + auto result = Parser::parse(input, object_description, context); |
| 1544 | size_t offset = QIntC::to_size(input.tell()); | 1546 | size_t offset = QIntC::to_size(input.tell()); |
| 1545 | while (offset < object_str.length()) { | 1547 | while (offset < object_str.length()) { |
| 1546 | if (!isspace(object_str.at(offset))) { | 1548 | if (!isspace(object_str.at(offset))) { |
| @@ -1661,7 +1663,7 @@ QPDFObjectHandle::parseContentStream_data( | @@ -1661,7 +1663,7 @@ QPDFObjectHandle::parseContentStream_data( | ||
| 1661 | auto input = is::OffsetBuffer(description, stream_data); | 1663 | auto input = is::OffsetBuffer(description, stream_data); |
| 1662 | Tokenizer tokenizer; | 1664 | Tokenizer tokenizer; |
| 1663 | tokenizer.allowEOF(); | 1665 | tokenizer.allowEOF(); |
| 1664 | - auto sp_description = QPDFParser::make_description(description, "content"); | 1666 | + auto sp_description = Parser::make_description(description, "content"); |
| 1665 | while (QIntC::to_size(input.tell()) < stream_length) { | 1667 | while (QIntC::to_size(input.tell()) < stream_length) { |
| 1666 | // Read a token and seek to the beginning. The offset we get from this process is the | 1668 | // Read a token and seek to the beginning. The offset we get from this process is the |
| 1667 | // beginning of the next non-ignorable (space, comment) token. This way, the offset and | 1669 | // beginning of the next non-ignorable (space, comment) token. This way, the offset and |
| @@ -1669,7 +1671,7 @@ QPDFObjectHandle::parseContentStream_data( | @@ -1669,7 +1671,7 @@ QPDFObjectHandle::parseContentStream_data( | ||
| 1669 | tokenizer.nextToken(input, "content", true); | 1671 | tokenizer.nextToken(input, "content", true); |
| 1670 | qpdf_offset_t offset = input.getLastOffset(); | 1672 | qpdf_offset_t offset = input.getLastOffset(); |
| 1671 | input.seek(offset, SEEK_SET); | 1673 | input.seek(offset, SEEK_SET); |
| 1672 | - auto obj = QPDFParser::parse_content(input, sp_description, tokenizer, context); | 1674 | + auto obj = Parser::parse_content(input, sp_description, tokenizer, context); |
| 1673 | if (!obj) { | 1675 | if (!obj) { |
| 1674 | // EOF | 1676 | // EOF |
| 1675 | break; | 1677 | break; |
| @@ -1678,7 +1680,7 @@ QPDFObjectHandle::parseContentStream_data( | @@ -1678,7 +1680,7 @@ QPDFObjectHandle::parseContentStream_data( | ||
| 1678 | if (callbacks) { | 1680 | if (callbacks) { |
| 1679 | callbacks->handleObject(obj, QIntC::to_size(offset), length); | 1681 | callbacks->handleObject(obj, QIntC::to_size(offset), length); |
| 1680 | } | 1682 | } |
| 1681 | - if (obj.isOperator() && (obj.getOperatorValue() == "ID")) { | 1683 | + if (obj.isOperator() && obj.getOperatorValue() == "ID") { |
| 1682 | // Discard next character; it is the space after ID that terminated the token. Read | 1684 | // Discard next character; it is the space after ID that terminated the token. Read |
| 1683 | // until end of inline image. | 1685 | // until end of inline image. |
| 1684 | char ch; | 1686 | char ch; |
| @@ -1731,7 +1733,7 @@ QPDFObjectHandle::parse( | @@ -1731,7 +1733,7 @@ QPDFObjectHandle::parse( | ||
| 1731 | StringDecrypter* decrypter, | 1733 | StringDecrypter* decrypter, |
| 1732 | QPDF* context) | 1734 | QPDF* context) |
| 1733 | { | 1735 | { |
| 1734 | - return QPDFParser::parse(*input, object_description, tokenizer, empty, decrypter, context); | 1736 | + return Parser::parse(*input, object_description, tokenizer, empty, decrypter, context); |
| 1735 | } | 1737 | } |
| 1736 | 1738 | ||
| 1737 | qpdf_offset_t | 1739 | qpdf_offset_t |
libqpdf/QPDFParser.cc
| @@ -46,12 +46,13 @@ class QPDF::Doc::ParseGuard | @@ -46,12 +46,13 @@ class QPDF::Doc::ParseGuard | ||
| 46 | }; | 46 | }; |
| 47 | 47 | ||
| 48 | using ParseGuard = QPDF::Doc::ParseGuard; | 48 | using ParseGuard = QPDF::Doc::ParseGuard; |
| 49 | +using Parser = qpdf::impl::Parser; | ||
| 49 | 50 | ||
| 50 | QPDFObjectHandle | 51 | QPDFObjectHandle |
| 51 | -QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context) | 52 | +Parser::parse(InputSource& input, std::string const& object_description, QPDF* context) |
| 52 | { | 53 | { |
| 53 | qpdf::Tokenizer tokenizer; | 54 | qpdf::Tokenizer tokenizer; |
| 54 | - if (auto result = QPDFParser( | 55 | + if (auto result = Parser( |
| 55 | input, | 56 | input, |
| 56 | make_description(input.getName(), object_description), | 57 | make_description(input.getName(), object_description), |
| 57 | object_description, | 58 | object_description, |
| @@ -66,14 +67,14 @@ QPDFParser::parse(InputSource& input, std::string const& object_description, QPD | @@ -66,14 +67,14 @@ QPDFParser::parse(InputSource& input, std::string const& object_description, QPD | ||
| 66 | } | 67 | } |
| 67 | 68 | ||
| 68 | QPDFObjectHandle | 69 | QPDFObjectHandle |
| 69 | -QPDFParser::parse_content( | 70 | +Parser::parse_content( |
| 70 | InputSource& input, | 71 | InputSource& input, |
| 71 | std::shared_ptr<QPDFObject::Description> sp_description, | 72 | std::shared_ptr<QPDFObject::Description> sp_description, |
| 72 | qpdf::Tokenizer& tokenizer, | 73 | qpdf::Tokenizer& tokenizer, |
| 73 | QPDF* context) | 74 | QPDF* context) |
| 74 | { | 75 | { |
| 75 | static const std::string content("content"); // GCC12 - make constexpr | 76 | static const std::string content("content"); // GCC12 - make constexpr |
| 76 | - auto p = QPDFParser( | 77 | + auto p = Parser( |
| 77 | input, | 78 | input, |
| 78 | std::move(sp_description), | 79 | std::move(sp_description), |
| 79 | content, | 80 | content, |
| @@ -93,7 +94,7 @@ QPDFParser::parse_content( | @@ -93,7 +94,7 @@ QPDFParser::parse_content( | ||
| 93 | } | 94 | } |
| 94 | 95 | ||
| 95 | QPDFObjectHandle | 96 | QPDFObjectHandle |
| 96 | -QPDFParser::parse( | 97 | +Parser::parse( |
| 97 | InputSource& input, | 98 | InputSource& input, |
| 98 | std::string const& object_description, | 99 | std::string const& object_description, |
| 99 | QPDFTokenizer& tokenizer, | 100 | QPDFTokenizer& tokenizer, |
| @@ -103,7 +104,7 @@ QPDFParser::parse( | @@ -103,7 +104,7 @@ QPDFParser::parse( | ||
| 103 | { | 104 | { |
| 104 | // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the | 105 | // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the |
| 105 | // only user of the 'empty' member. When removing this overload also remove 'empty'. | 106 | // only user of the 'empty' member. When removing this overload also remove 'empty'. |
| 106 | - auto p = QPDFParser( | 107 | + auto p = Parser( |
| 107 | input, | 108 | input, |
| 108 | make_description(input.getName(), object_description), | 109 | make_description(input.getName(), object_description), |
| 109 | object_description, | 110 | object_description, |
| @@ -120,7 +121,7 @@ QPDFParser::parse( | @@ -120,7 +121,7 @@ QPDFParser::parse( | ||
| 120 | } | 121 | } |
| 121 | 122 | ||
| 122 | QPDFObjectHandle | 123 | QPDFObjectHandle |
| 123 | -QPDFParser::parse( | 124 | +Parser::parse( |
| 124 | InputSource& input, | 125 | InputSource& input, |
| 125 | std::string const& object_description, | 126 | std::string const& object_description, |
| 126 | qpdf::Tokenizer& tokenizer, | 127 | qpdf::Tokenizer& tokenizer, |
| @@ -128,7 +129,7 @@ QPDFParser::parse( | @@ -128,7 +129,7 @@ QPDFParser::parse( | ||
| 128 | QPDF& context, | 129 | QPDF& context, |
| 129 | bool sanity_checks) | 130 | bool sanity_checks) |
| 130 | { | 131 | { |
| 131 | - return QPDFParser( | 132 | + return Parser( |
| 132 | input, | 133 | input, |
| 133 | make_description(input.getName(), object_description), | 134 | make_description(input.getName(), object_description), |
| 134 | object_description, | 135 | object_description, |
| @@ -143,10 +144,10 @@ QPDFParser::parse( | @@ -143,10 +144,10 @@ QPDFParser::parse( | ||
| 143 | } | 144 | } |
| 144 | 145 | ||
| 145 | QPDFObjectHandle | 146 | QPDFObjectHandle |
| 146 | -QPDFParser::parse( | 147 | +Parser::parse( |
| 147 | is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) | 148 | is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) |
| 148 | { | 149 | { |
| 149 | - return QPDFParser( | 150 | + return Parser( |
| 150 | input, | 151 | input, |
| 151 | std::make_shared<QPDFObject::Description>( | 152 | std::make_shared<QPDFObject::Description>( |
| 152 | QPDFObject::ObjStreamDescr(stream_id, obj_id)), | 153 | QPDFObject::ObjStreamDescr(stream_id, obj_id)), |
| @@ -161,7 +162,7 @@ QPDFParser::parse( | @@ -161,7 +162,7 @@ QPDFParser::parse( | ||
| 161 | } | 162 | } |
| 162 | 163 | ||
| 163 | QPDFObjectHandle | 164 | QPDFObjectHandle |
| 164 | -QPDFParser::parse(bool content_stream) | 165 | +Parser::parse(bool content_stream) |
| 165 | { | 166 | { |
| 166 | try { | 167 | try { |
| 167 | return parse_first(content_stream); | 168 | return parse_first(content_stream); |
| @@ -178,7 +179,7 @@ QPDFParser::parse(bool content_stream) | @@ -178,7 +179,7 @@ QPDFParser::parse(bool content_stream) | ||
| 178 | } | 179 | } |
| 179 | 180 | ||
| 180 | QPDFObjectHandle | 181 | QPDFObjectHandle |
| 181 | -QPDFParser::parse_first(bool content_stream) | 182 | +Parser::parse_first(bool content_stream) |
| 182 | { | 183 | { |
| 183 | // This method must take care not to resolve any objects. Don't check the type of any object | 184 | // This method must take care not to resolve any objects. Don't check the type of any object |
| 184 | // without first ensuring that it is a direct object. Otherwise, doing so may have the side | 185 | // without first ensuring that it is a direct object. Otherwise, doing so may have the side |
| @@ -279,7 +280,7 @@ QPDFParser::parse_first(bool content_stream) | @@ -279,7 +280,7 @@ QPDFParser::parse_first(bool content_stream) | ||
| 279 | } | 280 | } |
| 280 | 281 | ||
| 281 | QPDFObjectHandle | 282 | QPDFObjectHandle |
| 282 | -QPDFParser::parse_remainder(bool content_stream) | 283 | +Parser::parse_remainder(bool content_stream) |
| 283 | { | 284 | { |
| 284 | // This method must take care not to resolve any objects. Don't check the type of any object | 285 | // This method must take care not to resolve any objects. Don't check the type of any object |
| 285 | // without first ensuring that it is a direct object. Otherwise, doing so may have the side | 286 | // without first ensuring that it is a direct object. Otherwise, doing so may have the side |
| @@ -312,7 +313,7 @@ QPDFParser::parse_remainder(bool content_stream) | @@ -312,7 +313,7 @@ QPDFParser::parse_remainder(bool content_stream) | ||
| 312 | tokenizer_.getValue() == "R") { | 313 | tokenizer_.getValue() == "R") { |
| 313 | if (!context_) { | 314 | if (!context_) { |
| 314 | throw std::logic_error( | 315 | throw std::logic_error( |
| 315 | - "QPDFParser::parse called without context on an object with indirect " | 316 | + "Parser::parse called without context on an object with indirect " |
| 316 | "references"); | 317 | "references"); |
| 317 | } | 318 | } |
| 318 | auto id = QIntC::to_int(int_buffer_[(int_count_ - 1) % 2]); | 319 | auto id = QIntC::to_int(int_buffer_[(int_count_ - 1) % 2]); |
| @@ -533,7 +534,7 @@ QPDFParser::parse_remainder(bool content_stream) | @@ -533,7 +534,7 @@ QPDFParser::parse_remainder(bool content_stream) | ||
| 533 | } | 534 | } |
| 534 | 535 | ||
| 535 | void | 536 | void |
| 536 | -QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) | 537 | +Parser::add(std::shared_ptr<QPDFObject>&& obj) |
| 537 | { | 538 | { |
| 538 | if (frame_->state != st_dictionary_value) { | 539 | if (frame_->state != st_dictionary_value) { |
| 539 | // If state is st_dictionary_key then there is a missing key. Push onto olist for | 540 | // If state is st_dictionary_key then there is a missing key. Push onto olist for |
| @@ -548,7 +549,7 @@ QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) | @@ -548,7 +549,7 @@ QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) | ||
| 548 | } | 549 | } |
| 549 | 550 | ||
| 550 | void | 551 | void |
| 551 | -QPDFParser::add_null() | 552 | +Parser::add_null() |
| 552 | { | 553 | { |
| 553 | const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>(); | 554 | const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>(); |
| 554 | 555 | ||
| @@ -566,7 +567,7 @@ QPDFParser::add_null() | @@ -566,7 +567,7 @@ QPDFParser::add_null() | ||
| 566 | } | 567 | } |
| 567 | 568 | ||
| 568 | void | 569 | void |
| 569 | -QPDFParser::add_bad_null(std::string const& msg) | 570 | +Parser::add_bad_null(std::string const& msg) |
| 570 | { | 571 | { |
| 571 | warn(msg); | 572 | warn(msg); |
| 572 | check_too_many_bad_tokens(); | 573 | check_too_many_bad_tokens(); |
| @@ -574,7 +575,7 @@ QPDFParser::add_bad_null(std::string const& msg) | @@ -574,7 +575,7 @@ QPDFParser::add_bad_null(std::string const& msg) | ||
| 574 | } | 575 | } |
| 575 | 576 | ||
| 576 | void | 577 | void |
| 577 | -QPDFParser::add_int(int count) | 578 | +Parser::add_int(int count) |
| 578 | { | 579 | { |
| 579 | auto obj = QPDFObject::create<QPDF_Integer>(int_buffer_[count % 2]); | 580 | auto obj = QPDFObject::create<QPDF_Integer>(int_buffer_[count % 2]); |
| 580 | obj->setDescription(context_, description_, last_offset_buffer_[count % 2]); | 581 | obj->setDescription(context_, description_, last_offset_buffer_[count % 2]); |
| @@ -583,7 +584,7 @@ QPDFParser::add_int(int count) | @@ -583,7 +584,7 @@ QPDFParser::add_int(int count) | ||
| 583 | 584 | ||
| 584 | template <typename T, typename... Args> | 585 | template <typename T, typename... Args> |
| 585 | void | 586 | void |
| 586 | -QPDFParser::add_scalar(Args&&... args) | 587 | +Parser::add_scalar(Args&&... args) |
| 587 | { | 588 | { |
| 588 | auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_); | 589 | auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_); |
| 589 | if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) { | 590 | if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) { |
| @@ -599,7 +600,7 @@ QPDFParser::add_scalar(Args&&... args) | @@ -599,7 +600,7 @@ QPDFParser::add_scalar(Args&&... args) | ||
| 599 | 600 | ||
| 600 | template <typename T, typename... Args> | 601 | template <typename T, typename... Args> |
| 601 | QPDFObjectHandle | 602 | QPDFObjectHandle |
| 602 | -QPDFParser::with_description(Args&&... args) | 603 | +Parser::with_description(Args&&... args) |
| 603 | { | 604 | { |
| 604 | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 605 | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); |
| 605 | obj->setDescription(context_, description_, start_); | 606 | obj->setDescription(context_, description_, start_); |
| @@ -607,7 +608,7 @@ QPDFParser::with_description(Args&&... args) | @@ -607,7 +608,7 @@ QPDFParser::with_description(Args&&... args) | ||
| 607 | } | 608 | } |
| 608 | 609 | ||
| 609 | void | 610 | void |
| 610 | -QPDFParser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset) | 611 | +Parser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset) |
| 611 | { | 612 | { |
| 612 | if (obj) { | 613 | if (obj) { |
| 613 | obj->setDescription(context_, description_, parsed_offset); | 614 | obj->setDescription(context_, description_, parsed_offset); |
| @@ -615,7 +616,7 @@ QPDFParser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset) | @@ -615,7 +616,7 @@ QPDFParser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset) | ||
| 615 | } | 616 | } |
| 616 | 617 | ||
| 617 | void | 618 | void |
| 618 | -QPDFParser::fix_missing_keys() | 619 | +Parser::fix_missing_keys() |
| 619 | { | 620 | { |
| 620 | std::set<std::string> names; | 621 | std::set<std::string> names; |
| 621 | for (auto& obj: frame_->olist) { | 622 | for (auto& obj: frame_->olist) { |
| @@ -641,7 +642,7 @@ QPDFParser::fix_missing_keys() | @@ -641,7 +642,7 @@ QPDFParser::fix_missing_keys() | ||
| 641 | } | 642 | } |
| 642 | 643 | ||
| 643 | void | 644 | void |
| 644 | -QPDFParser::check_too_many_bad_tokens() | 645 | +Parser::check_too_many_bad_tokens() |
| 645 | { | 646 | { |
| 646 | auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_); | 647 | auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_); |
| 647 | if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) { | 648 | if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) { |
| @@ -676,7 +677,7 @@ QPDFParser::check_too_many_bad_tokens() | @@ -676,7 +677,7 @@ QPDFParser::check_too_many_bad_tokens() | ||
| 676 | } | 677 | } |
| 677 | 678 | ||
| 678 | void | 679 | void |
| 679 | -QPDFParser::limits_error(std::string const& limit, std::string const& msg) | 680 | +Parser::limits_error(std::string const& limit, std::string const& msg) |
| 680 | { | 681 | { |
| 681 | Limits::error(); | 682 | Limits::error(); |
| 682 | warn("limits error("s + limit + "): " + msg); | 683 | warn("limits error("s + limit + "): " + msg); |
| @@ -684,7 +685,7 @@ QPDFParser::limits_error(std::string const& limit, std::string const& msg) | @@ -684,7 +685,7 @@ QPDFParser::limits_error(std::string const& limit, std::string const& msg) | ||
| 684 | } | 685 | } |
| 685 | 686 | ||
| 686 | void | 687 | void |
| 687 | -QPDFParser::warn(QPDFExc const& e) const | 688 | +Parser::warn(QPDFExc const& e) const |
| 688 | { | 689 | { |
| 689 | // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the | 690 | // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the |
| 690 | // object. If parsing for some other reason, such as an explicit creation of an object from a | 691 | // object. If parsing for some other reason, such as an explicit creation of an object from a |
| @@ -697,7 +698,7 @@ QPDFParser::warn(QPDFExc const& e) const | @@ -697,7 +698,7 @@ QPDFParser::warn(QPDFExc const& e) const | ||
| 697 | } | 698 | } |
| 698 | 699 | ||
| 699 | void | 700 | void |
| 700 | -QPDFParser::warn_duplicate_key() | 701 | +Parser::warn_duplicate_key() |
| 701 | { | 702 | { |
| 702 | warn( | 703 | warn( |
| 703 | frame_->offset, | 704 | frame_->offset, |
| @@ -706,7 +707,7 @@ QPDFParser::warn_duplicate_key() | @@ -706,7 +707,7 @@ QPDFParser::warn_duplicate_key() | ||
| 706 | } | 707 | } |
| 707 | 708 | ||
| 708 | void | 709 | void |
| 709 | -QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const | 710 | +Parser::warn(qpdf_offset_t offset, std::string const& msg) const |
| 710 | { | 711 | { |
| 711 | if (stream_id_) { | 712 | if (stream_id_) { |
| 712 | std::string descr = "object "s + std::to_string(obj_id_) + " 0"; | 713 | std::string descr = "object "s + std::to_string(obj_id_) + " 0"; |
| @@ -718,7 +719,7 @@ QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const | @@ -718,7 +719,7 @@ QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const | ||
| 718 | } | 719 | } |
| 719 | 720 | ||
| 720 | void | 721 | void |
| 721 | -QPDFParser::warn(std::string const& msg) const | 722 | +Parser::warn(std::string const& msg) const |
| 722 | { | 723 | { |
| 723 | warn(input_.getLastOffset(), msg); | 724 | warn(input_.getLastOffset(), msg); |
| 724 | } | 725 | } |
libqpdf/QPDF_objects.cc
| @@ -25,6 +25,7 @@ using namespace qpdf; | @@ -25,6 +25,7 @@ using namespace qpdf; | ||
| 25 | using namespace std::literals; | 25 | using namespace std::literals; |
| 26 | 26 | ||
| 27 | using Objects = QPDF::Doc::Objects; | 27 | using Objects = QPDF::Doc::Objects; |
| 28 | +using Parser = impl::Parser; | ||
| 28 | 29 | ||
| 29 | QPDFXRefEntry::QPDFXRefEntry() = default; | 30 | QPDFXRefEntry::QPDFXRefEntry() = default; |
| 30 | 31 | ||
| @@ -1287,7 +1288,7 @@ Objects::readTrailer() | @@ -1287,7 +1288,7 @@ Objects::readTrailer() | ||
| 1287 | { | 1288 | { |
| 1288 | qpdf_offset_t offset = m->file->tell(); | 1289 | qpdf_offset_t offset = m->file->tell(); |
| 1289 | auto object = | 1290 | auto object = |
| 1290 | - QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, qpdf, m->reconstructed_xref); | 1291 | + Parser::parse(*m->file, "trailer", m->tokenizer, nullptr, qpdf, m->reconstructed_xref); |
| 1291 | if (object.isDictionary() && m->objects.readToken(*m->file).isWord("stream")) { | 1292 | if (object.isDictionary() && m->objects.readToken(*m->file).isWord("stream")) { |
| 1292 | warn(damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer")); | 1293 | warn(damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer")); |
| 1293 | } | 1294 | } |
| @@ -1304,7 +1305,7 @@ Objects::readObject(std::string const& description, QPDFObjGen og) | @@ -1304,7 +1305,7 @@ Objects::readObject(std::string const& description, QPDFObjGen og) | ||
| 1304 | 1305 | ||
| 1305 | StringDecrypter decrypter{&qpdf, og}; | 1306 | StringDecrypter decrypter{&qpdf, og}; |
| 1306 | StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; | 1307 | StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; |
| 1307 | - auto object = QPDFParser::parse( | 1308 | + auto object = Parser::parse( |
| 1308 | *m->file, | 1309 | *m->file, |
| 1309 | m->last_object_description, | 1310 | m->last_object_description, |
| 1310 | m->tokenizer, | 1311 | m->tokenizer, |
| @@ -1834,7 +1835,7 @@ Objects::resolveObjectsInStream(int obj_stream_number) | @@ -1834,7 +1835,7 @@ Objects::resolveObjectsInStream(int obj_stream_number) | ||
| 1834 | if (entry != m->xref_table.end() && entry->second.getType() == 2 && | 1835 | if (entry != m->xref_table.end() && entry->second.getType() == 2 && |
| 1835 | entry->second.getObjStreamNumber() == obj_stream_number) { | 1836 | entry->second.getObjStreamNumber() == obj_stream_number) { |
| 1836 | is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset); | 1837 | is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset); |
| 1837 | - if (auto oh = QPDFParser::parse(in, obj_stream_number, obj_id, m->tokenizer, qpdf)) { | 1838 | + if (auto oh = Parser::parse(in, obj_stream_number, obj_id, m->tokenizer, qpdf)) { |
| 1838 | updateCache(og, oh.obj_sp(), end_before_space, end_after_space); | 1839 | updateCache(og, oh.obj_sp(), end_before_space, end_after_space); |
| 1839 | } | 1840 | } |
| 1840 | } else { | 1841 | } else { |
libqpdf/qpdf/QPDFParser.hh
| @@ -13,153 +13,157 @@ | @@ -13,153 +13,157 @@ | ||
| 13 | using namespace qpdf; | 13 | using namespace qpdf; |
| 14 | using namespace qpdf::global; | 14 | using namespace qpdf::global; |
| 15 | 15 | ||
| 16 | -class QPDFParser | 16 | +namespace qpdf::impl |
| 17 | { | 17 | { |
| 18 | - public: | ||
| 19 | - class Error: public std::exception | 18 | + class Parser |
| 20 | { | 19 | { |
| 21 | public: | 20 | public: |
| 22 | - Error() = default; | ||
| 23 | - virtual ~Error() noexcept = default; | ||
| 24 | - }; | ||
| 25 | - | ||
| 26 | - static QPDFObjectHandle | ||
| 27 | - parse(InputSource& input, std::string const& object_description, QPDF* context); | ||
| 28 | - | ||
| 29 | - static QPDFObjectHandle parse_content( | ||
| 30 | - InputSource& input, | ||
| 31 | - std::shared_ptr<QPDFObject::Description> sp_description, | ||
| 32 | - qpdf::Tokenizer& tokenizer, | ||
| 33 | - QPDF* context); | ||
| 34 | - | ||
| 35 | - // For use by deprecated QPDFObjectHandle::parse. | ||
| 36 | - static QPDFObjectHandle parse( | ||
| 37 | - InputSource& input, | ||
| 38 | - std::string const& object_description, | ||
| 39 | - QPDFTokenizer& tokenizer, | ||
| 40 | - bool& empty, | ||
| 41 | - QPDFObjectHandle::StringDecrypter* decrypter, | ||
| 42 | - QPDF* context); | ||
| 43 | - | ||
| 44 | - // For use by QPDF. | ||
| 45 | - static QPDFObjectHandle parse( | ||
| 46 | - InputSource& input, | ||
| 47 | - std::string const& object_description, | ||
| 48 | - qpdf::Tokenizer& tokenizer, | ||
| 49 | - QPDFObjectHandle::StringDecrypter* decrypter, | ||
| 50 | - QPDF& context, | ||
| 51 | - bool sanity_checks); | ||
| 52 | - | ||
| 53 | - static QPDFObjectHandle parse( | ||
| 54 | - qpdf::is::OffsetBuffer& input, | ||
| 55 | - int stream_id, | ||
| 56 | - int obj_id, | ||
| 57 | - qpdf::Tokenizer& tokenizer, | ||
| 58 | - QPDF& context); | ||
| 59 | - | ||
| 60 | - static std::shared_ptr<QPDFObject::Description> | ||
| 61 | - make_description(std::string const& input_name, std::string const& object_description) | ||
| 62 | - { | ||
| 63 | - using namespace std::literals; | ||
| 64 | - return std::make_shared<QPDFObject::Description>( | ||
| 65 | - input_name + ", " + object_description + " at offset $PO"); | ||
| 66 | - } | ||
| 67 | - | ||
| 68 | - private: | ||
| 69 | - QPDFParser( | ||
| 70 | - InputSource& input, | ||
| 71 | - std::shared_ptr<QPDFObject::Description> sp_description, | ||
| 72 | - std::string const& object_description, | ||
| 73 | - qpdf::Tokenizer& tokenizer, | ||
| 74 | - QPDFObjectHandle::StringDecrypter* decrypter, | ||
| 75 | - QPDF* context, | ||
| 76 | - bool parse_pdf, | ||
| 77 | - int stream_id = 0, | ||
| 78 | - int obj_id = 0, | ||
| 79 | - bool sanity_checks = false) : | ||
| 80 | - input_(input), | ||
| 81 | - object_description_(object_description), | ||
| 82 | - tokenizer_(tokenizer), | ||
| 83 | - decrypter_(decrypter), | ||
| 84 | - context_(context), | ||
| 85 | - description_(std::move(sp_description)), | ||
| 86 | - parse_pdf_(parse_pdf), | ||
| 87 | - stream_id_(stream_id), | ||
| 88 | - obj_id_(obj_id), | ||
| 89 | - sanity_checks_(sanity_checks) | ||
| 90 | - { | ||
| 91 | - } | ||
| 92 | - | ||
| 93 | - // Parser state. Note: | ||
| 94 | - // state <= st_dictionary_value == (state = st_dictionary_key || state = st_dictionary_value) | ||
| 95 | - enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; | 21 | + class Error: public std::exception |
| 22 | + { | ||
| 23 | + public: | ||
| 24 | + Error() = default; | ||
| 25 | + virtual ~Error() noexcept = default; | ||
| 26 | + }; | ||
| 27 | + | ||
| 28 | + static QPDFObjectHandle | ||
| 29 | + parse(InputSource& input, std::string const& object_description, QPDF* context); | ||
| 30 | + | ||
| 31 | + static QPDFObjectHandle parse_content( | ||
| 32 | + InputSource& input, | ||
| 33 | + std::shared_ptr<QPDFObject::Description> sp_description, | ||
| 34 | + qpdf::Tokenizer& tokenizer, | ||
| 35 | + QPDF* context); | ||
| 36 | + | ||
| 37 | + // For use by deprecated QPDFObjectHandle::parse. | ||
| 38 | + static QPDFObjectHandle parse( | ||
| 39 | + InputSource& input, | ||
| 40 | + std::string const& object_description, | ||
| 41 | + QPDFTokenizer& tokenizer, | ||
| 42 | + bool& empty, | ||
| 43 | + QPDFObjectHandle::StringDecrypter* decrypter, | ||
| 44 | + QPDF* context); | ||
| 45 | + | ||
| 46 | + // For use by QPDF. | ||
| 47 | + static QPDFObjectHandle parse( | ||
| 48 | + InputSource& input, | ||
| 49 | + std::string const& object_description, | ||
| 50 | + qpdf::Tokenizer& tokenizer, | ||
| 51 | + QPDFObjectHandle::StringDecrypter* decrypter, | ||
| 52 | + QPDF& context, | ||
| 53 | + bool sanity_checks); | ||
| 54 | + | ||
| 55 | + static QPDFObjectHandle parse( | ||
| 56 | + qpdf::is::OffsetBuffer& input, | ||
| 57 | + int stream_id, | ||
| 58 | + int obj_id, | ||
| 59 | + qpdf::Tokenizer& tokenizer, | ||
| 60 | + QPDF& context); | ||
| 61 | + | ||
| 62 | + static std::shared_ptr<QPDFObject::Description> | ||
| 63 | + make_description(std::string const& input_name, std::string const& object_description) | ||
| 64 | + { | ||
| 65 | + using namespace std::literals; | ||
| 66 | + return std::make_shared<QPDFObject::Description>( | ||
| 67 | + input_name + ", " + object_description + " at offset $PO"); | ||
| 68 | + } | ||
| 96 | 69 | ||
| 97 | - struct StackFrame | ||
| 98 | - { | ||
| 99 | - StackFrame(InputSource& input, parser_state_e state) : | ||
| 100 | - state(state), | ||
| 101 | - offset(input.tell()) | 70 | + private: |
| 71 | + Parser( | ||
| 72 | + InputSource& input, | ||
| 73 | + std::shared_ptr<QPDFObject::Description> sp_description, | ||
| 74 | + std::string const& object_description, | ||
| 75 | + qpdf::Tokenizer& tokenizer, | ||
| 76 | + QPDFObjectHandle::StringDecrypter* decrypter, | ||
| 77 | + QPDF* context, | ||
| 78 | + bool parse_pdf, | ||
| 79 | + int stream_id = 0, | ||
| 80 | + int obj_id = 0, | ||
| 81 | + bool sanity_checks = false) : | ||
| 82 | + input_(input), | ||
| 83 | + object_description_(object_description), | ||
| 84 | + tokenizer_(tokenizer), | ||
| 85 | + decrypter_(decrypter), | ||
| 86 | + context_(context), | ||
| 87 | + description_(std::move(sp_description)), | ||
| 88 | + parse_pdf_(parse_pdf), | ||
| 89 | + stream_id_(stream_id), | ||
| 90 | + obj_id_(obj_id), | ||
| 91 | + sanity_checks_(sanity_checks) | ||
| 102 | { | 92 | { |
| 103 | } | 93 | } |
| 104 | 94 | ||
| 105 | - std::vector<QPDFObjectHandle> olist; | ||
| 106 | - std::map<std::string, QPDFObjectHandle> dict; | ||
| 107 | - parser_state_e state; | ||
| 108 | - std::string key; | ||
| 109 | - qpdf_offset_t offset; | ||
| 110 | - std::string contents_string; | ||
| 111 | - qpdf_offset_t contents_offset{-1}; | ||
| 112 | - int null_count{0}; | ||
| 113 | - }; | 95 | + // Parser state. Note: |
| 96 | + // state <= st_dictionary_value == (state == st_dictionary_key || state == | ||
| 97 | + // st_dictionary_value) | ||
| 98 | + enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; | ||
| 114 | 99 | ||
| 115 | - QPDFObjectHandle parse(bool content_stream = false); | ||
| 116 | - QPDFObjectHandle parse_first(bool content_stream); | ||
| 117 | - QPDFObjectHandle parse_remainder(bool content_stream); | ||
| 118 | - void add(std::shared_ptr<QPDFObject>&& obj); | ||
| 119 | - void add_null(); | ||
| 120 | - void add_bad_null(std::string const& msg); | ||
| 121 | - void add_int(int count); | ||
| 122 | - template <typename T, typename... Args> | ||
| 123 | - void add_scalar(Args&&... args); | ||
| 124 | - void check_too_many_bad_tokens(); | ||
| 125 | - void warn_duplicate_key(); | ||
| 126 | - void fix_missing_keys(); | ||
| 127 | - [[noreturn]] void limits_error(std::string const& limit, std::string const& msg); | ||
| 128 | - void warn(qpdf_offset_t offset, std::string const& msg) const; | ||
| 129 | - void warn(std::string const& msg) const; | ||
| 130 | - void warn(QPDFExc const&) const; | ||
| 131 | - template <typename T, typename... Args> | ||
| 132 | - // Create a new scalar object complete with parsed offset and description. | ||
| 133 | - // NB the offset includes any leading whitespace. | ||
| 134 | - QPDFObjectHandle with_description(Args&&... args); | ||
| 135 | - void set_description(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset); | ||
| 136 | - InputSource& input_; | ||
| 137 | - std::string const& object_description_; | ||
| 138 | - qpdf::Tokenizer& tokenizer_; | ||
| 139 | - QPDFObjectHandle::StringDecrypter* decrypter_; | ||
| 140 | - QPDF* context_; | ||
| 141 | - std::shared_ptr<QPDFObject::Description> description_; | ||
| 142 | - bool parse_pdf_{false}; | ||
| 143 | - int stream_id_{0}; | ||
| 144 | - int obj_id_{0}; | ||
| 145 | - bool sanity_checks_{false}; | ||
| 146 | - | ||
| 147 | - std::vector<StackFrame> stack_; | ||
| 148 | - StackFrame* frame_{nullptr}; | ||
| 149 | - // Number of recent bad tokens. This will always be > 0 once a bad token has been encountered as | ||
| 150 | - // it only gets incremented or reset when a bad token is encountered. | ||
| 151 | - int bad_count_{0}; | ||
| 152 | - // Number of bad tokens (remaining) before giving up. | ||
| 153 | - uint32_t max_bad_count_{Limits::parser_max_errors()}; | ||
| 154 | - // Number of good tokens since last bad token. Irrelevant if bad_count == 0. | ||
| 155 | - int good_count_{0}; | ||
| 156 | - // Start offset including any leading whitespace. | ||
| 157 | - qpdf_offset_t start_{0}; | ||
| 158 | - // Number of successive integer tokens. | ||
| 159 | - int int_count_{0}; | ||
| 160 | - long long int_buffer_[2]{0, 0}; | ||
| 161 | - qpdf_offset_t last_offset_buffer_[2]{0, 0}; | ||
| 162 | - bool empty_{false}; | ||
| 163 | -}; | 100 | + struct StackFrame |
| 101 | + { | ||
| 102 | + StackFrame(InputSource& input, parser_state_e state) : | ||
| 103 | + state(state), | ||
| 104 | + offset(input.tell()) | ||
| 105 | + { | ||
| 106 | + } | ||
| 107 | + | ||
| 108 | + std::vector<QPDFObjectHandle> olist; | ||
| 109 | + std::map<std::string, QPDFObjectHandle> dict; | ||
| 110 | + parser_state_e state; | ||
| 111 | + std::string key; | ||
| 112 | + qpdf_offset_t offset; | ||
| 113 | + std::string contents_string; | ||
| 114 | + qpdf_offset_t contents_offset{-1}; | ||
| 115 | + int null_count{0}; | ||
| 116 | + }; | ||
| 117 | + | ||
| 118 | + QPDFObjectHandle parse(bool content_stream = false); | ||
| 119 | + QPDFObjectHandle parse_first(bool content_stream); | ||
| 120 | + QPDFObjectHandle parse_remainder(bool content_stream); | ||
| 121 | + void add(std::shared_ptr<QPDFObject>&& obj); | ||
| 122 | + void add_null(); | ||
| 123 | + void add_bad_null(std::string const& msg); | ||
| 124 | + void add_int(int count); | ||
| 125 | + template <typename T, typename... Args> | ||
| 126 | + void add_scalar(Args&&... args); | ||
| 127 | + void check_too_many_bad_tokens(); | ||
| 128 | + void warn_duplicate_key(); | ||
| 129 | + void fix_missing_keys(); | ||
| 130 | + [[noreturn]] void limits_error(std::string const& limit, std::string const& msg); | ||
| 131 | + void warn(qpdf_offset_t offset, std::string const& msg) const; | ||
| 132 | + void warn(std::string const& msg) const; | ||
| 133 | + void warn(QPDFExc const&) const; | ||
| 134 | + template <typename T, typename... Args> | ||
| 135 | + // Create a new scalar object complete with parsed offset and description. | ||
| 136 | + // NB the offset includes any leading whitespace. | ||
| 137 | + QPDFObjectHandle with_description(Args&&... args); | ||
| 138 | + void set_description(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset); | ||
| 139 | + InputSource& input_; | ||
| 140 | + std::string const& object_description_; | ||
| 141 | + qpdf::Tokenizer& tokenizer_; | ||
| 142 | + QPDFObjectHandle::StringDecrypter* decrypter_; | ||
| 143 | + QPDF* context_; | ||
| 144 | + std::shared_ptr<QPDFObject::Description> description_; | ||
| 145 | + bool parse_pdf_{false}; | ||
| 146 | + int stream_id_{0}; | ||
| 147 | + int obj_id_{0}; | ||
| 148 | + bool sanity_checks_{false}; | ||
| 149 | + | ||
| 150 | + std::vector<StackFrame> stack_; | ||
| 151 | + StackFrame* frame_{nullptr}; | ||
| 152 | + // Number of recent bad tokens. This will always be > 0 once a bad token has been | ||
| 153 | + // encountered as it only gets incremented or reset when a bad token is encountered. | ||
| 154 | + int bad_count_{0}; | ||
| 155 | + // Number of bad tokens (remaining) before giving up. | ||
| 156 | + uint32_t max_bad_count_{Limits::parser_max_errors()}; | ||
| 157 | + // Number of good tokens since last bad token. Irrelevant if bad_count_ == 0. | ||
| 158 | + int good_count_{0}; | ||
| 159 | + // Start offset including any leading whitespace. | ||
| 160 | + qpdf_offset_t start_{0}; | ||
| 161 | + // Number of successive integer tokens. | ||
| 162 | + int int_count_{0}; | ||
| 163 | + long long int_buffer_[2]{0, 0}; | ||
| 164 | + qpdf_offset_t last_offset_buffer_[2]{0, 0}; | ||
| 165 | + bool empty_{false}; | ||
| 166 | + }; | ||
| 167 | +} // namespace qpdf::impl | ||
| 164 | 168 | ||
| 165 | #endif // QPDFPARSER_HH | 169 | #endif // QPDFPARSER_HH |
qpdf/qtest/qpdf/parse-object.out
| 1 | [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] | 1 | [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] |
| 2 | -logic error parsing indirect: QPDFParser::parse called without context on an object with indirect references | 2 | +logic error parsing indirect: Parser::parse called without context on an object with indirect references |
| 3 | trailing data: parsed object (trailing test): trailing data found parsing object from string | 3 | trailing data: parsed object (trailing test): trailing data found parsing object from string |
| 4 | WARNING: parsed object (offset 9): unknown token while reading object; treating as string | 4 | WARNING: parsed object (offset 9): unknown token while reading object; treating as string |
| 5 | WARNING: parsed object: treating unexpected brace token as null | 5 | WARNING: parsed object: treating unexpected brace token as null |