Commit f2ccff52883adde3f093b1fd6cd4c66d3a5f6cdf

Authored by copilot-swe-agent[bot]
Committed by m-holger
1 parent 3d83638d

Rename QPDFParser to qpdf::impl::Parser

Co-authored-by: m-holger <34626170+m-holger@users.noreply.github.com>
include/qpdf/QPDFObjectHandle.hh
@@ -61,11 +61,14 @@ class QPDFTokenizer; @@ -61,11 +61,14 @@ class QPDFTokenizer;
61 class QPDFExc; 61 class QPDFExc;
62 class Pl_QPDFTokenizer; 62 class Pl_QPDFTokenizer;
63 class QPDFMatrix; 63 class QPDFMatrix;
64 -class QPDFParser; 64 +namespace qpdf::impl
  65 +{
  66 + class Parser;
  67 +}
65 68
66 class QPDFObjectHandle: public qpdf::BaseHandle 69 class QPDFObjectHandle: public qpdf::BaseHandle
67 { 70 {
68 - friend class QPDFParser; 71 + friend class qpdf::impl::Parser;
69 72
70 public: 73 public:
71 // This class is used by replaceStreamData. It provides an alternative way of associating 74 // This class is used by replaceStreamData. It provides an alternative way of associating
include/qpdf/QPDFTokenizer.hh
@@ -31,6 +31,10 @@ @@ -31,6 +31,10 @@
31 namespace qpdf 31 namespace qpdf
32 { 32 {
33 class Tokenizer; 33 class Tokenizer;
  34 + namespace impl
  35 + {
  36 + class Parser;
  37 + }
34 } // namespace qpdf 38 } // namespace qpdf
35 39
36 class QPDFTokenizer 40 class QPDFTokenizer
@@ -203,7 +207,7 @@ class QPDFTokenizer @@ -203,7 +207,7 @@ class QPDFTokenizer
203 void expectInlineImage(InputSource& input); 207 void expectInlineImage(InputSource& input);
204 208
205 private: 209 private:
206 - friend class QPDFParser; 210 + friend class qpdf::impl::Parser;
207 211
208 QPDFTokenizer(QPDFTokenizer const&) = delete; 212 QPDFTokenizer(QPDFTokenizer const&) = delete;
209 QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; 213 QPDFTokenizer& operator=(QPDFTokenizer const&) = delete;
libqpdf/QPDFObjectHandle.cc
@@ -25,6 +25,8 @@ @@ -25,6 +25,8 @@
25 using namespace std::literals; 25 using namespace std::literals;
26 using namespace qpdf; 26 using namespace qpdf;
27 27
  28 +using Parser = impl::Parser;
  29 +
28 const Null Null::temp_; 30 const Null Null::temp_;
29 31
30 BaseHandle:: 32 BaseHandle::
@@ -1540,7 +1542,7 @@ QPDFObjectHandle::parse( @@ -1540,7 +1542,7 @@ QPDFObjectHandle::parse(
1540 QPDF* context, std::string const& object_str, std::string const& object_description) 1542 QPDF* context, std::string const& object_str, std::string const& object_description)
1541 { 1543 {
1542 auto input = is::OffsetBuffer("parsed object", object_str); 1544 auto input = is::OffsetBuffer("parsed object", object_str);
1543 - auto result = QPDFParser::parse(input, object_description, context); 1545 + auto result = Parser::parse(input, object_description, context);
1544 size_t offset = QIntC::to_size(input.tell()); 1546 size_t offset = QIntC::to_size(input.tell());
1545 while (offset < object_str.length()) { 1547 while (offset < object_str.length()) {
1546 if (!isspace(object_str.at(offset))) { 1548 if (!isspace(object_str.at(offset))) {
@@ -1661,7 +1663,7 @@ QPDFObjectHandle::parseContentStream_data( @@ -1661,7 +1663,7 @@ QPDFObjectHandle::parseContentStream_data(
1661 auto input = is::OffsetBuffer(description, stream_data); 1663 auto input = is::OffsetBuffer(description, stream_data);
1662 Tokenizer tokenizer; 1664 Tokenizer tokenizer;
1663 tokenizer.allowEOF(); 1665 tokenizer.allowEOF();
1664 - auto sp_description = QPDFParser::make_description(description, "content"); 1666 + auto sp_description = Parser::make_description(description, "content");
1665 while (QIntC::to_size(input.tell()) < stream_length) { 1667 while (QIntC::to_size(input.tell()) < stream_length) {
1666 // Read a token and seek to the beginning. The offset we get from this process is the 1668 // Read a token and seek to the beginning. The offset we get from this process is the
1667 // beginning of the next non-ignorable (space, comment) token. This way, the offset and 1669 // beginning of the next non-ignorable (space, comment) token. This way, the offset and
@@ -1669,7 +1671,7 @@ QPDFObjectHandle::parseContentStream_data( @@ -1669,7 +1671,7 @@ QPDFObjectHandle::parseContentStream_data(
1669 tokenizer.nextToken(input, "content", true); 1671 tokenizer.nextToken(input, "content", true);
1670 qpdf_offset_t offset = input.getLastOffset(); 1672 qpdf_offset_t offset = input.getLastOffset();
1671 input.seek(offset, SEEK_SET); 1673 input.seek(offset, SEEK_SET);
1672 - auto obj = QPDFParser::parse_content(input, sp_description, tokenizer, context); 1674 + auto obj = Parser::parse_content(input, sp_description, tokenizer, context);
1673 if (!obj) { 1675 if (!obj) {
1674 // EOF 1676 // EOF
1675 break; 1677 break;
@@ -1678,7 +1680,7 @@ QPDFObjectHandle::parseContentStream_data( @@ -1678,7 +1680,7 @@ QPDFObjectHandle::parseContentStream_data(
1678 if (callbacks) { 1680 if (callbacks) {
1679 callbacks->handleObject(obj, QIntC::to_size(offset), length); 1681 callbacks->handleObject(obj, QIntC::to_size(offset), length);
1680 } 1682 }
1681 - if (obj.isOperator() && (obj.getOperatorValue() == "ID")) { 1683 + if (obj.isOperator() && obj.getOperatorValue() == "ID") {
1682 // Discard next character; it is the space after ID that terminated the token. Read 1684 // Discard next character; it is the space after ID that terminated the token. Read
1683 // until end of inline image. 1685 // until end of inline image.
1684 char ch; 1686 char ch;
@@ -1731,7 +1733,7 @@ QPDFObjectHandle::parse( @@ -1731,7 +1733,7 @@ QPDFObjectHandle::parse(
1731 StringDecrypter* decrypter, 1733 StringDecrypter* decrypter,
1732 QPDF* context) 1734 QPDF* context)
1733 { 1735 {
1734 - return QPDFParser::parse(*input, object_description, tokenizer, empty, decrypter, context); 1736 + return Parser::parse(*input, object_description, tokenizer, empty, decrypter, context);
1735 } 1737 }
1736 1738
1737 qpdf_offset_t 1739 qpdf_offset_t
libqpdf/QPDFParser.cc
@@ -46,12 +46,13 @@ class QPDF::Doc::ParseGuard @@ -46,12 +46,13 @@ class QPDF::Doc::ParseGuard
46 }; 46 };
47 47
48 using ParseGuard = QPDF::Doc::ParseGuard; 48 using ParseGuard = QPDF::Doc::ParseGuard;
  49 +using Parser = qpdf::impl::Parser;
49 50
50 QPDFObjectHandle 51 QPDFObjectHandle
51 -QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context) 52 +Parser::parse(InputSource& input, std::string const& object_description, QPDF* context)
52 { 53 {
53 qpdf::Tokenizer tokenizer; 54 qpdf::Tokenizer tokenizer;
54 - if (auto result = QPDFParser( 55 + if (auto result = Parser(
55 input, 56 input,
56 make_description(input.getName(), object_description), 57 make_description(input.getName(), object_description),
57 object_description, 58 object_description,
@@ -66,14 +67,14 @@ QPDFParser::parse(InputSource& input, std::string const& object_description, QPD @@ -66,14 +67,14 @@ QPDFParser::parse(InputSource& input, std::string const& object_description, QPD
66 } 67 }
67 68
68 QPDFObjectHandle 69 QPDFObjectHandle
69 -QPDFParser::parse_content( 70 +Parser::parse_content(
70 InputSource& input, 71 InputSource& input,
71 std::shared_ptr<QPDFObject::Description> sp_description, 72 std::shared_ptr<QPDFObject::Description> sp_description,
72 qpdf::Tokenizer& tokenizer, 73 qpdf::Tokenizer& tokenizer,
73 QPDF* context) 74 QPDF* context)
74 { 75 {
75 static const std::string content("content"); // GCC12 - make constexpr 76 static const std::string content("content"); // GCC12 - make constexpr
76 - auto p = QPDFParser( 77 + auto p = Parser(
77 input, 78 input,
78 std::move(sp_description), 79 std::move(sp_description),
79 content, 80 content,
@@ -93,7 +94,7 @@ QPDFParser::parse_content( @@ -93,7 +94,7 @@ QPDFParser::parse_content(
93 } 94 }
94 95
95 QPDFObjectHandle 96 QPDFObjectHandle
96 -QPDFParser::parse( 97 +Parser::parse(
97 InputSource& input, 98 InputSource& input,
98 std::string const& object_description, 99 std::string const& object_description,
99 QPDFTokenizer& tokenizer, 100 QPDFTokenizer& tokenizer,
@@ -103,7 +104,7 @@ QPDFParser::parse( @@ -103,7 +104,7 @@ QPDFParser::parse(
103 { 104 {
104 // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the 105 // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the
105 // only user of the 'empty' member. When removing this overload also remove 'empty'. 106 // only user of the 'empty' member. When removing this overload also remove 'empty'.
106 - auto p = QPDFParser( 107 + auto p = Parser(
107 input, 108 input,
108 make_description(input.getName(), object_description), 109 make_description(input.getName(), object_description),
109 object_description, 110 object_description,
@@ -120,7 +121,7 @@ QPDFParser::parse( @@ -120,7 +121,7 @@ QPDFParser::parse(
120 } 121 }
121 122
122 QPDFObjectHandle 123 QPDFObjectHandle
123 -QPDFParser::parse( 124 +Parser::parse(
124 InputSource& input, 125 InputSource& input,
125 std::string const& object_description, 126 std::string const& object_description,
126 qpdf::Tokenizer& tokenizer, 127 qpdf::Tokenizer& tokenizer,
@@ -128,7 +129,7 @@ QPDFParser::parse( @@ -128,7 +129,7 @@ QPDFParser::parse(
128 QPDF& context, 129 QPDF& context,
129 bool sanity_checks) 130 bool sanity_checks)
130 { 131 {
131 - return QPDFParser( 132 + return Parser(
132 input, 133 input,
133 make_description(input.getName(), object_description), 134 make_description(input.getName(), object_description),
134 object_description, 135 object_description,
@@ -143,10 +144,10 @@ QPDFParser::parse( @@ -143,10 +144,10 @@ QPDFParser::parse(
143 } 144 }
144 145
145 QPDFObjectHandle 146 QPDFObjectHandle
146 -QPDFParser::parse( 147 +Parser::parse(
147 is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) 148 is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
148 { 149 {
149 - return QPDFParser( 150 + return Parser(
150 input, 151 input,
151 std::make_shared<QPDFObject::Description>( 152 std::make_shared<QPDFObject::Description>(
152 QPDFObject::ObjStreamDescr(stream_id, obj_id)), 153 QPDFObject::ObjStreamDescr(stream_id, obj_id)),
@@ -161,7 +162,7 @@ QPDFParser::parse( @@ -161,7 +162,7 @@ QPDFParser::parse(
161 } 162 }
162 163
163 QPDFObjectHandle 164 QPDFObjectHandle
164 -QPDFParser::parse(bool content_stream) 165 +Parser::parse(bool content_stream)
165 { 166 {
166 try { 167 try {
167 return parse_first(content_stream); 168 return parse_first(content_stream);
@@ -178,7 +179,7 @@ QPDFParser::parse(bool content_stream) @@ -178,7 +179,7 @@ QPDFParser::parse(bool content_stream)
178 } 179 }
179 180
180 QPDFObjectHandle 181 QPDFObjectHandle
181 -QPDFParser::parse_first(bool content_stream) 182 +Parser::parse_first(bool content_stream)
182 { 183 {
183 // This method must take care not to resolve any objects. Don't check the type of any object 184 // This method must take care not to resolve any objects. Don't check the type of any object
184 // without first ensuring that it is a direct object. Otherwise, doing so may have the side 185 // without first ensuring that it is a direct object. Otherwise, doing so may have the side
@@ -279,7 +280,7 @@ QPDFParser::parse_first(bool content_stream) @@ -279,7 +280,7 @@ QPDFParser::parse_first(bool content_stream)
279 } 280 }
280 281
281 QPDFObjectHandle 282 QPDFObjectHandle
282 -QPDFParser::parse_remainder(bool content_stream) 283 +Parser::parse_remainder(bool content_stream)
283 { 284 {
284 // This method must take care not to resolve any objects. Don't check the type of any object 285 // This method must take care not to resolve any objects. Don't check the type of any object
285 // without first ensuring that it is a direct object. Otherwise, doing so may have the side 286 // without first ensuring that it is a direct object. Otherwise, doing so may have the side
@@ -312,7 +313,7 @@ QPDFParser::parse_remainder(bool content_stream) @@ -312,7 +313,7 @@ QPDFParser::parse_remainder(bool content_stream)
312 tokenizer_.getValue() == "R") { 313 tokenizer_.getValue() == "R") {
313 if (!context_) { 314 if (!context_) {
314 throw std::logic_error( 315 throw std::logic_error(
315 - "QPDFParser::parse called without context on an object with indirect " 316 + "Parser::parse called without context on an object with indirect "
316 "references"); 317 "references");
317 } 318 }
318 auto id = QIntC::to_int(int_buffer_[(int_count_ - 1) % 2]); 319 auto id = QIntC::to_int(int_buffer_[(int_count_ - 1) % 2]);
@@ -533,7 +534,7 @@ QPDFParser::parse_remainder(bool content_stream) @@ -533,7 +534,7 @@ QPDFParser::parse_remainder(bool content_stream)
533 } 534 }
534 535
535 void 536 void
536 -QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) 537 +Parser::add(std::shared_ptr<QPDFObject>&& obj)
537 { 538 {
538 if (frame_->state != st_dictionary_value) { 539 if (frame_->state != st_dictionary_value) {
539 // If state is st_dictionary_key then there is a missing key. Push onto olist for 540 // If state is st_dictionary_key then there is a missing key. Push onto olist for
@@ -548,7 +549,7 @@ QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) @@ -548,7 +549,7 @@ QPDFParser::add(std::shared_ptr<QPDFObject>&& obj)
548 } 549 }
549 550
550 void 551 void
551 -QPDFParser::add_null() 552 +Parser::add_null()
552 { 553 {
553 const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>(); 554 const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>();
554 555
@@ -566,7 +567,7 @@ QPDFParser::add_null() @@ -566,7 +567,7 @@ QPDFParser::add_null()
566 } 567 }
567 568
568 void 569 void
569 -QPDFParser::add_bad_null(std::string const& msg) 570 +Parser::add_bad_null(std::string const& msg)
570 { 571 {
571 warn(msg); 572 warn(msg);
572 check_too_many_bad_tokens(); 573 check_too_many_bad_tokens();
@@ -574,7 +575,7 @@ QPDFParser::add_bad_null(std::string const& msg) @@ -574,7 +575,7 @@ QPDFParser::add_bad_null(std::string const& msg)
574 } 575 }
575 576
576 void 577 void
577 -QPDFParser::add_int(int count) 578 +Parser::add_int(int count)
578 { 579 {
579 auto obj = QPDFObject::create<QPDF_Integer>(int_buffer_[count % 2]); 580 auto obj = QPDFObject::create<QPDF_Integer>(int_buffer_[count % 2]);
580 obj->setDescription(context_, description_, last_offset_buffer_[count % 2]); 581 obj->setDescription(context_, description_, last_offset_buffer_[count % 2]);
@@ -583,7 +584,7 @@ QPDFParser::add_int(int count) @@ -583,7 +584,7 @@ QPDFParser::add_int(int count)
583 584
584 template <typename T, typename... Args> 585 template <typename T, typename... Args>
585 void 586 void
586 -QPDFParser::add_scalar(Args&&... args) 587 +Parser::add_scalar(Args&&... args)
587 { 588 {
588 auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_); 589 auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
589 if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) { 590 if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
@@ -599,7 +600,7 @@ QPDFParser::add_scalar(Args&&... args) @@ -599,7 +600,7 @@ QPDFParser::add_scalar(Args&&... args)
599 600
600 template <typename T, typename... Args> 601 template <typename T, typename... Args>
601 QPDFObjectHandle 602 QPDFObjectHandle
602 -QPDFParser::with_description(Args&&... args) 603 +Parser::with_description(Args&&... args)
603 { 604 {
604 auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); 605 auto obj = QPDFObject::create<T>(std::forward<Args>(args)...);
605 obj->setDescription(context_, description_, start_); 606 obj->setDescription(context_, description_, start_);
@@ -607,7 +608,7 @@ QPDFParser::with_description(Args&&... args) @@ -607,7 +608,7 @@ QPDFParser::with_description(Args&&... args)
607 } 608 }
608 609
609 void 610 void
610 -QPDFParser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset) 611 +Parser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset)
611 { 612 {
612 if (obj) { 613 if (obj) {
613 obj->setDescription(context_, description_, parsed_offset); 614 obj->setDescription(context_, description_, parsed_offset);
@@ -615,7 +616,7 @@ QPDFParser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset) @@ -615,7 +616,7 @@ QPDFParser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset)
615 } 616 }
616 617
617 void 618 void
618 -QPDFParser::fix_missing_keys() 619 +Parser::fix_missing_keys()
619 { 620 {
620 std::set<std::string> names; 621 std::set<std::string> names;
621 for (auto& obj: frame_->olist) { 622 for (auto& obj: frame_->olist) {
@@ -641,7 +642,7 @@ QPDFParser::fix_missing_keys() @@ -641,7 +642,7 @@ QPDFParser::fix_missing_keys()
641 } 642 }
642 643
643 void 644 void
644 -QPDFParser::check_too_many_bad_tokens() 645 +Parser::check_too_many_bad_tokens()
645 { 646 {
646 auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_); 647 auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_);
647 if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) { 648 if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) {
@@ -676,7 +677,7 @@ QPDFParser::check_too_many_bad_tokens() @@ -676,7 +677,7 @@ QPDFParser::check_too_many_bad_tokens()
676 } 677 }
677 678
678 void 679 void
679 -QPDFParser::limits_error(std::string const& limit, std::string const& msg) 680 +Parser::limits_error(std::string const& limit, std::string const& msg)
680 { 681 {
681 Limits::error(); 682 Limits::error();
682 warn("limits error("s + limit + "): " + msg); 683 warn("limits error("s + limit + "): " + msg);
@@ -684,7 +685,7 @@ QPDFParser::limits_error(std::string const& limit, std::string const& msg) @@ -684,7 +685,7 @@ QPDFParser::limits_error(std::string const& limit, std::string const& msg)
684 } 685 }
685 686
686 void 687 void
687 -QPDFParser::warn(QPDFExc const& e) const 688 +Parser::warn(QPDFExc const& e) const
688 { 689 {
689 // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the 690 // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
690 // object. If parsing for some other reason, such as an explicit creation of an object from a 691 // object. If parsing for some other reason, such as an explicit creation of an object from a
@@ -697,7 +698,7 @@ QPDFParser::warn(QPDFExc const& e) const @@ -697,7 +698,7 @@ QPDFParser::warn(QPDFExc const& e) const
697 } 698 }
698 699
699 void 700 void
700 -QPDFParser::warn_duplicate_key() 701 +Parser::warn_duplicate_key()
701 { 702 {
702 warn( 703 warn(
703 frame_->offset, 704 frame_->offset,
@@ -706,7 +707,7 @@ QPDFParser::warn_duplicate_key() @@ -706,7 +707,7 @@ QPDFParser::warn_duplicate_key()
706 } 707 }
707 708
708 void 709 void
709 -QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const 710 +Parser::warn(qpdf_offset_t offset, std::string const& msg) const
710 { 711 {
711 if (stream_id_) { 712 if (stream_id_) {
712 std::string descr = "object "s + std::to_string(obj_id_) + " 0"; 713 std::string descr = "object "s + std::to_string(obj_id_) + " 0";
@@ -718,7 +719,7 @@ QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const @@ -718,7 +719,7 @@ QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const
718 } 719 }
719 720
720 void 721 void
721 -QPDFParser::warn(std::string const& msg) const 722 +Parser::warn(std::string const& msg) const
722 { 723 {
723 warn(input_.getLastOffset(), msg); 724 warn(input_.getLastOffset(), msg);
724 } 725 }
libqpdf/QPDF_objects.cc
@@ -25,6 +25,7 @@ using namespace qpdf; @@ -25,6 +25,7 @@ using namespace qpdf;
25 using namespace std::literals; 25 using namespace std::literals;
26 26
27 using Objects = QPDF::Doc::Objects; 27 using Objects = QPDF::Doc::Objects;
  28 +using Parser = impl::Parser;
28 29
29 QPDFXRefEntry::QPDFXRefEntry() = default; 30 QPDFXRefEntry::QPDFXRefEntry() = default;
30 31
@@ -1287,7 +1288,7 @@ Objects::readTrailer() @@ -1287,7 +1288,7 @@ Objects::readTrailer()
1287 { 1288 {
1288 qpdf_offset_t offset = m->file->tell(); 1289 qpdf_offset_t offset = m->file->tell();
1289 auto object = 1290 auto object =
1290 - QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, qpdf, m->reconstructed_xref); 1291 + Parser::parse(*m->file, "trailer", m->tokenizer, nullptr, qpdf, m->reconstructed_xref);
1291 if (object.isDictionary() && m->objects.readToken(*m->file).isWord("stream")) { 1292 if (object.isDictionary() && m->objects.readToken(*m->file).isWord("stream")) {
1292 warn(damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer")); 1293 warn(damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer"));
1293 } 1294 }
@@ -1304,7 +1305,7 @@ Objects::readObject(std::string const& description, QPDFObjGen og) @@ -1304,7 +1305,7 @@ Objects::readObject(std::string const& description, QPDFObjGen og)
1304 1305
1305 StringDecrypter decrypter{&qpdf, og}; 1306 StringDecrypter decrypter{&qpdf, og};
1306 StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; 1307 StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr;
1307 - auto object = QPDFParser::parse( 1308 + auto object = Parser::parse(
1308 *m->file, 1309 *m->file,
1309 m->last_object_description, 1310 m->last_object_description,
1310 m->tokenizer, 1311 m->tokenizer,
@@ -1834,7 +1835,7 @@ Objects::resolveObjectsInStream(int obj_stream_number) @@ -1834,7 +1835,7 @@ Objects::resolveObjectsInStream(int obj_stream_number)
1834 if (entry != m->xref_table.end() && entry->second.getType() == 2 && 1835 if (entry != m->xref_table.end() && entry->second.getType() == 2 &&
1835 entry->second.getObjStreamNumber() == obj_stream_number) { 1836 entry->second.getObjStreamNumber() == obj_stream_number) {
1836 is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset); 1837 is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset);
1837 - if (auto oh = QPDFParser::parse(in, obj_stream_number, obj_id, m->tokenizer, qpdf)) { 1838 + if (auto oh = Parser::parse(in, obj_stream_number, obj_id, m->tokenizer, qpdf)) {
1838 updateCache(og, oh.obj_sp(), end_before_space, end_after_space); 1839 updateCache(og, oh.obj_sp(), end_before_space, end_after_space);
1839 } 1840 }
1840 } else { 1841 } else {
libqpdf/qpdf/QPDFParser.hh
@@ -13,153 +13,157 @@ @@ -13,153 +13,157 @@
13 using namespace qpdf; 13 using namespace qpdf;
14 using namespace qpdf::global; 14 using namespace qpdf::global;
15 15
16 -class QPDFParser 16 +namespace qpdf::impl
17 { 17 {
18 - public:  
19 - class Error: public std::exception 18 + class Parser
20 { 19 {
21 public: 20 public:
22 - Error() = default;  
23 - virtual ~Error() noexcept = default;  
24 - };  
25 -  
26 - static QPDFObjectHandle  
27 - parse(InputSource& input, std::string const& object_description, QPDF* context);  
28 -  
29 - static QPDFObjectHandle parse_content(  
30 - InputSource& input,  
31 - std::shared_ptr<QPDFObject::Description> sp_description,  
32 - qpdf::Tokenizer& tokenizer,  
33 - QPDF* context);  
34 -  
35 - // For use by deprecated QPDFObjectHandle::parse.  
36 - static QPDFObjectHandle parse(  
37 - InputSource& input,  
38 - std::string const& object_description,  
39 - QPDFTokenizer& tokenizer,  
40 - bool& empty,  
41 - QPDFObjectHandle::StringDecrypter* decrypter,  
42 - QPDF* context);  
43 -  
44 - // For use by QPDF.  
45 - static QPDFObjectHandle parse(  
46 - InputSource& input,  
47 - std::string const& object_description,  
48 - qpdf::Tokenizer& tokenizer,  
49 - QPDFObjectHandle::StringDecrypter* decrypter,  
50 - QPDF& context,  
51 - bool sanity_checks);  
52 -  
53 - static QPDFObjectHandle parse(  
54 - qpdf::is::OffsetBuffer& input,  
55 - int stream_id,  
56 - int obj_id,  
57 - qpdf::Tokenizer& tokenizer,  
58 - QPDF& context);  
59 -  
60 - static std::shared_ptr<QPDFObject::Description>  
61 - make_description(std::string const& input_name, std::string const& object_description)  
62 - {  
63 - using namespace std::literals;  
64 - return std::make_shared<QPDFObject::Description>(  
65 - input_name + ", " + object_description + " at offset $PO");  
66 - }  
67 -  
68 - private:  
69 - QPDFParser(  
70 - InputSource& input,  
71 - std::shared_ptr<QPDFObject::Description> sp_description,  
72 - std::string const& object_description,  
73 - qpdf::Tokenizer& tokenizer,  
74 - QPDFObjectHandle::StringDecrypter* decrypter,  
75 - QPDF* context,  
76 - bool parse_pdf,  
77 - int stream_id = 0,  
78 - int obj_id = 0,  
79 - bool sanity_checks = false) :  
80 - input_(input),  
81 - object_description_(object_description),  
82 - tokenizer_(tokenizer),  
83 - decrypter_(decrypter),  
84 - context_(context),  
85 - description_(std::move(sp_description)),  
86 - parse_pdf_(parse_pdf),  
87 - stream_id_(stream_id),  
88 - obj_id_(obj_id),  
89 - sanity_checks_(sanity_checks)  
90 - {  
91 - }  
92 -  
93 - // Parser state. Note:  
94 - // state <= st_dictionary_value == (state = st_dictionary_key || state = st_dictionary_value)  
95 - enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; 21 + class Error: public std::exception
  22 + {
  23 + public:
  24 + Error() = default;
  25 + virtual ~Error() noexcept = default;
  26 + };
  27 +
  28 + static QPDFObjectHandle
  29 + parse(InputSource& input, std::string const& object_description, QPDF* context);
  30 +
  31 + static QPDFObjectHandle parse_content(
  32 + InputSource& input,
  33 + std::shared_ptr<QPDFObject::Description> sp_description,
  34 + qpdf::Tokenizer& tokenizer,
  35 + QPDF* context);
  36 +
  37 + // For use by deprecated QPDFObjectHandle::parse.
  38 + static QPDFObjectHandle parse(
  39 + InputSource& input,
  40 + std::string const& object_description,
  41 + QPDFTokenizer& tokenizer,
  42 + bool& empty,
  43 + QPDFObjectHandle::StringDecrypter* decrypter,
  44 + QPDF* context);
  45 +
  46 + // For use by QPDF.
  47 + static QPDFObjectHandle parse(
  48 + InputSource& input,
  49 + std::string const& object_description,
  50 + qpdf::Tokenizer& tokenizer,
  51 + QPDFObjectHandle::StringDecrypter* decrypter,
  52 + QPDF& context,
  53 + bool sanity_checks);
  54 +
  55 + static QPDFObjectHandle parse(
  56 + qpdf::is::OffsetBuffer& input,
  57 + int stream_id,
  58 + int obj_id,
  59 + qpdf::Tokenizer& tokenizer,
  60 + QPDF& context);
  61 +
  62 + static std::shared_ptr<QPDFObject::Description>
  63 + make_description(std::string const& input_name, std::string const& object_description)
  64 + {
  65 + using namespace std::literals;
  66 + return std::make_shared<QPDFObject::Description>(
  67 + input_name + ", " + object_description + " at offset $PO");
  68 + }
96 69
97 - struct StackFrame  
98 - {  
99 - StackFrame(InputSource& input, parser_state_e state) :  
100 - state(state),  
101 - offset(input.tell()) 70 + private:
  71 + Parser(
  72 + InputSource& input,
  73 + std::shared_ptr<QPDFObject::Description> sp_description,
  74 + std::string const& object_description,
  75 + qpdf::Tokenizer& tokenizer,
  76 + QPDFObjectHandle::StringDecrypter* decrypter,
  77 + QPDF* context,
  78 + bool parse_pdf,
  79 + int stream_id = 0,
  80 + int obj_id = 0,
  81 + bool sanity_checks = false) :
  82 + input_(input),
  83 + object_description_(object_description),
  84 + tokenizer_(tokenizer),
  85 + decrypter_(decrypter),
  86 + context_(context),
  87 + description_(std::move(sp_description)),
  88 + parse_pdf_(parse_pdf),
  89 + stream_id_(stream_id),
  90 + obj_id_(obj_id),
  91 + sanity_checks_(sanity_checks)
102 { 92 {
103 } 93 }
104 94
105 - std::vector<QPDFObjectHandle> olist;  
106 - std::map<std::string, QPDFObjectHandle> dict;  
107 - parser_state_e state;  
108 - std::string key;  
109 - qpdf_offset_t offset;  
110 - std::string contents_string;  
111 - qpdf_offset_t contents_offset{-1};  
112 - int null_count{0};  
113 - }; 95 + // Parser state. Note:
  96 + // state <= st_dictionary_value == (state = st_dictionary_key || state =
  97 + // st_dictionary_value)
  98 + enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array };
114 99
115 - QPDFObjectHandle parse(bool content_stream = false);  
116 - QPDFObjectHandle parse_first(bool content_stream);  
117 - QPDFObjectHandle parse_remainder(bool content_stream);  
118 - void add(std::shared_ptr<QPDFObject>&& obj);  
119 - void add_null();  
120 - void add_bad_null(std::string const& msg);  
121 - void add_int(int count);  
122 - template <typename T, typename... Args>  
123 - void add_scalar(Args&&... args);  
124 - void check_too_many_bad_tokens();  
125 - void warn_duplicate_key();  
126 - void fix_missing_keys();  
127 - [[noreturn]] void limits_error(std::string const& limit, std::string const& msg);  
128 - void warn(qpdf_offset_t offset, std::string const& msg) const;  
129 - void warn(std::string const& msg) const;  
130 - void warn(QPDFExc const&) const;  
131 - template <typename T, typename... Args>  
132 - // Create a new scalar object complete with parsed offset and description.  
133 - // NB the offset includes any leading whitespace.  
134 - QPDFObjectHandle with_description(Args&&... args);  
135 - void set_description(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset);  
136 - InputSource& input_;  
137 - std::string const& object_description_;  
138 - qpdf::Tokenizer& tokenizer_;  
139 - QPDFObjectHandle::StringDecrypter* decrypter_;  
140 - QPDF* context_;  
141 - std::shared_ptr<QPDFObject::Description> description_;  
142 - bool parse_pdf_{false};  
143 - int stream_id_{0};  
144 - int obj_id_{0};  
145 - bool sanity_checks_{false};  
146 -  
147 - std::vector<StackFrame> stack_;  
148 - StackFrame* frame_{nullptr};  
149 - // Number of recent bad tokens. This will always be > 0 once a bad token has been encountered as  
150 - // it only gets incremented or reset when a bad token is encountered.  
151 - int bad_count_{0};  
152 - // Number of bad tokens (remaining) before giving up.  
153 - uint32_t max_bad_count_{Limits::parser_max_errors()};  
154 - // Number of good tokens since last bad token. Irrelevant if bad_count == 0.  
155 - int good_count_{0};  
156 - // Start offset including any leading whitespace.  
157 - qpdf_offset_t start_{0};  
158 - // Number of successive integer tokens.  
159 - int int_count_{0};  
160 - long long int_buffer_[2]{0, 0};  
161 - qpdf_offset_t last_offset_buffer_[2]{0, 0};  
162 - bool empty_{false};  
163 -}; 100 + struct StackFrame
  101 + {
  102 + StackFrame(InputSource& input, parser_state_e state) :
  103 + state(state),
  104 + offset(input.tell())
  105 + {
  106 + }
  107 +
  108 + std::vector<QPDFObjectHandle> olist;
  109 + std::map<std::string, QPDFObjectHandle> dict;
  110 + parser_state_e state;
  111 + std::string key;
  112 + qpdf_offset_t offset;
  113 + std::string contents_string;
  114 + qpdf_offset_t contents_offset{-1};
  115 + int null_count{0};
  116 + };
  117 +
  118 + QPDFObjectHandle parse(bool content_stream = false);
  119 + QPDFObjectHandle parse_first(bool content_stream);
  120 + QPDFObjectHandle parse_remainder(bool content_stream);
  121 + void add(std::shared_ptr<QPDFObject>&& obj);
  122 + void add_null();
  123 + void add_bad_null(std::string const& msg);
  124 + void add_int(int count);
  125 + template <typename T, typename... Args>
  126 + void add_scalar(Args&&... args);
  127 + void check_too_many_bad_tokens();
  128 + void warn_duplicate_key();
  129 + void fix_missing_keys();
  130 + [[noreturn]] void limits_error(std::string const& limit, std::string const& msg);
  131 + void warn(qpdf_offset_t offset, std::string const& msg) const;
  132 + void warn(std::string const& msg) const;
  133 + void warn(QPDFExc const&) const;
  134 + template <typename T, typename... Args>
  135 + // Create a new scalar object complete with parsed offset and description.
  136 + // NB the offset includes any leading whitespace.
  137 + QPDFObjectHandle with_description(Args&&... args);
  138 + void set_description(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset);
  139 + InputSource& input_;
  140 + std::string const& object_description_;
  141 + qpdf::Tokenizer& tokenizer_;
  142 + QPDFObjectHandle::StringDecrypter* decrypter_;
  143 + QPDF* context_;
  144 + std::shared_ptr<QPDFObject::Description> description_;
  145 + bool parse_pdf_{false};
  146 + int stream_id_{0};
  147 + int obj_id_{0};
  148 + bool sanity_checks_{false};
  149 +
  150 + std::vector<StackFrame> stack_;
  151 + StackFrame* frame_{nullptr};
  152 + // Number of recent bad tokens. This will always be > 0 once a bad token has been
  153 + // encountered as it only gets incremented or reset when a bad token is encountered.
  154 + int bad_count_{0};
  155 + // Number of bad tokens (remaining) before giving up.
  156 + uint32_t max_bad_count_{Limits::parser_max_errors()};
  157 + // Number of good tokens since last bad token. Irrelevant if bad_count == 0.
  158 + int good_count_{0};
  159 + // Start offset including any leading whitespace.
  160 + qpdf_offset_t start_{0};
  161 + // Number of successive integer tokens.
  162 + int int_count_{0};
  163 + long long int_buffer_[2]{0, 0};
  164 + qpdf_offset_t last_offset_buffer_[2]{0, 0};
  165 + bool empty_{false};
  166 + };
  167 +} // namespace qpdf::impl
164 168
165 #endif // QPDFPARSER_HH 169 #endif // QPDFPARSER_HH
qpdf/qtest/qpdf/parse-object.out
1 [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] 1 [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ]
2 -logic error parsing indirect: QPDFParser::parse called without context on an object with indirect references 2 +logic error parsing indirect: Parser::parse called without context on an object with indirect references
3 trailing data: parsed object (trailing test): trailing data found parsing object from string 3 trailing data: parsed object (trailing test): trailing data found parsing object from string
4 WARNING: parsed object (offset 9): unknown token while reading object; treating as string 4 WARNING: parsed object (offset 9): unknown token while reading object; treating as string
5 WARNING: parsed object: treating unexpected brace token as null 5 WARNING: parsed object: treating unexpected brace token as null