Commit 51932fd91bca1cf2a155ac5c376a13dbd71da546
Committed by
GitHub
Merge pull request #1651 from m-holger/parser
Refactor QPDFParser
Showing
7 changed files
with
461 additions
and
325 deletions
include/qpdf/QPDFObjectHandle.hh
| @@ -61,11 +61,14 @@ class QPDFTokenizer; | @@ -61,11 +61,14 @@ class QPDFTokenizer; | ||
| 61 | class QPDFExc; | 61 | class QPDFExc; |
| 62 | class Pl_QPDFTokenizer; | 62 | class Pl_QPDFTokenizer; |
| 63 | class QPDFMatrix; | 63 | class QPDFMatrix; |
| 64 | -class QPDFParser; | 64 | +namespace qpdf::impl |
| 65 | +{ | ||
| 66 | + class Parser; | ||
| 67 | +} | ||
| 65 | 68 | ||
| 66 | class QPDFObjectHandle: public qpdf::BaseHandle | 69 | class QPDFObjectHandle: public qpdf::BaseHandle |
| 67 | { | 70 | { |
| 68 | - friend class QPDFParser; | 71 | + friend class qpdf::impl::Parser; |
| 69 | 72 | ||
| 70 | public: | 73 | public: |
| 71 | // This class is used by replaceStreamData. It provides an alternative way of associating | 74 | // This class is used by replaceStreamData. It provides an alternative way of associating |
include/qpdf/QPDFTokenizer.hh
| @@ -31,6 +31,10 @@ | @@ -31,6 +31,10 @@ | ||
| 31 | namespace qpdf | 31 | namespace qpdf |
| 32 | { | 32 | { |
| 33 | class Tokenizer; | 33 | class Tokenizer; |
| 34 | + namespace impl | ||
| 35 | + { | ||
| 36 | + class Parser; | ||
| 37 | + } | ||
| 34 | } // namespace qpdf | 38 | } // namespace qpdf |
| 35 | 39 | ||
| 36 | class QPDFTokenizer | 40 | class QPDFTokenizer |
| @@ -203,7 +207,7 @@ class QPDFTokenizer | @@ -203,7 +207,7 @@ class QPDFTokenizer | ||
| 203 | void expectInlineImage(InputSource& input); | 207 | void expectInlineImage(InputSource& input); |
| 204 | 208 | ||
| 205 | private: | 209 | private: |
| 206 | - friend class QPDFParser; | 210 | + friend class qpdf::impl::Parser; |
| 207 | 211 | ||
| 208 | QPDFTokenizer(QPDFTokenizer const&) = delete; | 212 | QPDFTokenizer(QPDFTokenizer const&) = delete; |
| 209 | QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; | 213 | QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; |
libqpdf/QPDFObjectHandle.cc
| @@ -25,6 +25,8 @@ | @@ -25,6 +25,8 @@ | ||
| 25 | using namespace std::literals; | 25 | using namespace std::literals; |
| 26 | using namespace qpdf; | 26 | using namespace qpdf; |
| 27 | 27 | ||
| 28 | +using Parser = impl::Parser; | ||
| 29 | + | ||
| 28 | const Null Null::temp_; | 30 | const Null Null::temp_; |
| 29 | 31 | ||
| 30 | BaseHandle:: | 32 | BaseHandle:: |
| @@ -1540,7 +1542,7 @@ QPDFObjectHandle::parse( | @@ -1540,7 +1542,7 @@ QPDFObjectHandle::parse( | ||
| 1540 | QPDF* context, std::string const& object_str, std::string const& object_description) | 1542 | QPDF* context, std::string const& object_str, std::string const& object_description) |
| 1541 | { | 1543 | { |
| 1542 | auto input = is::OffsetBuffer("parsed object", object_str); | 1544 | auto input = is::OffsetBuffer("parsed object", object_str); |
| 1543 | - auto result = QPDFParser::parse(input, object_description, context); | 1545 | + auto result = Parser::parse(input, object_description, context); |
| 1544 | size_t offset = QIntC::to_size(input.tell()); | 1546 | size_t offset = QIntC::to_size(input.tell()); |
| 1545 | while (offset < object_str.length()) { | 1547 | while (offset < object_str.length()) { |
| 1546 | if (!isspace(object_str.at(offset))) { | 1548 | if (!isspace(object_str.at(offset))) { |
| @@ -1661,7 +1663,7 @@ QPDFObjectHandle::parseContentStream_data( | @@ -1661,7 +1663,7 @@ QPDFObjectHandle::parseContentStream_data( | ||
| 1661 | auto input = is::OffsetBuffer(description, stream_data); | 1663 | auto input = is::OffsetBuffer(description, stream_data); |
| 1662 | Tokenizer tokenizer; | 1664 | Tokenizer tokenizer; |
| 1663 | tokenizer.allowEOF(); | 1665 | tokenizer.allowEOF(); |
| 1664 | - auto sp_description = QPDFParser::make_description(description, "content"); | 1666 | + auto sp_description = Parser::make_description(description, "content"); |
| 1665 | while (QIntC::to_size(input.tell()) < stream_length) { | 1667 | while (QIntC::to_size(input.tell()) < stream_length) { |
| 1666 | // Read a token and seek to the beginning. The offset we get from this process is the | 1668 | // Read a token and seek to the beginning. The offset we get from this process is the |
| 1667 | // beginning of the next non-ignorable (space, comment) token. This way, the offset and | 1669 | // beginning of the next non-ignorable (space, comment) token. This way, the offset and |
| @@ -1669,7 +1671,7 @@ QPDFObjectHandle::parseContentStream_data( | @@ -1669,7 +1671,7 @@ QPDFObjectHandle::parseContentStream_data( | ||
| 1669 | tokenizer.nextToken(input, "content", true); | 1671 | tokenizer.nextToken(input, "content", true); |
| 1670 | qpdf_offset_t offset = input.getLastOffset(); | 1672 | qpdf_offset_t offset = input.getLastOffset(); |
| 1671 | input.seek(offset, SEEK_SET); | 1673 | input.seek(offset, SEEK_SET); |
| 1672 | - auto obj = QPDFParser::parse_content(input, sp_description, tokenizer, context); | 1674 | + auto obj = Parser::parse_content(input, sp_description, tokenizer, context); |
| 1673 | if (!obj) { | 1675 | if (!obj) { |
| 1674 | // EOF | 1676 | // EOF |
| 1675 | break; | 1677 | break; |
| @@ -1678,7 +1680,7 @@ QPDFObjectHandle::parseContentStream_data( | @@ -1678,7 +1680,7 @@ QPDFObjectHandle::parseContentStream_data( | ||
| 1678 | if (callbacks) { | 1680 | if (callbacks) { |
| 1679 | callbacks->handleObject(obj, QIntC::to_size(offset), length); | 1681 | callbacks->handleObject(obj, QIntC::to_size(offset), length); |
| 1680 | } | 1682 | } |
| 1681 | - if (obj.isOperator() && (obj.getOperatorValue() == "ID")) { | 1683 | + if (obj.isOperator() && obj.getOperatorValue() == "ID") { |
| 1682 | // Discard next character; it is the space after ID that terminated the token. Read | 1684 | // Discard next character; it is the space after ID that terminated the token. Read |
| 1683 | // until end of inline image. | 1685 | // until end of inline image. |
| 1684 | char ch; | 1686 | char ch; |
| @@ -1731,7 +1733,7 @@ QPDFObjectHandle::parse( | @@ -1731,7 +1733,7 @@ QPDFObjectHandle::parse( | ||
| 1731 | StringDecrypter* decrypter, | 1733 | StringDecrypter* decrypter, |
| 1732 | QPDF* context) | 1734 | QPDF* context) |
| 1733 | { | 1735 | { |
| 1734 | - return QPDFParser::parse(*input, object_description, tokenizer, empty, decrypter, context); | 1736 | + return Parser::parse(*input, object_description, tokenizer, empty, decrypter, context); |
| 1735 | } | 1737 | } |
| 1736 | 1738 | ||
| 1737 | qpdf_offset_t | 1739 | qpdf_offset_t |
libqpdf/QPDFParser.cc
| @@ -46,12 +46,13 @@ class QPDF::Doc::ParseGuard | @@ -46,12 +46,13 @@ class QPDF::Doc::ParseGuard | ||
| 46 | }; | 46 | }; |
| 47 | 47 | ||
| 48 | using ParseGuard = QPDF::Doc::ParseGuard; | 48 | using ParseGuard = QPDF::Doc::ParseGuard; |
| 49 | +using Parser = qpdf::impl::Parser; | ||
| 49 | 50 | ||
| 50 | QPDFObjectHandle | 51 | QPDFObjectHandle |
| 51 | -QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context) | 52 | +Parser::parse(InputSource& input, std::string const& object_description, QPDF* context) |
| 52 | { | 53 | { |
| 53 | qpdf::Tokenizer tokenizer; | 54 | qpdf::Tokenizer tokenizer; |
| 54 | - if (auto result = QPDFParser( | 55 | + if (auto result = Parser( |
| 55 | input, | 56 | input, |
| 56 | make_description(input.getName(), object_description), | 57 | make_description(input.getName(), object_description), |
| 57 | object_description, | 58 | object_description, |
| @@ -66,14 +67,14 @@ QPDFParser::parse(InputSource& input, std::string const& object_description, QPD | @@ -66,14 +67,14 @@ QPDFParser::parse(InputSource& input, std::string const& object_description, QPD | ||
| 66 | } | 67 | } |
| 67 | 68 | ||
| 68 | QPDFObjectHandle | 69 | QPDFObjectHandle |
| 69 | -QPDFParser::parse_content( | 70 | +Parser::parse_content( |
| 70 | InputSource& input, | 71 | InputSource& input, |
| 71 | std::shared_ptr<QPDFObject::Description> sp_description, | 72 | std::shared_ptr<QPDFObject::Description> sp_description, |
| 72 | qpdf::Tokenizer& tokenizer, | 73 | qpdf::Tokenizer& tokenizer, |
| 73 | QPDF* context) | 74 | QPDF* context) |
| 74 | { | 75 | { |
| 75 | static const std::string content("content"); // GCC12 - make constexpr | 76 | static const std::string content("content"); // GCC12 - make constexpr |
| 76 | - auto p = QPDFParser( | 77 | + auto p = Parser( |
| 77 | input, | 78 | input, |
| 78 | std::move(sp_description), | 79 | std::move(sp_description), |
| 79 | content, | 80 | content, |
| @@ -93,7 +94,7 @@ QPDFParser::parse_content( | @@ -93,7 +94,7 @@ QPDFParser::parse_content( | ||
| 93 | } | 94 | } |
| 94 | 95 | ||
| 95 | QPDFObjectHandle | 96 | QPDFObjectHandle |
| 96 | -QPDFParser::parse( | 97 | +Parser::parse( |
| 97 | InputSource& input, | 98 | InputSource& input, |
| 98 | std::string const& object_description, | 99 | std::string const& object_description, |
| 99 | QPDFTokenizer& tokenizer, | 100 | QPDFTokenizer& tokenizer, |
| @@ -103,7 +104,7 @@ QPDFParser::parse( | @@ -103,7 +104,7 @@ QPDFParser::parse( | ||
| 103 | { | 104 | { |
| 104 | // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the | 105 | // ABI: This parse overload is only used by the deprecated QPDFObjectHandle::parse. It is the |
| 105 | // only user of the 'empty' member. When removing this overload also remove 'empty'. | 106 | // only user of the 'empty' member. When removing this overload also remove 'empty'. |
| 106 | - auto p = QPDFParser( | 107 | + auto p = Parser( |
| 107 | input, | 108 | input, |
| 108 | make_description(input.getName(), object_description), | 109 | make_description(input.getName(), object_description), |
| 109 | object_description, | 110 | object_description, |
| @@ -120,7 +121,7 @@ QPDFParser::parse( | @@ -120,7 +121,7 @@ QPDFParser::parse( | ||
| 120 | } | 121 | } |
| 121 | 122 | ||
| 122 | QPDFObjectHandle | 123 | QPDFObjectHandle |
| 123 | -QPDFParser::parse( | 124 | +Parser::parse( |
| 124 | InputSource& input, | 125 | InputSource& input, |
| 125 | std::string const& object_description, | 126 | std::string const& object_description, |
| 126 | qpdf::Tokenizer& tokenizer, | 127 | qpdf::Tokenizer& tokenizer, |
| @@ -128,7 +129,7 @@ QPDFParser::parse( | @@ -128,7 +129,7 @@ QPDFParser::parse( | ||
| 128 | QPDF& context, | 129 | QPDF& context, |
| 129 | bool sanity_checks) | 130 | bool sanity_checks) |
| 130 | { | 131 | { |
| 131 | - return QPDFParser( | 132 | + return Parser( |
| 132 | input, | 133 | input, |
| 133 | make_description(input.getName(), object_description), | 134 | make_description(input.getName(), object_description), |
| 134 | object_description, | 135 | object_description, |
| @@ -143,10 +144,10 @@ QPDFParser::parse( | @@ -143,10 +144,10 @@ QPDFParser::parse( | ||
| 143 | } | 144 | } |
| 144 | 145 | ||
| 145 | QPDFObjectHandle | 146 | QPDFObjectHandle |
| 146 | -QPDFParser::parse( | 147 | +Parser::parse( |
| 147 | is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) | 148 | is::OffsetBuffer& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) |
| 148 | { | 149 | { |
| 149 | - return QPDFParser( | 150 | + return Parser( |
| 150 | input, | 151 | input, |
| 151 | std::make_shared<QPDFObject::Description>( | 152 | std::make_shared<QPDFObject::Description>( |
| 152 | QPDFObject::ObjStreamDescr(stream_id, obj_id)), | 153 | QPDFObject::ObjStreamDescr(stream_id, obj_id)), |
| @@ -161,7 +162,7 @@ QPDFParser::parse( | @@ -161,7 +162,7 @@ QPDFParser::parse( | ||
| 161 | } | 162 | } |
| 162 | 163 | ||
| 163 | QPDFObjectHandle | 164 | QPDFObjectHandle |
| 164 | -QPDFParser::parse(bool content_stream) | 165 | +Parser::parse(bool content_stream) |
| 165 | { | 166 | { |
| 166 | try { | 167 | try { |
| 167 | return parse_first(content_stream); | 168 | return parse_first(content_stream); |
| @@ -178,20 +179,20 @@ QPDFParser::parse(bool content_stream) | @@ -178,20 +179,20 @@ QPDFParser::parse(bool content_stream) | ||
| 178 | } | 179 | } |
| 179 | 180 | ||
| 180 | QPDFObjectHandle | 181 | QPDFObjectHandle |
| 181 | -QPDFParser::parse_first(bool content_stream) | 182 | +Parser::parse_first(bool content_stream) |
| 182 | { | 183 | { |
| 183 | // This method must take care not to resolve any objects. Don't check the type of any object | 184 | // This method must take care not to resolve any objects. Don't check the type of any object |
| 184 | // without first ensuring that it is a direct object. Otherwise, doing so may have the side | 185 | // without first ensuring that it is a direct object. Otherwise, doing so may have the side |
| 185 | // effect of reading the object and changing the file pointer. If you do this, it will cause a | 186 | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
| 186 | // logic error to be thrown from QPDF::inParse(). | 187 | // logic error to be thrown from QPDF::inParse(). |
| 187 | 188 | ||
| 188 | - QPDF::Doc::ParseGuard pg(context); | ||
| 189 | - start = input.tell(); | ||
| 190 | - if (!tokenizer.nextToken(input, object_description)) { | ||
| 191 | - warn(tokenizer.getErrorMessage()); | 189 | + QPDF::Doc::ParseGuard pg(context_); |
| 190 | + start_ = input_.tell(); | ||
| 191 | + if (!tokenizer_.nextToken(input_, object_description_)) { | ||
| 192 | + warn(tokenizer_.getErrorMessage()); | ||
| 192 | } | 193 | } |
| 193 | 194 | ||
| 194 | - switch (tokenizer.getType()) { | 195 | + switch (tokenizer_.getType()) { |
| 195 | case QPDFTokenizer::tt_eof: | 196 | case QPDFTokenizer::tt_eof: |
| 196 | if (content_stream) { | 197 | if (content_stream) { |
| 197 | // In content stream mode, leave object uninitialized to indicate EOF | 198 | // In content stream mode, leave object uninitialized to indicate EOF |
| @@ -219,57 +220,57 @@ QPDFParser::parse_first(bool content_stream) | @@ -219,57 +220,57 @@ QPDFParser::parse_first(bool content_stream) | ||
| 219 | 220 | ||
| 220 | case QPDFTokenizer::tt_array_open: | 221 | case QPDFTokenizer::tt_array_open: |
| 221 | case QPDFTokenizer::tt_dict_open: | 222 | case QPDFTokenizer::tt_dict_open: |
| 222 | - stack.clear(); | ||
| 223 | - stack.emplace_back( | ||
| 224 | - input, | ||
| 225 | - (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key); | ||
| 226 | - frame = &stack.back(); | ||
| 227 | - return parseRemainder(content_stream); | 223 | + stack_.clear(); |
| 224 | + stack_.emplace_back( | ||
| 225 | + input_, | ||
| 226 | + (tokenizer_.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key); | ||
| 227 | + frame_ = &stack_.back(); | ||
| 228 | + return parse_remainder(content_stream); | ||
| 228 | 229 | ||
| 229 | case QPDFTokenizer::tt_bool: | 230 | case QPDFTokenizer::tt_bool: |
| 230 | - return withDescription<QPDF_Bool>(tokenizer.getValue() == "true"); | 231 | + return with_description<QPDF_Bool>(tokenizer_.getValue() == "true"); |
| 231 | 232 | ||
| 232 | case QPDFTokenizer::tt_null: | 233 | case QPDFTokenizer::tt_null: |
| 233 | return {QPDFObject::create<QPDF_Null>()}; | 234 | return {QPDFObject::create<QPDF_Null>()}; |
| 234 | 235 | ||
| 235 | case QPDFTokenizer::tt_integer: | 236 | case QPDFTokenizer::tt_integer: |
| 236 | - return withDescription<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); | 237 | + return with_description<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str())); |
| 237 | 238 | ||
| 238 | case QPDFTokenizer::tt_real: | 239 | case QPDFTokenizer::tt_real: |
| 239 | - return withDescription<QPDF_Real>(tokenizer.getValue()); | 240 | + return with_description<QPDF_Real>(tokenizer_.getValue()); |
| 240 | 241 | ||
| 241 | case QPDFTokenizer::tt_name: | 242 | case QPDFTokenizer::tt_name: |
| 242 | - return withDescription<QPDF_Name>(tokenizer.getValue()); | 243 | + return with_description<QPDF_Name>(tokenizer_.getValue()); |
| 243 | 244 | ||
| 244 | case QPDFTokenizer::tt_word: | 245 | case QPDFTokenizer::tt_word: |
| 245 | { | 246 | { |
| 246 | - auto const& value = tokenizer.getValue(); | 247 | + auto const& value = tokenizer_.getValue(); |
| 247 | if (content_stream) { | 248 | if (content_stream) { |
| 248 | - return withDescription<QPDF_Operator>(value); | 249 | + return with_description<QPDF_Operator>(value); |
| 249 | } else if (value == "endobj") { | 250 | } else if (value == "endobj") { |
| 250 | // We just saw endobj without having read anything. Nothing in the PDF spec appears | 251 | // We just saw endobj without having read anything. Nothing in the PDF spec appears |
| 251 | // to allow empty objects, but they have been encountered in actual PDF files and | 252 | // to allow empty objects, but they have been encountered in actual PDF files and |
| 252 | // Adobe Reader appears to ignore them. Treat this as a null and do not move the | 253 | // Adobe Reader appears to ignore them. Treat this as a null and do not move the |
| 253 | // input source's offset. | 254 | // input source's offset. |
| 254 | empty_ = true; | 255 | empty_ = true; |
| 255 | - input.seek(input.getLastOffset(), SEEK_SET); | 256 | + input_.seek(input_.getLastOffset(), SEEK_SET); |
| 256 | if (!content_stream) { | 257 | if (!content_stream) { |
| 257 | warn("empty object treated as null"); | 258 | warn("empty object treated as null"); |
| 258 | } | 259 | } |
| 259 | return {}; | 260 | return {}; |
| 260 | } else { | 261 | } else { |
| 261 | warn("unknown token while reading object; treating as string"); | 262 | warn("unknown token while reading object; treating as string"); |
| 262 | - return withDescription<QPDF_String>(value); | 263 | + return with_description<QPDF_String>(value); |
| 263 | } | 264 | } |
| 264 | } | 265 | } |
| 265 | 266 | ||
| 266 | case QPDFTokenizer::tt_string: | 267 | case QPDFTokenizer::tt_string: |
| 267 | - if (decrypter) { | ||
| 268 | - std::string s{tokenizer.getValue()}; | ||
| 269 | - decrypter->decryptString(s); | ||
| 270 | - return withDescription<QPDF_String>(s); | 268 | + if (decrypter_) { |
| 269 | + std::string s{tokenizer_.getValue()}; | ||
| 270 | + decrypter_->decryptString(s); | ||
| 271 | + return with_description<QPDF_String>(s); | ||
| 271 | } else { | 272 | } else { |
| 272 | - return withDescription<QPDF_String>(tokenizer.getValue()); | 273 | + return with_description<QPDF_String>(tokenizer_.getValue()); |
| 273 | } | 274 | } |
| 274 | 275 | ||
| 275 | default: | 276 | default: |
| @@ -279,65 +280,65 @@ QPDFParser::parse_first(bool content_stream) | @@ -279,65 +280,65 @@ QPDFParser::parse_first(bool content_stream) | ||
| 279 | } | 280 | } |
| 280 | 281 | ||
| 281 | QPDFObjectHandle | 282 | QPDFObjectHandle |
| 282 | -QPDFParser::parseRemainder(bool content_stream) | 283 | +Parser::parse_remainder(bool content_stream) |
| 283 | { | 284 | { |
| 284 | // This method must take care not to resolve any objects. Don't check the type of any object | 285 | // This method must take care not to resolve any objects. Don't check the type of any object |
| 285 | // without first ensuring that it is a direct object. Otherwise, doing so may have the side | 286 | // without first ensuring that it is a direct object. Otherwise, doing so may have the side |
| 286 | // effect of reading the object and changing the file pointer. If you do this, it will cause a | 287 | // effect of reading the object and changing the file pointer. If you do this, it will cause a |
| 287 | // logic error to be thrown from QPDF::inParse(). | 288 | // logic error to be thrown from QPDF::inParse(). |
| 288 | 289 | ||
| 289 | - bad_count = 0; | 290 | + bad_count_ = 0; |
| 290 | bool b_contents = false; | 291 | bool b_contents = false; |
| 291 | 292 | ||
| 292 | while (true) { | 293 | while (true) { |
| 293 | - if (!tokenizer.nextToken(input, object_description)) { | ||
| 294 | - warn(tokenizer.getErrorMessage()); | 294 | + if (!tokenizer_.nextToken(input_, object_description_)) { |
| 295 | + warn(tokenizer_.getErrorMessage()); | ||
| 295 | } | 296 | } |
| 296 | - ++good_count; // optimistically | 297 | + ++good_count_; // optimistically |
| 297 | 298 | ||
| 298 | - if (int_count != 0) { | 299 | + if (int_count_ != 0) { |
| 299 | // Special handling of indirect references. Treat integer tokens as part of an indirect | 300 | // Special handling of indirect references. Treat integer tokens as part of an indirect |
| 300 | // reference until proven otherwise. | 301 | // reference until proven otherwise. |
| 301 | - if (tokenizer.getType() == QPDFTokenizer::tt_integer) { | ||
| 302 | - if (++int_count > 2) { | 302 | + if (tokenizer_.getType() == QPDFTokenizer::tt_integer) { |
| 303 | + if (++int_count_ > 2) { | ||
| 303 | // Process the oldest buffered integer. | 304 | // Process the oldest buffered integer. |
| 304 | - addInt(int_count); | 305 | + add_int(int_count_); |
| 305 | } | 306 | } |
| 306 | - last_offset_buffer[int_count % 2] = input.getLastOffset(); | ||
| 307 | - int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str()); | 307 | + last_offset_buffer_[int_count_ % 2] = input_.getLastOffset(); |
| 308 | + int_buffer_[int_count_ % 2] = QUtil::string_to_ll(tokenizer_.getValue().c_str()); | ||
| 308 | continue; | 309 | continue; |
| 309 | 310 | ||
| 310 | } else if ( | 311 | } else if ( |
| 311 | - int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word && | ||
| 312 | - tokenizer.getValue() == "R") { | ||
| 313 | - if (!context) { | 312 | + int_count_ >= 2 && tokenizer_.getType() == QPDFTokenizer::tt_word && |
| 313 | + tokenizer_.getValue() == "R") { | ||
| 314 | + if (!context_) { | ||
| 314 | throw std::logic_error( | 315 | throw std::logic_error( |
| 315 | - "QPDFParser::parse called without context on an object with indirect " | 316 | + "Parser::parse called without context on an object with indirect " |
| 316 | "references"); | 317 | "references"); |
| 317 | } | 318 | } |
| 318 | - auto id = QIntC::to_int(int_buffer[(int_count - 1) % 2]); | ||
| 319 | - auto gen = QIntC::to_int(int_buffer[(int_count) % 2]); | 319 | + auto id = QIntC::to_int(int_buffer_[(int_count_ - 1) % 2]); |
| 320 | + auto gen = QIntC::to_int(int_buffer_[(int_count_) % 2]); | ||
| 320 | if (!(id < 1 || gen < 0 || gen >= 65535)) { | 321 | if (!(id < 1 || gen < 0 || gen >= 65535)) { |
| 321 | - add(ParseGuard::getObject(context, id, gen, parse_pdf)); | 322 | + add(ParseGuard::getObject(context_, id, gen, parse_pdf_)); |
| 322 | } else { | 323 | } else { |
| 323 | add_bad_null( | 324 | add_bad_null( |
| 324 | "treating bad indirect reference (" + std::to_string(id) + " " + | 325 | "treating bad indirect reference (" + std::to_string(id) + " " + |
| 325 | std::to_string(gen) + " R) as null"); | 326 | std::to_string(gen) + " R) as null"); |
| 326 | } | 327 | } |
| 327 | - int_count = 0; | 328 | + int_count_ = 0; |
| 328 | continue; | 329 | continue; |
| 329 | 330 | ||
| 330 | - } else if (int_count > 0) { | 331 | + } else if (int_count_ > 0) { |
| 331 | // Process the buffered integers before processing the current token. | 332 | // Process the buffered integers before processing the current token. |
| 332 | - if (int_count > 1) { | ||
| 333 | - addInt(int_count - 1); | 333 | + if (int_count_ > 1) { |
| 334 | + add_int(int_count_ - 1); | ||
| 334 | } | 335 | } |
| 335 | - addInt(int_count); | ||
| 336 | - int_count = 0; | 336 | + add_int(int_count_); |
| 337 | + int_count_ = 0; | ||
| 337 | } | 338 | } |
| 338 | } | 339 | } |
| 339 | 340 | ||
| 340 | - switch (tokenizer.getType()) { | 341 | + switch (tokenizer_.getType()) { |
| 341 | case QPDFTokenizer::tt_eof: | 342 | case QPDFTokenizer::tt_eof: |
| 342 | warn("parse error while reading object"); | 343 | warn("parse error while reading object"); |
| 343 | if (content_stream) { | 344 | if (content_stream) { |
| @@ -349,7 +350,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -349,7 +350,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 349 | 350 | ||
| 350 | case QPDFTokenizer::tt_bad: | 351 | case QPDFTokenizer::tt_bad: |
| 351 | check_too_many_bad_tokens(); | 352 | check_too_many_bad_tokens(); |
| 352 | - addNull(); | 353 | + add_null(); |
| 353 | continue; | 354 | continue; |
| 354 | 355 | ||
| 355 | case QPDFTokenizer::tt_brace_open: | 356 | case QPDFTokenizer::tt_brace_open: |
| @@ -358,23 +359,23 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -358,23 +359,23 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 358 | continue; | 359 | continue; |
| 359 | 360 | ||
| 360 | case QPDFTokenizer::tt_array_close: | 361 | case QPDFTokenizer::tt_array_close: |
| 361 | - if (frame->state == st_array) { | ||
| 362 | - auto object = frame->null_count > 100 | ||
| 363 | - ? QPDFObject::create<QPDF_Array>(std::move(frame->olist), true) | ||
| 364 | - : QPDFObject::create<QPDF_Array>(std::move(frame->olist)); | ||
| 365 | - setDescription(object, frame->offset - 1); | 362 | + if (frame_->state == st_array) { |
| 363 | + auto object = frame_->null_count > 100 | ||
| 364 | + ? QPDFObject::create<QPDF_Array>(std::move(frame_->olist), true) | ||
| 365 | + : QPDFObject::create<QPDF_Array>(std::move(frame_->olist)); | ||
| 366 | + set_description(object, frame_->offset - 1); | ||
| 366 | // The `offset` points to the next of "[". Set the rewind offset to point to the | 367 | // The `offset` points to the next of "[". Set the rewind offset to point to the |
| 367 | // beginning of "[". This has been explicitly tested with whitespace surrounding the | 368 | // beginning of "[". This has been explicitly tested with whitespace surrounding the |
| 368 | // array start delimiter. getLastOffset points to the array end token and therefore | 369 | // array start delimiter. getLastOffset points to the array end token and therefore |
| 369 | // can't be used here. | 370 | // can't be used here. |
| 370 | - if (stack.size() <= 1) { | 371 | + if (stack_.size() <= 1) { |
| 371 | return object; | 372 | return object; |
| 372 | } | 373 | } |
| 373 | - stack.pop_back(); | ||
| 374 | - frame = &stack.back(); | 374 | + stack_.pop_back(); |
| 375 | + frame_ = &stack_.back(); | ||
| 375 | add(std::move(object)); | 376 | add(std::move(object)); |
| 376 | } else { | 377 | } else { |
| 377 | - if (sanity_checks) { | 378 | + if (sanity_checks_) { |
| 378 | // During sanity checks, assume nesting of containers is corrupt and object is | 379 | // During sanity checks, assume nesting of containers is corrupt and object is |
| 379 | // unusable. | 380 | // unusable. |
| 380 | warn("unexpected array close token; giving up on reading object"); | 381 | warn("unexpected array close token; giving up on reading object"); |
| @@ -385,46 +386,46 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -385,46 +386,46 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 385 | continue; | 386 | continue; |
| 386 | 387 | ||
| 387 | case QPDFTokenizer::tt_dict_close: | 388 | case QPDFTokenizer::tt_dict_close: |
| 388 | - if (frame->state <= st_dictionary_value) { | 389 | + if (frame_->state <= st_dictionary_value) { |
| 389 | // Attempt to recover more or less gracefully from invalid dictionaries. | 390 | // Attempt to recover more or less gracefully from invalid dictionaries. |
| 390 | - auto& dict = frame->dict; | 391 | + auto& dict = frame_->dict; |
| 391 | 392 | ||
| 392 | - if (frame->state == st_dictionary_value) { | 393 | + if (frame_->state == st_dictionary_value) { |
| 393 | warn( | 394 | warn( |
| 394 | - frame->offset, | 395 | + frame_->offset, |
| 395 | "dictionary ended prematurely; using null as value for last key"); | 396 | "dictionary ended prematurely; using null as value for last key"); |
| 396 | - dict[frame->key] = QPDFObject::create<QPDF_Null>(); | 397 | + dict[frame_->key] = QPDFObject::create<QPDF_Null>(); |
| 397 | } | 398 | } |
| 398 | - if (!frame->olist.empty()) { | ||
| 399 | - if (sanity_checks) { | 399 | + if (!frame_->olist.empty()) { |
| 400 | + if (sanity_checks_) { | ||
| 400 | warn( | 401 | warn( |
| 401 | - frame->offset, | 402 | + frame_->offset, |
| 402 | "expected dictionary keys but found non-name objects; ignoring"); | 403 | "expected dictionary keys but found non-name objects; ignoring"); |
| 403 | } else { | 404 | } else { |
| 404 | - fixMissingKeys(); | 405 | + fix_missing_keys(); |
| 405 | } | 406 | } |
| 406 | } | 407 | } |
| 407 | 408 | ||
| 408 | - if (!frame->contents_string.empty() && dict.contains("/Type") && | 409 | + if (!frame_->contents_string.empty() && dict.contains("/Type") && |
| 409 | dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") && | 410 | dict["/Type"].isNameAndEquals("/Sig") && dict.contains("/ByteRange") && |
| 410 | dict.contains("/Contents") && dict["/Contents"].isString()) { | 411 | dict.contains("/Contents") && dict["/Contents"].isString()) { |
| 411 | - dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string); | ||
| 412 | - dict["/Contents"].setParsedOffset(frame->contents_offset); | 412 | + dict["/Contents"] = QPDFObjectHandle::newString(frame_->contents_string); |
| 413 | + dict["/Contents"].setParsedOffset(frame_->contents_offset); | ||
| 413 | } | 414 | } |
| 414 | auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict)); | 415 | auto object = QPDFObject::create<QPDF_Dictionary>(std::move(dict)); |
| 415 | - setDescription(object, frame->offset - 2); | 416 | + set_description(object, frame_->offset - 2); |
| 416 | // The `offset` points to the next of "<<". Set the rewind offset to point to the | 417 | // The `offset` points to the next of "<<". Set the rewind offset to point to the |
| 417 | // beginning of "<<". This has been explicitly tested with whitespace surrounding | 418 | // beginning of "<<". This has been explicitly tested with whitespace surrounding |
| 418 | // the dictionary start delimiter. getLastOffset points to the dictionary end token | 419 | // the dictionary start delimiter. getLastOffset points to the dictionary end token |
| 419 | // and therefore can't be used here. | 420 | // and therefore can't be used here. |
| 420 | - if (stack.size() <= 1) { | 421 | + if (stack_.size() <= 1) { |
| 421 | return object; | 422 | return object; |
| 422 | } | 423 | } |
| 423 | - stack.pop_back(); | ||
| 424 | - frame = &stack.back(); | 424 | + stack_.pop_back(); |
| 425 | + frame_ = &stack_.back(); | ||
| 425 | add(std::move(object)); | 426 | add(std::move(object)); |
| 426 | } else { | 427 | } else { |
| 427 | - if (sanity_checks) { | 428 | + if (sanity_checks_) { |
| 428 | // During sanity checks, assume nesting of containers is corrupt and object is | 429 | // During sanity checks, assume nesting of containers is corrupt and object is |
| 429 | // unusable. | 430 | // unusable. |
| 430 | warn("unexpected dictionary close token; giving up on reading object"); | 431 | warn("unexpected dictionary close token; giving up on reading object"); |
| @@ -436,60 +437,60 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -436,60 +437,60 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 436 | 437 | ||
| 437 | case QPDFTokenizer::tt_array_open: | 438 | case QPDFTokenizer::tt_array_open: |
| 438 | case QPDFTokenizer::tt_dict_open: | 439 | case QPDFTokenizer::tt_dict_open: |
| 439 | - if (stack.size() > max_nesting) { | 440 | + if (stack_.size() > max_nesting) { |
| 440 | limits_error( | 441 | limits_error( |
| 441 | "parser-max-nesting", "ignoring excessively deeply nested data structure"); | 442 | "parser-max-nesting", "ignoring excessively deeply nested data structure"); |
| 442 | } | 443 | } |
| 443 | b_contents = false; | 444 | b_contents = false; |
| 444 | - stack.emplace_back( | ||
| 445 | - input, | ||
| 446 | - (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array | ||
| 447 | - : st_dictionary_key); | ||
| 448 | - frame = &stack.back(); | 445 | + stack_.emplace_back( |
| 446 | + input_, | ||
| 447 | + (tokenizer_.getType() == QPDFTokenizer::tt_array_open) ? st_array | ||
| 448 | + : st_dictionary_key); | ||
| 449 | + frame_ = &stack_.back(); | ||
| 449 | continue; | 450 | continue; |
| 450 | 451 | ||
| 451 | case QPDFTokenizer::tt_bool: | 452 | case QPDFTokenizer::tt_bool: |
| 452 | - addScalar<QPDF_Bool>(tokenizer.getValue() == "true"); | 453 | + add_scalar<QPDF_Bool>(tokenizer_.getValue() == "true"); |
| 453 | continue; | 454 | continue; |
| 454 | 455 | ||
| 455 | case QPDFTokenizer::tt_null: | 456 | case QPDFTokenizer::tt_null: |
| 456 | - addNull(); | 457 | + add_null(); |
| 457 | continue; | 458 | continue; |
| 458 | 459 | ||
| 459 | case QPDFTokenizer::tt_integer: | 460 | case QPDFTokenizer::tt_integer: |
| 460 | if (!content_stream) { | 461 | if (!content_stream) { |
| 461 | // Buffer token in case it is part of an indirect reference. | 462 | // Buffer token in case it is part of an indirect reference. |
| 462 | - last_offset_buffer[1] = input.getLastOffset(); | ||
| 463 | - int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str()); | ||
| 464 | - int_count = 1; | 463 | + last_offset_buffer_[1] = input_.getLastOffset(); |
| 464 | + int_buffer_[1] = QUtil::string_to_ll(tokenizer_.getValue().c_str()); | ||
| 465 | + int_count_ = 1; | ||
| 465 | } else { | 466 | } else { |
| 466 | - addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str())); | 467 | + add_scalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer_.getValue().c_str())); |
| 467 | } | 468 | } |
| 468 | continue; | 469 | continue; |
| 469 | 470 | ||
| 470 | case QPDFTokenizer::tt_real: | 471 | case QPDFTokenizer::tt_real: |
| 471 | - addScalar<QPDF_Real>(tokenizer.getValue()); | 472 | + add_scalar<QPDF_Real>(tokenizer_.getValue()); |
| 472 | continue; | 473 | continue; |
| 473 | 474 | ||
| 474 | case QPDFTokenizer::tt_name: | 475 | case QPDFTokenizer::tt_name: |
| 475 | - if (frame->state == st_dictionary_key) { | ||
| 476 | - frame->key = tokenizer.getValue(); | ||
| 477 | - frame->state = st_dictionary_value; | ||
| 478 | - b_contents = decrypter && frame->key == "/Contents"; | 476 | + if (frame_->state == st_dictionary_key) { |
| 477 | + frame_->key = tokenizer_.getValue(); | ||
| 478 | + frame_->state = st_dictionary_value; | ||
| 479 | + b_contents = decrypter_ && frame_->key == "/Contents"; | ||
| 479 | continue; | 480 | continue; |
| 480 | } else { | 481 | } else { |
| 481 | - addScalar<QPDF_Name>(tokenizer.getValue()); | 482 | + add_scalar<QPDF_Name>(tokenizer_.getValue()); |
| 482 | } | 483 | } |
| 483 | continue; | 484 | continue; |
| 484 | 485 | ||
| 485 | case QPDFTokenizer::tt_word: | 486 | case QPDFTokenizer::tt_word: |
| 486 | if (content_stream) { | 487 | if (content_stream) { |
| 487 | - addScalar<QPDF_Operator>(tokenizer.getValue()); | 488 | + add_scalar<QPDF_Operator>(tokenizer_.getValue()); |
| 488 | continue; | 489 | continue; |
| 489 | } | 490 | } |
| 490 | 491 | ||
| 491 | - if (sanity_checks) { | ||
| 492 | - if (tokenizer.getValue() == "endobj" || tokenizer.getValue() == "endstream") { | 492 | + if (sanity_checks_) { |
| 493 | + if (tokenizer_.getValue() == "endobj" || tokenizer_.getValue() == "endstream") { | ||
| 493 | // During sanity checks, assume an unexpected endobj or endstream indicates that | 494 | // During sanity checks, assume an unexpected endobj or endstream indicates that |
| 494 | // we are parsing past the end of the object. | 495 | // we are parsing past the end of the object. |
| 495 | warn( | 496 | warn( |
| @@ -504,24 +505,24 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -504,24 +505,24 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 504 | 505 | ||
| 505 | warn("unknown token while reading object; treating as string"); | 506 | warn("unknown token while reading object; treating as string"); |
| 506 | check_too_many_bad_tokens(); | 507 | check_too_many_bad_tokens(); |
| 507 | - addScalar<QPDF_String>(tokenizer.getValue()); | 508 | + add_scalar<QPDF_String>(tokenizer_.getValue()); |
| 508 | 509 | ||
| 509 | continue; | 510 | continue; |
| 510 | 511 | ||
| 511 | case QPDFTokenizer::tt_string: | 512 | case QPDFTokenizer::tt_string: |
| 512 | { | 513 | { |
| 513 | - auto const& val = tokenizer.getValue(); | ||
| 514 | - if (decrypter) { | 514 | + auto const& val = tokenizer_.getValue(); |
| 515 | + if (decrypter_) { | ||
| 515 | if (b_contents) { | 516 | if (b_contents) { |
| 516 | - frame->contents_string = val; | ||
| 517 | - frame->contents_offset = input.getLastOffset(); | 517 | + frame_->contents_string = val; |
| 518 | + frame_->contents_offset = input_.getLastOffset(); | ||
| 518 | b_contents = false; | 519 | b_contents = false; |
| 519 | } | 520 | } |
| 520 | std::string s{val}; | 521 | std::string s{val}; |
| 521 | - decrypter->decryptString(s); | ||
| 522 | - addScalar<QPDF_String>(s); | 522 | + decrypter_->decryptString(s); |
| 523 | + add_scalar<QPDF_String>(s); | ||
| 523 | } else { | 524 | } else { |
| 524 | - addScalar<QPDF_String>(val); | 525 | + add_scalar<QPDF_String>(val); |
| 525 | } | 526 | } |
| 526 | } | 527 | } |
| 527 | continue; | 528 | continue; |
| @@ -533,107 +534,107 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -533,107 +534,107 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 533 | } | 534 | } |
| 534 | 535 | ||
| 535 | void | 536 | void |
| 536 | -QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) | 537 | +Parser::add(std::shared_ptr<QPDFObject>&& obj) |
| 537 | { | 538 | { |
| 538 | - if (frame->state != st_dictionary_value) { | 539 | + if (frame_->state != st_dictionary_value) { |
| 539 | // If state is st_dictionary_key then there is a missing key. Push onto olist for | 540 | // If state is st_dictionary_key then there is a missing key. Push onto olist for |
| 540 | // processing once the tt_dict_close token has been found. | 541 | // processing once the tt_dict_close token has been found. |
| 541 | - frame->olist.emplace_back(std::move(obj)); | 542 | + frame_->olist.emplace_back(std::move(obj)); |
| 542 | } else { | 543 | } else { |
| 543 | - if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) { | ||
| 544 | - warnDuplicateKey(); | 544 | + if (auto res = frame_->dict.insert_or_assign(frame_->key, std::move(obj)); !res.second) { |
| 545 | + warn_duplicate_key(); | ||
| 545 | } | 546 | } |
| 546 | - frame->state = st_dictionary_key; | 547 | + frame_->state = st_dictionary_key; |
| 547 | } | 548 | } |
| 548 | } | 549 | } |
| 549 | 550 | ||
| 550 | void | 551 | void |
| 551 | -QPDFParser::addNull() | 552 | +Parser::add_null() |
| 552 | { | 553 | { |
| 553 | const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>(); | 554 | const static ObjectPtr null_obj = QPDFObject::create<QPDF_Null>(); |
| 554 | 555 | ||
| 555 | - if (frame->state != st_dictionary_value) { | 556 | + if (frame_->state != st_dictionary_value) { |
| 556 | // If state is st_dictionary_key then there is a missing key. Push onto olist for | 557 | // If state is st_dictionary_key then there is a missing key. Push onto olist for |
| 557 | // processing once the tt_dict_close token has been found. | 558 | // processing once the tt_dict_close token has been found. |
| 558 | - frame->olist.emplace_back(null_obj); | 559 | + frame_->olist.emplace_back(null_obj); |
| 559 | } else { | 560 | } else { |
| 560 | - if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) { | ||
| 561 | - warnDuplicateKey(); | 561 | + if (auto res = frame_->dict.insert_or_assign(frame_->key, null_obj); !res.second) { |
| 562 | + warn_duplicate_key(); | ||
| 562 | } | 563 | } |
| 563 | - frame->state = st_dictionary_key; | 564 | + frame_->state = st_dictionary_key; |
| 564 | } | 565 | } |
| 565 | - ++frame->null_count; | 566 | + ++frame_->null_count; |
| 566 | } | 567 | } |
| 567 | 568 | ||
| 568 | void | 569 | void |
| 569 | -QPDFParser::add_bad_null(std::string const& msg) | 570 | +Parser::add_bad_null(std::string const& msg) |
| 570 | { | 571 | { |
| 571 | warn(msg); | 572 | warn(msg); |
| 572 | check_too_many_bad_tokens(); | 573 | check_too_many_bad_tokens(); |
| 573 | - addNull(); | 574 | + add_null(); |
| 574 | } | 575 | } |
| 575 | 576 | ||
| 576 | void | 577 | void |
| 577 | -QPDFParser::addInt(int count) | 578 | +Parser::add_int(int count) |
| 578 | { | 579 | { |
| 579 | - auto obj = QPDFObject::create<QPDF_Integer>(int_buffer[count % 2]); | ||
| 580 | - obj->setDescription(context, description, last_offset_buffer[count % 2]); | 580 | + auto obj = QPDFObject::create<QPDF_Integer>(int_buffer_[count % 2]); |
| 581 | + obj->setDescription(context_, description_, last_offset_buffer_[count % 2]); | ||
| 581 | add(std::move(obj)); | 582 | add(std::move(obj)); |
| 582 | } | 583 | } |
| 583 | 584 | ||
| 584 | template <typename T, typename... Args> | 585 | template <typename T, typename... Args> |
| 585 | void | 586 | void |
| 586 | -QPDFParser::addScalar(Args&&... args) | 587 | +Parser::add_scalar(Args&&... args) |
| 587 | { | 588 | { |
| 588 | - auto limit = Limits::parser_max_container_size(bad_count || sanity_checks); | ||
| 589 | - if (frame->olist.size() >= limit || frame->dict.size() >= limit) { | 589 | + auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_); |
| 590 | + if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) { | ||
| 590 | // Stop adding scalars. We are going to abort when the close token or a bad token is | 591 | // Stop adding scalars. We are going to abort when the close token or a bad token is |
| 591 | // encountered. | 592 | // encountered. |
| 592 | - max_bad_count = 1; | 593 | + max_bad_count_ = 1; |
| 593 | check_too_many_bad_tokens(); // always throws Error() | 594 | check_too_many_bad_tokens(); // always throws Error() |
| 594 | } | 595 | } |
| 595 | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 596 | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); |
| 596 | - obj->setDescription(context, description, input.getLastOffset()); | 597 | + obj->setDescription(context_, description_, input_.getLastOffset()); |
| 597 | add(std::move(obj)); | 598 | add(std::move(obj)); |
| 598 | } | 599 | } |
| 599 | 600 | ||
| 600 | template <typename T, typename... Args> | 601 | template <typename T, typename... Args> |
| 601 | QPDFObjectHandle | 602 | QPDFObjectHandle |
| 602 | -QPDFParser::withDescription(Args&&... args) | 603 | +Parser::with_description(Args&&... args) |
| 603 | { | 604 | { |
| 604 | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); | 605 | auto obj = QPDFObject::create<T>(std::forward<Args>(args)...); |
| 605 | - obj->setDescription(context, description, start); | 606 | + obj->setDescription(context_, description_, start_); |
| 606 | return {obj}; | 607 | return {obj}; |
| 607 | } | 608 | } |
| 608 | 609 | ||
| 609 | void | 610 | void |
| 610 | -QPDFParser::setDescription(ObjectPtr& obj, qpdf_offset_t parsed_offset) | 611 | +Parser::set_description(ObjectPtr& obj, qpdf_offset_t parsed_offset) |
| 611 | { | 612 | { |
| 612 | if (obj) { | 613 | if (obj) { |
| 613 | - obj->setDescription(context, description, parsed_offset); | 614 | + obj->setDescription(context_, description_, parsed_offset); |
| 614 | } | 615 | } |
| 615 | } | 616 | } |
| 616 | 617 | ||
| 617 | void | 618 | void |
| 618 | -QPDFParser::fixMissingKeys() | 619 | +Parser::fix_missing_keys() |
| 619 | { | 620 | { |
| 620 | std::set<std::string> names; | 621 | std::set<std::string> names; |
| 621 | - for (auto& obj: frame->olist) { | 622 | + for (auto& obj: frame_->olist) { |
| 622 | if (obj.raw_type_code() == ::ot_name) { | 623 | if (obj.raw_type_code() == ::ot_name) { |
| 623 | names.insert(obj.obj_sp()->getStringValue()); | 624 | names.insert(obj.obj_sp()->getStringValue()); |
| 624 | } | 625 | } |
| 625 | } | 626 | } |
| 626 | int next_fake_key = 1; | 627 | int next_fake_key = 1; |
| 627 | - for (auto const& item: frame->olist) { | 628 | + for (auto const& item: frame_->olist) { |
| 628 | while (true) { | 629 | while (true) { |
| 629 | const std::string key = "/QPDFFake" + std::to_string(next_fake_key++); | 630 | const std::string key = "/QPDFFake" + std::to_string(next_fake_key++); |
| 630 | - const bool found_fake = !frame->dict.contains(key) && !names.contains(key); | 631 | + const bool found_fake = !frame_->dict.contains(key) && !names.contains(key); |
| 631 | QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); | 632 | QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); |
| 632 | if (found_fake) { | 633 | if (found_fake) { |
| 633 | warn( | 634 | warn( |
| 634 | - frame->offset, | 635 | + frame_->offset, |
| 635 | "expected dictionary key but found non-name object; inserting key " + key); | 636 | "expected dictionary key but found non-name object; inserting key " + key); |
| 636 | - frame->dict[key] = item; | 637 | + frame_->dict[key] = item; |
| 637 | break; | 638 | break; |
| 638 | } | 639 | } |
| 639 | } | 640 | } |
| @@ -641,11 +642,11 @@ QPDFParser::fixMissingKeys() | @@ -641,11 +642,11 @@ QPDFParser::fixMissingKeys() | ||
| 641 | } | 642 | } |
| 642 | 643 | ||
| 643 | void | 644 | void |
| 644 | -QPDFParser::check_too_many_bad_tokens() | 645 | +Parser::check_too_many_bad_tokens() |
| 645 | { | 646 | { |
| 646 | - auto limit = Limits::parser_max_container_size(bad_count || sanity_checks); | ||
| 647 | - if (frame->olist.size() >= limit || frame->dict.size() >= limit) { | ||
| 648 | - if (bad_count) { | 647 | + auto limit = Limits::parser_max_container_size(bad_count_ || sanity_checks_); |
| 648 | + if (frame_->olist.size() >= limit || frame_->dict.size() >= limit) { | ||
| 649 | + if (bad_count_) { | ||
| 649 | limits_error( | 650 | limits_error( |
| 650 | "parser-max-container-size-damaged", | 651 | "parser-max-container-size-damaged", |
| 651 | "encountered errors while parsing an array or dictionary with more than " + | 652 | "encountered errors while parsing an array or dictionary with more than " + |
| @@ -656,27 +657,27 @@ QPDFParser::check_too_many_bad_tokens() | @@ -656,27 +657,27 @@ QPDFParser::check_too_many_bad_tokens() | ||
| 656 | "encountered an array or dictionary with more than " + std::to_string(limit) + | 657 | "encountered an array or dictionary with more than " + std::to_string(limit) + |
| 657 | " elements during xref recovery; giving up on reading object"); | 658 | " elements during xref recovery; giving up on reading object"); |
| 658 | } | 659 | } |
| 659 | - if (max_bad_count && --max_bad_count == 0) { | 660 | + if (max_bad_count_ && --max_bad_count_ == 0) { |
| 660 | limits_error( | 661 | limits_error( |
| 661 | "parser-max-errors", "too many errors during parsing; treating object as null"); | 662 | "parser-max-errors", "too many errors during parsing; treating object as null"); |
| 662 | } | 663 | } |
| 663 | - if (good_count > 4) { | ||
| 664 | - good_count = 0; | ||
| 665 | - bad_count = 1; | 664 | + if (good_count_ > 4) { |
| 665 | + good_count_ = 0; | ||
| 666 | + bad_count_ = 1; | ||
| 666 | return; | 667 | return; |
| 667 | } | 668 | } |
| 668 | - if (++bad_count > 5 || | ||
| 669 | - (frame->state != st_array && std::cmp_less(max_bad_count, frame->olist.size()))) { | 669 | + if (++bad_count_ > 5 || |
| 670 | + (frame_->state != st_array && std::cmp_less(max_bad_count_, frame_->olist.size()))) { | ||
| 670 | // Give up after 5 errors in close proximity or if the number of missing dictionary keys | 671 | // Give up after 5 errors in close proximity or if the number of missing dictionary keys |
| 671 | // exceeds the remaining number of allowable total errors. | 672 | // exceeds the remaining number of allowable total errors. |
| 672 | warn("too many errors; giving up on reading object"); | 673 | warn("too many errors; giving up on reading object"); |
| 673 | throw Error(); | 674 | throw Error(); |
| 674 | } | 675 | } |
| 675 | - good_count = 0; | 676 | + good_count_ = 0; |
| 676 | } | 677 | } |
| 677 | 678 | ||
| 678 | void | 679 | void |
| 679 | -QPDFParser::limits_error(std::string const& limit, std::string const& msg) | 680 | +Parser::limits_error(std::string const& limit, std::string const& msg) |
| 680 | { | 681 | { |
| 681 | Limits::error(); | 682 | Limits::error(); |
| 682 | warn("limits error("s + limit + "): " + msg); | 683 | warn("limits error("s + limit + "): " + msg); |
| @@ -684,40 +685,41 @@ QPDFParser::limits_error(std::string const& limit, std::string const& msg) | @@ -684,40 +685,41 @@ QPDFParser::limits_error(std::string const& limit, std::string const& msg) | ||
| 684 | } | 685 | } |
| 685 | 686 | ||
| 686 | void | 687 | void |
| 687 | -QPDFParser::warn(QPDFExc const& e) const | 688 | +Parser::warn(QPDFExc const& e) const |
| 688 | { | 689 | { |
| 689 | // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the | 690 | // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the |
| 690 | // object. If parsing for some other reason, such as an explicit creation of an object from a | 691 | // object. If parsing for some other reason, such as an explicit creation of an object from a |
| 691 | // string, then just throw the exception. | 692 | // string, then just throw the exception. |
| 692 | - if (context) { | ||
| 693 | - context->warn(e); | 693 | + if (context_) { |
| 694 | + context_->warn(e); | ||
| 694 | } else { | 695 | } else { |
| 695 | throw e; | 696 | throw e; |
| 696 | } | 697 | } |
| 697 | } | 698 | } |
| 698 | 699 | ||
| 699 | void | 700 | void |
| 700 | -QPDFParser::warnDuplicateKey() | 701 | +Parser::warn_duplicate_key() |
| 701 | { | 702 | { |
| 702 | warn( | 703 | warn( |
| 703 | - frame->offset, | ||
| 704 | - "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones"); | 704 | + frame_->offset, |
| 705 | + "dictionary has duplicated key " + frame_->key + | ||
| 706 | + "; last occurrence overrides earlier ones"); | ||
| 705 | } | 707 | } |
| 706 | 708 | ||
| 707 | void | 709 | void |
| 708 | -QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const | 710 | +Parser::warn(qpdf_offset_t offset, std::string const& msg) const |
| 709 | { | 711 | { |
| 710 | - if (stream_id) { | ||
| 711 | - std::string descr = "object "s + std::to_string(obj_id) + " 0"; | ||
| 712 | - std::string name = context->getFilename() + " object stream " + std::to_string(stream_id); | 712 | + if (stream_id_) { |
| 713 | + std::string descr = "object "s + std::to_string(obj_id_) + " 0"; | ||
| 714 | + std::string name = context_->getFilename() + " object stream " + std::to_string(stream_id_); | ||
| 713 | warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg)); | 715 | warn(QPDFExc(qpdf_e_damaged_pdf, name, descr, offset, msg)); |
| 714 | } else { | 716 | } else { |
| 715 | - warn(QPDFExc(qpdf_e_damaged_pdf, input.getName(), object_description, offset, msg)); | 717 | + warn(QPDFExc(qpdf_e_damaged_pdf, input_.getName(), object_description_, offset, msg)); |
| 716 | } | 718 | } |
| 717 | } | 719 | } |
| 718 | 720 | ||
| 719 | void | 721 | void |
| 720 | -QPDFParser::warn(std::string const& msg) const | 722 | +Parser::warn(std::string const& msg) const |
| 721 | { | 723 | { |
| 722 | - warn(input.getLastOffset(), msg); | 724 | + warn(input_.getLastOffset(), msg); |
| 723 | } | 725 | } |
libqpdf/QPDF_objects.cc
| @@ -25,6 +25,7 @@ using namespace qpdf; | @@ -25,6 +25,7 @@ using namespace qpdf; | ||
| 25 | using namespace std::literals; | 25 | using namespace std::literals; |
| 26 | 26 | ||
| 27 | using Objects = QPDF::Doc::Objects; | 27 | using Objects = QPDF::Doc::Objects; |
| 28 | +using Parser = impl::Parser; | ||
| 28 | 29 | ||
| 29 | QPDFXRefEntry::QPDFXRefEntry() = default; | 30 | QPDFXRefEntry::QPDFXRefEntry() = default; |
| 30 | 31 | ||
| @@ -1287,7 +1288,7 @@ Objects::readTrailer() | @@ -1287,7 +1288,7 @@ Objects::readTrailer() | ||
| 1287 | { | 1288 | { |
| 1288 | qpdf_offset_t offset = m->file->tell(); | 1289 | qpdf_offset_t offset = m->file->tell(); |
| 1289 | auto object = | 1290 | auto object = |
| 1290 | - QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, qpdf, m->reconstructed_xref); | 1291 | + Parser::parse(*m->file, "trailer", m->tokenizer, nullptr, qpdf, m->reconstructed_xref); |
| 1291 | if (object.isDictionary() && m->objects.readToken(*m->file).isWord("stream")) { | 1292 | if (object.isDictionary() && m->objects.readToken(*m->file).isWord("stream")) { |
| 1292 | warn(damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer")); | 1293 | warn(damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer")); |
| 1293 | } | 1294 | } |
| @@ -1304,7 +1305,7 @@ Objects::readObject(std::string const& description, QPDFObjGen og) | @@ -1304,7 +1305,7 @@ Objects::readObject(std::string const& description, QPDFObjGen og) | ||
| 1304 | 1305 | ||
| 1305 | StringDecrypter decrypter{&qpdf, og}; | 1306 | StringDecrypter decrypter{&qpdf, og}; |
| 1306 | StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; | 1307 | StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; |
| 1307 | - auto object = QPDFParser::parse( | 1308 | + auto object = Parser::parse( |
| 1308 | *m->file, | 1309 | *m->file, |
| 1309 | m->last_object_description, | 1310 | m->last_object_description, |
| 1310 | m->tokenizer, | 1311 | m->tokenizer, |
| @@ -1834,7 +1835,7 @@ Objects::resolveObjectsInStream(int obj_stream_number) | @@ -1834,7 +1835,7 @@ Objects::resolveObjectsInStream(int obj_stream_number) | ||
| 1834 | if (entry != m->xref_table.end() && entry->second.getType() == 2 && | 1835 | if (entry != m->xref_table.end() && entry->second.getType() == 2 && |
| 1835 | entry->second.getObjStreamNumber() == obj_stream_number) { | 1836 | entry->second.getObjStreamNumber() == obj_stream_number) { |
| 1836 | is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset); | 1837 | is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset); |
| 1837 | - if (auto oh = QPDFParser::parse(in, obj_stream_number, obj_id, m->tokenizer, qpdf)) { | 1838 | + if (auto oh = Parser::parse(in, obj_stream_number, obj_id, m->tokenizer, qpdf)) { |
| 1838 | updateCache(og, oh.obj_sp(), end_before_space, end_after_space); | 1839 | updateCache(og, oh.obj_sp(), end_before_space, end_after_space); |
| 1839 | } | 1840 | } |
| 1840 | } else { | 1841 | } else { |
libqpdf/qpdf/QPDFParser.hh
| @@ -13,153 +13,277 @@ | @@ -13,153 +13,277 @@ | ||
| 13 | using namespace qpdf; | 13 | using namespace qpdf; |
| 14 | using namespace qpdf::global; | 14 | using namespace qpdf::global; |
| 15 | 15 | ||
| 16 | -class QPDFParser | 16 | +namespace qpdf::impl |
| 17 | { | 17 | { |
| 18 | - public: | ||
| 19 | - class Error: public std::exception | 18 | + /// @class Parser |
| 19 | + /// @brief Internal parser for PDF objects and content streams. | ||
| 20 | + /// @par | ||
| 21 | + /// The Parser class provides static methods for parsing PDF objects from input sources. | ||
| 22 | + /// It handles tokenization, error recovery, and object construction with proper offset | ||
| 23 | + /// tracking and description for error reporting. | ||
| 24 | + class Parser | ||
| 20 | { | 25 | { |
| 21 | public: | 26 | public: |
| 22 | - Error() = default; | ||
| 23 | - virtual ~Error() noexcept = default; | ||
| 24 | - }; | 27 | + /// @brief Exception thrown when parser encounters an unrecoverable error. |
| 28 | + class Error: public std::exception | ||
| 29 | + { | ||
| 30 | + public: | ||
| 31 | + Error() = default; | ||
| 32 | + virtual ~Error() noexcept = default; | ||
| 33 | + }; | ||
| 25 | 34 | ||
| 26 | - static QPDFObjectHandle | ||
| 27 | - parse(InputSource& input, std::string const& object_description, QPDF* context); | ||
| 28 | - | ||
| 29 | - static QPDFObjectHandle parse_content( | ||
| 30 | - InputSource& input, | ||
| 31 | - std::shared_ptr<QPDFObject::Description> sp_description, | ||
| 32 | - qpdf::Tokenizer& tokenizer, | ||
| 33 | - QPDF* context); | ||
| 34 | - | ||
| 35 | - // For use by deprecated QPDFObjectHandle::parse. | ||
| 36 | - static QPDFObjectHandle parse( | ||
| 37 | - InputSource& input, | ||
| 38 | - std::string const& object_description, | ||
| 39 | - QPDFTokenizer& tokenizer, | ||
| 40 | - bool& empty, | ||
| 41 | - QPDFObjectHandle::StringDecrypter* decrypter, | ||
| 42 | - QPDF* context); | ||
| 43 | - | ||
| 44 | - // For use by QPDF. | ||
| 45 | - static QPDFObjectHandle parse( | ||
| 46 | - InputSource& input, | ||
| 47 | - std::string const& object_description, | ||
| 48 | - qpdf::Tokenizer& tokenizer, | ||
| 49 | - QPDFObjectHandle::StringDecrypter* decrypter, | ||
| 50 | - QPDF& context, | ||
| 51 | - bool sanity_checks); | ||
| 52 | - | ||
| 53 | - static QPDFObjectHandle parse( | ||
| 54 | - qpdf::is::OffsetBuffer& input, | ||
| 55 | - int stream_id, | ||
| 56 | - int obj_id, | ||
| 57 | - qpdf::Tokenizer& tokenizer, | ||
| 58 | - QPDF& context); | ||
| 59 | - | ||
| 60 | - static std::shared_ptr<QPDFObject::Description> | ||
| 61 | - make_description(std::string const& input_name, std::string const& object_description) | ||
| 62 | - { | ||
| 63 | - using namespace std::literals; | ||
| 64 | - return std::make_shared<QPDFObject::Description>( | ||
| 65 | - input_name + ", " + object_description + " at offset $PO"); | ||
| 66 | - } | ||
| 67 | - | ||
| 68 | - private: | ||
| 69 | - QPDFParser( | ||
| 70 | - InputSource& input, | ||
| 71 | - std::shared_ptr<QPDFObject::Description> sp_description, | ||
| 72 | - std::string const& object_description, | ||
| 73 | - qpdf::Tokenizer& tokenizer, | ||
| 74 | - QPDFObjectHandle::StringDecrypter* decrypter, | ||
| 75 | - QPDF* context, | ||
| 76 | - bool parse_pdf, | ||
| 77 | - int stream_id = 0, | ||
| 78 | - int obj_id = 0, | ||
| 79 | - bool sanity_checks = false) : | ||
| 80 | - input(input), | ||
| 81 | - object_description(object_description), | ||
| 82 | - tokenizer(tokenizer), | ||
| 83 | - decrypter(decrypter), | ||
| 84 | - context(context), | ||
| 85 | - description(std::move(sp_description)), | ||
| 86 | - parse_pdf(parse_pdf), | ||
| 87 | - stream_id(stream_id), | ||
| 88 | - obj_id(obj_id), | ||
| 89 | - sanity_checks(sanity_checks) | ||
| 90 | - { | ||
| 91 | - } | 35 | + /// @brief Parse a PDF object from an input source. |
| 36 | + /// @param input The input source to read from. | ||
| 37 | + /// @param object_description Description of the object for error messages. | ||
| 38 | + /// @param context The QPDF context, or nullptr if parsing standalone. | ||
| 39 | + /// @return The parsed QPDFObjectHandle, or null if parsing fails. | ||
| 40 | + static QPDFObjectHandle | ||
| 41 | + parse(InputSource& input, std::string const& object_description, QPDF* context); | ||
| 92 | 42 | ||
| 93 | - // Parser state. Note: | ||
| 94 | - // state <= st_dictionary_value == (state = st_dictionary_key || state = st_dictionary_value) | ||
| 95 | - enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; | 43 | + /// @brief Parse a content stream from an input source. |
| 44 | + /// @param input The input source to read from. | ||
| 45 | + /// @param sp_description Shared pointer to object description. | ||
| 46 | + /// @param tokenizer The tokenizer to use for parsing. | ||
| 47 | + /// @param context The QPDF context. | ||
| 48 | + /// @return The parsed QPDFObjectHandle, or uninitialized handle on EOF. | ||
| 49 | + static QPDFObjectHandle parse_content( | ||
| 50 | + InputSource& input, | ||
| 51 | + std::shared_ptr<QPDFObject::Description> sp_description, | ||
| 52 | + qpdf::Tokenizer& tokenizer, | ||
| 53 | + QPDF* context); | ||
| 96 | 54 | ||
| 97 | - struct StackFrame | ||
| 98 | - { | ||
| 99 | - StackFrame(InputSource& input, parser_state_e state) : | ||
| 100 | - state(state), | ||
| 101 | - offset(input.tell()) | 55 | + /// @brief Parse a PDF object (interface for deprecated QPDFObjectHandle::parse). |
| 56 | + /// @param input The input source to read from. | ||
| 57 | + /// @param object_description Description of the object for error messages. | ||
| 58 | + /// @param tokenizer The tokenizer to use for parsing. | ||
| 59 | + /// @param empty Output parameter indicating if object was empty. | ||
| 60 | + /// @param decrypter String decrypter for encrypted strings, or nullptr. | ||
| 61 | + /// @param context The QPDF context, or nullptr if parsing standalone. | ||
| 62 | + /// @return The parsed QPDFObjectHandle. | ||
| 63 | + static QPDFObjectHandle parse( | ||
| 64 | + InputSource& input, | ||
| 65 | + std::string const& object_description, | ||
| 66 | + QPDFTokenizer& tokenizer, | ||
| 67 | + bool& empty, | ||
| 68 | + QPDFObjectHandle::StringDecrypter* decrypter, | ||
| 69 | + QPDF* context); | ||
| 70 | + | ||
| 71 | + /// @brief Parse a PDF object for use by QPDF. | ||
| 72 | + /// @param input The input source to read from. | ||
| 73 | + /// @param object_description Description of the object for error messages. | ||
| 74 | + /// @param tokenizer The tokenizer to use for parsing. | ||
| 75 | + /// @param decrypter String decrypter for encrypted strings, or nullptr. | ||
| 76 | + /// @param context The QPDF context. | ||
| 77 | + /// @param sanity_checks Enable additional sanity checks during parsing. | ||
| 78 | + /// @return The parsed QPDFObjectHandle. | ||
| 79 | + static QPDFObjectHandle parse( | ||
| 80 | + InputSource& input, | ||
| 81 | + std::string const& object_description, | ||
| 82 | + qpdf::Tokenizer& tokenizer, | ||
| 83 | + QPDFObjectHandle::StringDecrypter* decrypter, | ||
| 84 | + QPDF& context, | ||
| 85 | + bool sanity_checks); | ||
| 86 | + | ||
| 87 | + /// @brief Parse an object from an object stream. | ||
| 88 | + /// @param input The offset buffer containing the object data. | ||
| 89 | + /// @param stream_id The object stream number. | ||
| 90 | + /// @param obj_id The object ID within the stream. | ||
| 91 | + /// @param tokenizer The tokenizer to use for parsing. | ||
| 92 | + /// @param context The QPDF context. | ||
| 93 | + /// @return The parsed QPDFObjectHandle. | ||
| 94 | + static QPDFObjectHandle parse( | ||
| 95 | + qpdf::is::OffsetBuffer& input, | ||
| 96 | + int stream_id, | ||
| 97 | + int obj_id, | ||
| 98 | + qpdf::Tokenizer& tokenizer, | ||
| 99 | + QPDF& context); | ||
| 100 | + | ||
| 101 | + /// @brief Create a description for a parsed object. | ||
| 102 | + /// @param input_name The name of the input source. | ||
| 103 | + /// @param object_description Description of the object being parsed. | ||
| 104 | + /// @return Shared pointer to object description with offset placeholder. | ||
| 105 | + static std::shared_ptr<QPDFObject::Description> | ||
| 106 | + make_description(std::string const& input_name, std::string const& object_description) | ||
| 102 | { | 107 | { |
| 108 | + using namespace std::literals; | ||
| 109 | + return std::make_shared<QPDFObject::Description>( | ||
| 110 | + input_name + ", " + object_description + " at offset $PO"); | ||
| 103 | } | 111 | } |
| 104 | 112 | ||
| 105 | - std::vector<QPDFObjectHandle> olist; | ||
| 106 | - std::map<std::string, QPDFObjectHandle> dict; | ||
| 107 | - parser_state_e state; | ||
| 108 | - std::string key; | ||
| 109 | - qpdf_offset_t offset; | ||
| 110 | - std::string contents_string; | ||
| 111 | - qpdf_offset_t contents_offset{-1}; | ||
| 112 | - int null_count{0}; | ||
| 113 | - }; | 113 | + private: |
| 114 | + /// @brief Construct a parser instance. | ||
| 115 | + /// @param input The input source to read from. | ||
| 116 | + /// @param sp_description Shared pointer to object description. | ||
| 117 | + /// @param object_description Description string for error messages. | ||
| 118 | + /// @param tokenizer The tokenizer to use for parsing. | ||
| 119 | + /// @param decrypter String decrypter for encrypted content. | ||
| 120 | + /// @param context The QPDF context. | ||
| 121 | + /// @param parse_pdf Whether parsing PDF objects (vs content streams). | ||
| 122 | + /// @param stream_id Object stream ID for object stream parsing. | ||
| 123 | + /// @param obj_id Object ID within object stream. | ||
| 124 | + /// @param sanity_checks Enable additional sanity checks. | ||
| 125 | + Parser( | ||
| 126 | + InputSource& input, | ||
| 127 | + std::shared_ptr<QPDFObject::Description> sp_description, | ||
| 128 | + std::string const& object_description, | ||
| 129 | + qpdf::Tokenizer& tokenizer, | ||
| 130 | + QPDFObjectHandle::StringDecrypter* decrypter, | ||
| 131 | + QPDF* context, | ||
| 132 | + bool parse_pdf, | ||
| 133 | + int stream_id = 0, | ||
| 134 | + int obj_id = 0, | ||
| 135 | + bool sanity_checks = false) : | ||
| 136 | + input_(input), | ||
| 137 | + object_description_(object_description), | ||
| 138 | + tokenizer_(tokenizer), | ||
| 139 | + decrypter_(decrypter), | ||
| 140 | + context_(context), | ||
| 141 | + description_(std::move(sp_description)), | ||
| 142 | + parse_pdf_(parse_pdf), | ||
| 143 | + stream_id_(stream_id), | ||
| 144 | + obj_id_(obj_id), | ||
| 145 | + sanity_checks_(sanity_checks) | ||
| 146 | + { | ||
| 147 | + } | ||
| 148 | + | ||
| 149 | + /// @brief Parser state enumeration. | ||
| 150 | + /// @note state <= st_dictionary_value indicates we're in a dictionary context. | ||
| 151 | + enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; | ||
| 152 | + | ||
| 153 | + /// @brief Stack frame for tracking nested arrays and dictionaries. | ||
| 154 | + struct StackFrame | ||
| 155 | + { | ||
| 156 | + StackFrame(InputSource& input, parser_state_e state) : | ||
| 157 | + state(state), | ||
| 158 | + offset(input.tell()) | ||
| 159 | + { | ||
| 160 | + } | ||
| 161 | + | ||
| 162 | + std::vector<QPDFObjectHandle> olist; ///< Object list for arrays/dict values | ||
| 163 | + std::map<std::string, QPDFObjectHandle> dict; ///< Dictionary entries | ||
| 164 | + parser_state_e state; ///< Current parser state | ||
| 165 | + std::string key; ///< Current dictionary key | ||
| 166 | + qpdf_offset_t offset; ///< Offset of container start | ||
| 167 | + std::string contents_string; ///< For /Contents field in signatures | ||
| 168 | + qpdf_offset_t contents_offset{-1}; ///< Offset of /Contents value | ||
| 169 | + int null_count{0}; ///< Count of null values in container | ||
| 170 | + }; | ||
| 171 | + | ||
| 172 | + /// @brief Parse an object, handling exceptions and returning null on error. | ||
| 173 | + /// @param content_stream True if parsing a content stream. | ||
| 174 | + /// @return The parsed object handle, or null/uninitialized on error. | ||
| 175 | + QPDFObjectHandle parse(bool content_stream = false); | ||
| 176 | + | ||
| 177 | + /// @brief Parse the first token and dispatch to appropriate handler. | ||
| 178 | + /// @param content_stream True if parsing a content stream. | ||
| 179 | + /// @return The parsed object handle. | ||
| 180 | + QPDFObjectHandle parse_first(bool content_stream); | ||
| 181 | + | ||
| 182 | + /// @brief Parse the remainder of a composite object (array/dict/reference). | ||
| 183 | + /// @param content_stream True if parsing a content stream. | ||
| 184 | + /// @return The completed object handle. | ||
| 185 | + QPDFObjectHandle parse_remainder(bool content_stream); | ||
| 186 | + | ||
| 187 | + /// @brief Add an object to the current container. | ||
| 188 | + /// @param obj The object to add. | ||
| 189 | + void add(std::shared_ptr<QPDFObject>&& obj); | ||
| 114 | 190 | ||
| 115 | - QPDFObjectHandle parse(bool content_stream = false); | ||
| 116 | - QPDFObjectHandle parse_first(bool content_stream); | ||
| 117 | - QPDFObjectHandle parseRemainder(bool content_stream); | ||
| 118 | - void add(std::shared_ptr<QPDFObject>&& obj); | ||
| 119 | - void addNull(); | ||
| 120 | - void add_bad_null(std::string const& msg); | ||
| 121 | - void addInt(int count); | ||
| 122 | - template <typename T, typename... Args> | ||
| 123 | - void addScalar(Args&&... args); | ||
| 124 | - void check_too_many_bad_tokens(); | ||
| 125 | - void warnDuplicateKey(); | ||
| 126 | - void fixMissingKeys(); | ||
| 127 | - [[noreturn]] void limits_error(std::string const& limit, std::string const& msg); | ||
| 128 | - void warn(qpdf_offset_t offset, std::string const& msg) const; | ||
| 129 | - void warn(std::string const& msg) const; | ||
| 130 | - void warn(QPDFExc const&) const; | ||
| 131 | - template <typename T, typename... Args> | ||
| 132 | - // Create a new scalar object complete with parsed offset and description. | ||
| 133 | - // NB the offset includes any leading whitespace. | ||
| 134 | - QPDFObjectHandle withDescription(Args&&... args); | ||
| 135 | - void setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset); | ||
| 136 | - InputSource& input; | ||
| 137 | - std::string const& object_description; | ||
| 138 | - qpdf::Tokenizer& tokenizer; | ||
| 139 | - QPDFObjectHandle::StringDecrypter* decrypter; | ||
| 140 | - QPDF* context; | ||
| 141 | - std::shared_ptr<QPDFObject::Description> description; | ||
| 142 | - bool parse_pdf{false}; | ||
| 143 | - int stream_id{0}; | ||
| 144 | - int obj_id{0}; | ||
| 145 | - bool sanity_checks{false}; | ||
| 146 | - | ||
| 147 | - std::vector<StackFrame> stack; | ||
| 148 | - StackFrame* frame{nullptr}; | ||
| 149 | - // Number of recent bad tokens. This will always be > 0 once a bad token has been encountered as | ||
| 150 | - // it only gets incremented or reset when a bad token is encountered. | ||
| 151 | - int bad_count{0}; | ||
| 152 | - // Number of bad tokens (remaining) before giving up. | ||
| 153 | - uint32_t max_bad_count{Limits::parser_max_errors()}; | ||
| 154 | - // Number of good tokens since last bad token. Irrelevant if bad_count == 0. | ||
| 155 | - int good_count{0}; | ||
| 156 | - // Start offset including any leading whitespace. | ||
| 157 | - qpdf_offset_t start{0}; | ||
| 158 | - // Number of successive integer tokens. | ||
| 159 | - int int_count{0}; | ||
| 160 | - long long int_buffer[2]{0, 0}; | ||
| 161 | - qpdf_offset_t last_offset_buffer[2]{0, 0}; | ||
| 162 | - bool empty_{false}; | ||
| 163 | -}; | 191 | + /// @brief Add a null object to the current container. |
| 192 | + void add_null(); | ||
| 193 | + | ||
| 194 | + /// @brief Add a null with a warning message. | ||
| 195 | + /// @param msg Warning message describing the error. | ||
| 196 | + void add_bad_null(std::string const& msg); | ||
| 197 | + | ||
| 198 | + /// @brief Add a buffered integer from int_buffer_. | ||
| 199 | + /// @param count Buffer index (1 or 2) to read from. | ||
| 200 | + void add_int(int count); | ||
| 201 | + | ||
| 202 | + /// @brief Create and add a scalar object to the current container. | ||
| 203 | + /// @tparam T The scalar object type (e.g., QPDF_Integer, QPDF_String). | ||
| 204 | + /// @tparam Args Constructor argument types. | ||
| 205 | + /// @param args Arguments to forward to the object constructor. | ||
| 206 | + template <typename T, typename... Args> | ||
| 207 | + void add_scalar(Args&&... args); | ||
| 208 | + | ||
| 209 | + /// @brief Check if too many bad tokens have been encountered and throw if so. | ||
| 210 | + void check_too_many_bad_tokens(); | ||
| 211 | + | ||
| 212 | + /// @brief Issue a warning about a duplicate dictionary key. | ||
| 213 | + void warn_duplicate_key(); | ||
| 214 | + | ||
| 215 | + /// @brief Fix dictionaries with missing keys by generating fake keys. | ||
| 216 | + void fix_missing_keys(); | ||
| 217 | + | ||
| 218 | + /// @brief Report a limits error and throw. | ||
| 219 | + /// @param limit The limit identifier. | ||
| 220 | + /// @param msg Error message. | ||
| 221 | + [[noreturn]] void limits_error(std::string const& limit, std::string const& msg); | ||
| 222 | + | ||
| 223 | + /// @brief Issue a warning at a specific offset. | ||
| 224 | + /// @param offset File offset for the warning. | ||
| 225 | + /// @param msg Warning message. | ||
| 226 | + void warn(qpdf_offset_t offset, std::string const& msg) const; | ||
| 227 | + | ||
| 228 | + /// @brief Issue a warning at the current offset. | ||
| 229 | + /// @param msg Warning message. | ||
| 230 | + void warn(std::string const& msg) const; | ||
| 231 | + | ||
| 232 | + /// @brief Issue a warning from a QPDFExc exception. | ||
| 233 | + /// @param e The exception to report. | ||
| 234 | + void warn(QPDFExc const& e) const; | ||
| 235 | + | ||
| 236 | + /// @brief Create a scalar object with description and parsed offset. | ||
| 237 | + /// @tparam T The scalar object type. | ||
| 238 | + /// @tparam Args Constructor argument types. | ||
| 239 | + /// @param args Arguments to forward to the object constructor. | ||
| 240 | + /// @return Object handle with description and offset set. | ||
| 241 | + /// @note The offset includes any leading whitespace. | ||
| 242 | + template <typename T, typename... Args> | ||
| 243 | + QPDFObjectHandle with_description(Args&&... args); | ||
| 244 | + | ||
| 245 | + /// @brief Set the description and offset on an existing object. | ||
| 246 | + /// @param obj The object to update. | ||
| 247 | + /// @param parsed_offset The file offset where the object was parsed. | ||
| 248 | + void set_description(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset); | ||
| 249 | + | ||
| 250 | + // Core parsing state | ||
| 251 | + InputSource& input_; ///< Input source to read from | ||
| 252 | + std::string const& object_description_; ///< Description for error messages | ||
| 253 | + qpdf::Tokenizer& tokenizer_; ///< Tokenizer for lexical analysis | ||
| 254 | + QPDFObjectHandle::StringDecrypter* decrypter_; ///< Decrypter for encrypted strings | ||
| 255 | + QPDF* context_; ///< QPDF context for object resolution | ||
| 256 | + std::shared_ptr<QPDFObject::Description> description_; ///< Shared description for objects | ||
| 257 | + bool parse_pdf_{false}; ///< True if parsing PDF objects vs content streams | ||
| 258 | + int stream_id_{0}; ///< Object stream ID (for object stream parsing) | ||
| 259 | + int obj_id_{0}; ///< Object ID within object stream | ||
| 260 | + bool sanity_checks_{false}; ///< Enable additional validation checks | ||
| 261 | + | ||
| 262 | + // Composite object parsing state | ||
| 263 | + std::vector<StackFrame> stack_; ///< Stack of nested containers | ||
| 264 | + StackFrame* frame_{nullptr}; ///< Current stack frame pointer | ||
| 265 | + | ||
| 266 | + // Error tracking state | ||
| 267 | + /// Number of recent bad tokens. Always > 0 after first bad token encountered. | ||
| 268 | + int bad_count_{0}; | ||
| 269 | + /// Number of bad tokens remaining before giving up. | ||
| 270 | + uint32_t max_bad_count_{Limits::parser_max_errors()}; | ||
| 271 | + /// Number of good tokens since last bad token. Irrelevant if bad_count_ == 0. | ||
| 272 | + int good_count_{0}; | ||
| 273 | + | ||
| 274 | + // Token buffering state | ||
| 275 | + /// Start offset of current object, including any leading whitespace. | ||
| 276 | + qpdf_offset_t start_{0}; | ||
| 277 | + /// Number of successive integer tokens (for indirect reference detection). | ||
| 278 | + int int_count_{0}; | ||
| 279 | + /// Buffer for up to 2 integer tokens. | ||
| 280 | + long long int_buffer_[2]{0, 0}; | ||
| 281 | + /// Offsets corresponding to buffered integers. | ||
| 282 | + qpdf_offset_t last_offset_buffer_[2]{0, 0}; | ||
| 283 | + | ||
| 284 | + /// True if object was empty (endobj without content). | ||
| 285 | + bool empty_{false}; | ||
| 286 | + }; | ||
| 287 | +} // namespace qpdf::impl | ||
| 164 | 288 | ||
| 165 | #endif // QPDFPARSER_HH | 289 | #endif // QPDFPARSER_HH |
qpdf/qtest/qpdf/parse-object.out
| 1 | [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] | 1 | [ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ] |
| 2 | -logic error parsing indirect: QPDFParser::parse called without context on an object with indirect references | 2 | +logic error parsing indirect: Parser::parse called without context on an object with indirect references |
| 3 | trailing data: parsed object (trailing test): trailing data found parsing object from string | 3 | trailing data: parsed object (trailing test): trailing data found parsing object from string |
| 4 | WARNING: parsed object (offset 9): unknown token while reading object; treating as string | 4 | WARNING: parsed object (offset 9): unknown token while reading object; treating as string |
| 5 | WARNING: parsed object: treating unexpected brace token as null | 5 | WARNING: parsed object: treating unexpected brace token as null |