Commit 44cd31b96291ba114bc48870be0928b0f75e2dc1

Authored by copilot-swe-agent[bot]
Committed by m-holger
1 parent f2ccff52

Add Doxygen-style comments for all Parser methods

Co-authored-by: m-holger <34626170+m-holger@users.noreply.github.com>
Showing 1 changed file with 155 additions and 35 deletions
libqpdf/qpdf/QPDFParser.hh
@@ -15,9 +15,16 @@ using namespace qpdf::global; @@ -15,9 +15,16 @@ using namespace qpdf::global;
15 15
16 namespace qpdf::impl 16 namespace qpdf::impl
17 { 17 {
  18 + /// @class Parser
  19 + /// @brief Internal parser for PDF objects and content streams.
  20 + /// @par
  21 + /// The Parser class provides static methods for parsing PDF objects from input sources.
  22 + /// It handles tokenization, error recovery, and object construction with proper offset
  23 + /// tracking and description for error reporting.
18 class Parser 24 class Parser
19 { 25 {
20 public: 26 public:
  27 + /// @brief Exception thrown when parser encounters an unrecoverable error.
21 class Error: public std::exception 28 class Error: public std::exception
22 { 29 {
23 public: 30 public:
@@ -25,16 +32,34 @@ namespace qpdf::impl @@ -25,16 +32,34 @@ namespace qpdf::impl
25 virtual ~Error() noexcept = default; 32 virtual ~Error() noexcept = default;
26 }; 33 };
27 34
  35 + /// @brief Parse a PDF object from an input source.
  36 + /// @param input The input source to read from.
  37 + /// @param object_description Description of the object for error messages.
  38 + /// @param context The QPDF context, or nullptr if parsing standalone.
  39 + /// @return The parsed QPDFObjectHandle, or null if parsing fails.
28 static QPDFObjectHandle 40 static QPDFObjectHandle
29 parse(InputSource& input, std::string const& object_description, QPDF* context); 41 parse(InputSource& input, std::string const& object_description, QPDF* context);
30 42
  43 + /// @brief Parse a content stream from an input source.
  44 + /// @param input The input source to read from.
  45 + /// @param sp_description Shared pointer to object description.
  46 + /// @param tokenizer The tokenizer to use for parsing.
  47 + /// @param context The QPDF context.
  48 + /// @return The parsed QPDFObjectHandle, or uninitialized handle on EOF.
31 static QPDFObjectHandle parse_content( 49 static QPDFObjectHandle parse_content(
32 InputSource& input, 50 InputSource& input,
33 std::shared_ptr<QPDFObject::Description> sp_description, 51 std::shared_ptr<QPDFObject::Description> sp_description,
34 qpdf::Tokenizer& tokenizer, 52 qpdf::Tokenizer& tokenizer,
35 QPDF* context); 53 QPDF* context);
36 54
37 - // For use by deprecated QPDFObjectHandle::parse. 55 + /// @brief Parse a PDF object (interface for deprecated QPDFObjectHandle::parse).
  56 + /// @param input The input source to read from.
  57 + /// @param object_description Description of the object for error messages.
  58 + /// @param tokenizer The tokenizer to use for parsing.
  59 + /// @param empty Output parameter indicating if object was empty.
  60 + /// @param decrypter String decrypter for encrypted strings, or nullptr.
  61 + /// @param context The QPDF context, or nullptr if parsing standalone.
  62 + /// @return The parsed QPDFObjectHandle.
38 static QPDFObjectHandle parse( 63 static QPDFObjectHandle parse(
39 InputSource& input, 64 InputSource& input,
40 std::string const& object_description, 65 std::string const& object_description,
@@ -43,7 +68,14 @@ namespace qpdf::impl @@ -43,7 +68,14 @@ namespace qpdf::impl
43 QPDFObjectHandle::StringDecrypter* decrypter, 68 QPDFObjectHandle::StringDecrypter* decrypter,
44 QPDF* context); 69 QPDF* context);
45 70
46 - // For use by QPDF. 71 + /// @brief Parse a PDF object for use by QPDF.
  72 + /// @param input The input source to read from.
  73 + /// @param object_description Description of the object for error messages.
  74 + /// @param tokenizer The tokenizer to use for parsing.
  75 + /// @param decrypter String decrypter for encrypted strings, or nullptr.
  76 + /// @param context The QPDF context.
  77 + /// @param sanity_checks Enable additional sanity checks during parsing.
  78 + /// @return The parsed QPDFObjectHandle.
47 static QPDFObjectHandle parse( 79 static QPDFObjectHandle parse(
48 InputSource& input, 80 InputSource& input,
49 std::string const& object_description, 81 std::string const& object_description,
@@ -52,6 +84,13 @@ namespace qpdf::impl @@ -52,6 +84,13 @@ namespace qpdf::impl
52 QPDF& context, 84 QPDF& context,
53 bool sanity_checks); 85 bool sanity_checks);
54 86
  87 + /// @brief Parse an object from an object stream.
  88 + /// @param input The offset buffer containing the object data.
  89 + /// @param stream_id The object stream number.
  90 + /// @param obj_id The object ID within the stream.
  91 + /// @param tokenizer The tokenizer to use for parsing.
  92 + /// @param context The QPDF context.
  93 + /// @return The parsed QPDFObjectHandle.
55 static QPDFObjectHandle parse( 94 static QPDFObjectHandle parse(
56 qpdf::is::OffsetBuffer& input, 95 qpdf::is::OffsetBuffer& input,
57 int stream_id, 96 int stream_id,
@@ -59,6 +98,10 @@ namespace qpdf::impl @@ -59,6 +98,10 @@ namespace qpdf::impl
59 qpdf::Tokenizer& tokenizer, 98 qpdf::Tokenizer& tokenizer,
60 QPDF& context); 99 QPDF& context);
61 100
  101 + /// @brief Create a description for a parsed object.
  102 + /// @param input_name The name of the input source.
  103 + /// @param object_description Description of the object being parsed.
  104 + /// @return Shared pointer to object description with offset placeholder.
62 static std::shared_ptr<QPDFObject::Description> 105 static std::shared_ptr<QPDFObject::Description>
63 make_description(std::string const& input_name, std::string const& object_description) 106 make_description(std::string const& input_name, std::string const& object_description)
64 { 107 {
@@ -68,6 +111,17 @@ namespace qpdf::impl @@ -68,6 +111,17 @@ namespace qpdf::impl
68 } 111 }
69 112
70 private: 113 private:
  114 + /// @brief Construct a parser instance.
  115 + /// @param input The input source to read from.
  116 + /// @param sp_description Shared pointer to object description.
  117 + /// @param object_description Description string for error messages.
  118 + /// @param tokenizer The tokenizer to use for parsing.
  119 + /// @param decrypter String decrypter for encrypted content.
  120 + /// @param context The QPDF context.
  121 + /// @param parse_pdf Whether parsing PDF objects (vs content streams).
  122 + /// @param stream_id Object stream ID for object stream parsing.
  123 + /// @param obj_id Object ID within object stream.
  124 + /// @param sanity_checks Enable additional sanity checks.
71 Parser( 125 Parser(
72 InputSource& input, 126 InputSource& input,
73 std::shared_ptr<QPDFObject::Description> sp_description, 127 std::shared_ptr<QPDFObject::Description> sp_description,
@@ -92,11 +146,11 @@ namespace qpdf::impl @@ -92,11 +146,11 @@ namespace qpdf::impl
92 { 146 {
93 } 147 }
94 148
95 - // Parser state. Note:  
96 - // state <= st_dictionary_value == (state = st_dictionary_key || state =  
97 - // st_dictionary_value) 149 + /// @brief Parser state enumeration.
  150 + /// @note state <= st_dictionary_value is equivalent to (state == st_dictionary_key || state == st_dictionary_value), i.e. we are in a dictionary context.
98 enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; 151 enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array };
99 152
  153 + /// @brief Stack frame for tracking nested arrays and dictionaries.
100 struct StackFrame 154 struct StackFrame
101 { 155 {
102 StackFrame(InputSource& input, parser_state_e state) : 156 StackFrame(InputSource& input, parser_state_e state) :
@@ -105,63 +159,129 @@ namespace qpdf::impl @@ -105,63 +159,129 @@ namespace qpdf::impl
105 { 159 {
106 } 160 }
107 161
108 - std::vector<QPDFObjectHandle> olist;  
109 - std::map<std::string, QPDFObjectHandle> dict;  
110 - parser_state_e state;  
111 - std::string key;  
112 - qpdf_offset_t offset;  
113 - std::string contents_string;  
114 - qpdf_offset_t contents_offset{-1};  
115 - int null_count{0}; 162 + std::vector<QPDFObjectHandle> olist; ///< Object list for arrays/dict values
  163 + std::map<std::string, QPDFObjectHandle> dict; ///< Dictionary entries
  164 + parser_state_e state; ///< Current parser state
  165 + std::string key; ///< Current dictionary key
  166 + qpdf_offset_t offset; ///< Offset of container start
  167 + std::string contents_string; ///< For /Contents field in signatures
  168 + qpdf_offset_t contents_offset{-1}; ///< Offset of /Contents value
  169 + int null_count{0}; ///< Count of null values in container
116 }; 170 };
117 171
  172 + /// @brief Parse an object, handling exceptions and returning null on error.
  173 + /// @param content_stream True if parsing a content stream.
  174 + /// @return The parsed object handle, or null/uninitialized on error.
118 QPDFObjectHandle parse(bool content_stream = false); 175 QPDFObjectHandle parse(bool content_stream = false);
  176 +
  177 + /// @brief Parse the first token and dispatch to appropriate handler.
  178 + /// @param content_stream True if parsing a content stream.
  179 + /// @return The parsed object handle.
119 QPDFObjectHandle parse_first(bool content_stream); 180 QPDFObjectHandle parse_first(bool content_stream);
  181 +
  182 + /// @brief Parse the remainder of a composite object (array/dict/reference).
  183 + /// @param content_stream True if parsing a content stream.
  184 + /// @return The completed object handle.
120 QPDFObjectHandle parse_remainder(bool content_stream); 185 QPDFObjectHandle parse_remainder(bool content_stream);
  186 +
  187 + /// @brief Add an object to the current container.
  188 + /// @param obj The object to add.
121 void add(std::shared_ptr<QPDFObject>&& obj); 189 void add(std::shared_ptr<QPDFObject>&& obj);
  190 +
  191 + /// @brief Add a null object to the current container.
122 void add_null(); 192 void add_null();
  193 +
  194 + /// @brief Add a null with a warning message.
  195 + /// @param msg Warning message describing the error.
123 void add_bad_null(std::string const& msg); 196 void add_bad_null(std::string const& msg);
  197 +
  198 + /// @brief Add a buffered integer from int_buffer_.
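  199 + /// @param count Selector (1 or 2) for the buffered integer to add; not a raw index, as int_buffer_ only has slots 0 and 1 (TODO: confirm mapping in the implementation).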
124 void add_int(int count); 200 void add_int(int count);
  201 +
  202 + /// @brief Create and add a scalar object to the current container.
  203 + /// @tparam T The scalar object type (e.g., QPDF_Integer, QPDF_String).
  204 + /// @tparam Args Constructor argument types.
  205 + /// @param args Arguments to forward to the object constructor.
125 template <typename T, typename... Args> 206 template <typename T, typename... Args>
126 void add_scalar(Args&&... args); 207 void add_scalar(Args&&... args);
  208 +
  209 + /// @brief Check if too many bad tokens have been encountered and throw if so.
127 void check_too_many_bad_tokens(); 210 void check_too_many_bad_tokens();
  211 +
  212 + /// @brief Issue a warning about a duplicate dictionary key.
128 void warn_duplicate_key(); 213 void warn_duplicate_key();
  214 +
  215 + /// @brief Fix dictionaries with missing keys by generating fake keys.
129 void fix_missing_keys(); 216 void fix_missing_keys();
  217 +
  218 + /// @brief Report a limits error and throw.
  219 + /// @param limit The limit identifier.
  220 + /// @param msg Error message.
130 [[noreturn]] void limits_error(std::string const& limit, std::string const& msg); 221 [[noreturn]] void limits_error(std::string const& limit, std::string const& msg);
  222 +
  223 + /// @brief Issue a warning at a specific offset.
  224 + /// @param offset File offset for the warning.
  225 + /// @param msg Warning message.
131 void warn(qpdf_offset_t offset, std::string const& msg) const; 226 void warn(qpdf_offset_t offset, std::string const& msg) const;
  227 +
  228 + /// @brief Issue a warning at the current offset.
  229 + /// @param msg Warning message.
132 void warn(std::string const& msg) const; 230 void warn(std::string const& msg) const;
133 - void warn(QPDFExc const&) const; 231 +
  232 + /// @brief Issue a warning from a QPDFExc exception.
  233 + /// @param e The exception to report.
  234 + void warn(QPDFExc const& e) const;
  235 +
  236 + /// @brief Create a scalar object with description and parsed offset.
  237 + /// @tparam T The scalar object type.
  238 + /// @tparam Args Constructor argument types.
  239 + /// @param args Arguments to forward to the object constructor.
  240 + /// @return Object handle with description and offset set.
  241 + /// @note The offset includes any leading whitespace.
134 template <typename T, typename... Args> 242 template <typename T, typename... Args>
135 - // Create a new scalar object complete with parsed offset and description.  
136 - // NB the offset includes any leading whitespace.  
137 QPDFObjectHandle with_description(Args&&... args); 243 QPDFObjectHandle with_description(Args&&... args);
  244 +
  245 + /// @brief Set the description and offset on an existing object.
  246 + /// @param obj The object to update.
  247 + /// @param parsed_offset The file offset where the object was parsed.
138 void set_description(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset); 248 void set_description(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset);
139 - InputSource& input_;  
140 - std::string const& object_description_;  
141 - qpdf::Tokenizer& tokenizer_;  
142 - QPDFObjectHandle::StringDecrypter* decrypter_;  
143 - QPDF* context_;  
144 - std::shared_ptr<QPDFObject::Description> description_;  
145 - bool parse_pdf_{false};  
146 - int stream_id_{0};  
147 - int obj_id_{0};  
148 - bool sanity_checks_{false};  
149 -  
150 - std::vector<StackFrame> stack_;  
151 - StackFrame* frame_{nullptr};  
152 - // Number of recent bad tokens. This will always be > 0 once a bad token has been  
153 - // encountered as it only gets incremented or reset when a bad token is encountered. 249 +
  250 + // Core parsing state
  251 + InputSource& input_; ///< Input source to read from
  252 + std::string const& object_description_; ///< Description for error messages
  253 + qpdf::Tokenizer& tokenizer_; ///< Tokenizer for lexical analysis
  254 + QPDFObjectHandle::StringDecrypter* decrypter_; ///< Decrypter for encrypted strings
  255 + QPDF* context_; ///< QPDF context for object resolution
  256 + std::shared_ptr<QPDFObject::Description> description_; ///< Shared description for objects
  257 + bool parse_pdf_{false}; ///< True if parsing PDF objects vs content streams
  258 + int stream_id_{0}; ///< Object stream ID (for object stream parsing)
  259 + int obj_id_{0}; ///< Object ID within object stream
  260 + bool sanity_checks_{false}; ///< Enable additional validation checks
  261 +
  262 + // Composite object parsing state
  263 + std::vector<StackFrame> stack_; ///< Stack of nested containers
  264 + StackFrame* frame_{nullptr}; ///< Current stack frame pointer
  265 +
  266 + // Error tracking state
  267 + /// Number of recent bad tokens. Always > 0 once a bad token has been encountered, as it is only incremented or reset when a bad token is encountered.
154 int bad_count_{0}; 268 int bad_count_{0};
155 - // Number of bad tokens (remaining) before giving up. 269 + /// Number of bad tokens remaining before giving up.
156 uint32_t max_bad_count_{Limits::parser_max_errors()}; 270 uint32_t max_bad_count_{Limits::parser_max_errors()};
157 - // Number of good tokens since last bad token. Irrelevant if bad_count == 0. 271 + /// Number of good tokens since last bad token. Irrelevant if bad_count_ == 0.
158 int good_count_{0}; 272 int good_count_{0};
159 - // Start offset including any leading whitespace. 273 +
  274 + // Token buffering state
  275 + /// Start offset of current object, including any leading whitespace.
160 qpdf_offset_t start_{0}; 276 qpdf_offset_t start_{0};
161 - // Number of successive integer tokens. 277 + /// Number of successive integer tokens (for indirect reference detection).
162 int int_count_{0}; 278 int int_count_{0};
  279 + /// Buffer for up to 2 integer tokens.
163 long long int_buffer_[2]{0, 0}; 280 long long int_buffer_[2]{0, 0};
  281 + /// Offsets corresponding to buffered integers.
164 qpdf_offset_t last_offset_buffer_[2]{0, 0}; 282 qpdf_offset_t last_offset_buffer_[2]{0, 0};
  283 +
  284 + /// True if object was empty (endobj without content).
165 bool empty_{false}; 285 bool empty_{false};
166 }; 286 };
167 } // namespace qpdf::impl 287 } // namespace qpdf::impl