Commit db6ab9cbfabe9be32b7386ac92dbc2a3fabd83e5 (1 parent: d904eab8)
In QPDFParser::parse merge state and object stacks
Showing 2 changed files with 59 additions and 67 deletions
libqpdf/QPDFParser.cc
| ... | ... | @@ -21,22 +21,6 @@ |
| 21 | 21 | |
| 22 | 22 | #include <memory> |
| 23 | 23 | |
| 24 | -namespace | |
| 25 | -{ | |
| 26 | - struct StackFrame | |
| 27 | - { | |
| 28 | - StackFrame(std::shared_ptr<InputSource> input) : | |
| 29 | - offset(input->tell()) | |
| 30 | - { | |
| 31 | - } | |
| 32 | - | |
| 33 | - std::vector<std::shared_ptr<QPDFObject>> olist; | |
| 34 | - qpdf_offset_t offset; | |
| 35 | - std::string contents_string{""}; | |
| 36 | - qpdf_offset_t contents_offset{-1}; | |
| 37 | - int null_count{0}; | |
| 38 | - }; | |
| 39 | -} // namespace | |
| 40 | 24 | |
| 41 | 25 | QPDFObjectHandle |
| 42 | 26 | QPDFParser::parse(bool& empty, bool content_stream) |
| ... | ... | @@ -54,23 +38,15 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 54 | 38 | std::shared_ptr<QPDFObject> object; |
| 55 | 39 | bool set_offset = false; |
| 56 | 40 | |
| 57 | - std::vector<StackFrame> stack; | |
| 58 | - stack.emplace_back(input); | |
| 59 | - std::vector<parser_state_e> state_stack; | |
| 60 | - state_stack.push_back(st_top); | |
| 61 | - qpdf_offset_t offset; | |
| 41 | + std::vector<StackFrame> stack{{input, st_top}}; | |
| 62 | 42 | bool done = false; |
| 63 | 43 | bool b_contents = false; |
| 64 | 44 | bool is_null = false; |
| 45 | + auto* frame = &stack.back(); | |
| 65 | 46 | |
| 66 | 47 | while (!done) { |
| 67 | 48 | bool indirect_ref = false; |
| 68 | 49 | is_null = false; |
| 69 | - auto& frame = stack.back(); | |
| 70 | - auto& olist = frame.olist; | |
| 71 | - parser_state_e state = state_stack.back(); | |
| 72 | - offset = frame.offset; | |
| 73 | - | |
| 74 | 50 | object = nullptr; |
| 75 | 51 | set_offset = false; |
| 76 | 52 | |
| ... | ... | @@ -81,7 +57,7 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 81 | 57 | |
| 82 | 58 | switch (tokenizer.getType()) { |
| 83 | 59 | case QPDFTokenizer::tt_eof: |
| 84 | - if (state_stack.size() > 1) { | |
| 60 | + if (stack.size() > 1) { | |
| 85 | 61 | warn("parse error while reading object"); |
| 86 | 62 | } |
| 87 | 63 | if (content_stream) { |
| ... | ... | @@ -111,21 +87,20 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 111 | 87 | break; |
| 112 | 88 | |
| 113 | 89 | case QPDFTokenizer::tt_array_close: |
| 114 | - if (state == st_array) { | |
| 115 | - if ((state_stack.size() < 2) || (stack.size() < 2)) { | |
| 90 | + if (frame->state == st_array) { | |
| 91 | + if (stack.size() < 2) { | |
| 116 | 92 | throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " |
| 117 | 93 | "insufficient elements in stack"); |
| 118 | 94 | } |
| 119 | - object = QPDF_Array::create(std::move(olist), frame.null_count > 100); | |
| 120 | - setDescription(object, offset - 1); | |
| 95 | + object = QPDF_Array::create(std::move(frame->olist), frame->null_count > 100); | |
| 96 | + setDescription(object, frame->offset - 1); | |
| 121 | 97 | // The `offset` points to the next of "[". Set the rewind offset to point to the |
| 122 | 98 | // beginning of "[". This has been explicitly tested with whitespace surrounding the |
| 123 | 99 | // array start delimiter. getLastOffset points to the array end token and therefore |
| 124 | 100 | // can't be used here. |
| 125 | 101 | set_offset = true; |
| 126 | - state_stack.pop_back(); | |
| 127 | - state = state_stack.back(); | |
| 128 | 102 | stack.pop_back(); |
| 103 | + frame = &stack.back(); | |
| 129 | 104 | } else { |
| 130 | 105 | QTC::TC("qpdf", "QPDFParser bad array close"); |
| 131 | 106 | warn("treating unexpected array close token as null"); |
| ... | ... | @@ -137,8 +112,8 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 137 | 112 | break; |
| 138 | 113 | |
| 139 | 114 | case QPDFTokenizer::tt_dict_close: |
| 140 | - if (state == st_dictionary) { | |
| 141 | - if ((state_stack.size() < 2) || (stack.size() < 2)) { | |
| 115 | + if (frame->state == st_dictionary) { | |
| 116 | + if (stack.size() < 2) { | |
| 142 | 117 | throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " |
| 143 | 118 | "insufficient elements in stack"); |
| 144 | 119 | } |
| ... | ... | @@ -146,7 +121,7 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 146 | 121 | // Convert list to map. Alternating elements are keys. Attempt to recover more or |
| 147 | 122 | // less gracefully from invalid dictionaries. |
| 148 | 123 | std::set<std::string> names; |
| 149 | - for (auto& obj: olist) { | |
| 124 | + for (auto& obj: frame->olist) { | |
| 150 | 125 | if (obj) { |
| 151 | 126 | if (obj->getTypeCode() == ::ot_name) { |
| 152 | 127 | names.insert(obj->getStringValue()); |
| ... | ... | @@ -156,7 +131,7 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 156 | 131 | |
| 157 | 132 | std::map<std::string, QPDFObjectHandle> dict; |
| 158 | 133 | int next_fake_key = 1; |
| 159 | - for (auto iter = olist.begin(); iter != olist.end();) { | |
| 134 | + for (auto iter = frame->olist.begin(); iter != frame->olist.end();) { | |
| 160 | 135 | // Calculate key. |
| 161 | 136 | std::string key; |
| 162 | 137 | if (*iter && (*iter)->getTypeCode() == ::ot_name) { |
| ... | ... | @@ -169,49 +144,48 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 169 | 144 | QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); |
| 170 | 145 | } |
| 171 | 146 | warn( |
| 172 | - offset, | |
| 147 | + frame->offset, | |
| 173 | 148 | "expected dictionary key but found non-name object; inserting key " + |
| 174 | 149 | key); |
| 175 | 150 | } |
| 176 | 151 | if (dict.count(key) > 0) { |
| 177 | 152 | QTC::TC("qpdf", "QPDFParser duplicate dict key"); |
| 178 | 153 | warn( |
| 179 | - offset, | |
| 154 | + frame->offset, | |
| 180 | 155 | "dictionary has duplicated key " + key + |
| 181 | 156 | "; last occurrence overrides earlier ones"); |
| 182 | 157 | } |
| 183 | 158 | |
| 184 | 159 | // Calculate value. |
| 185 | 160 | std::shared_ptr<QPDFObject> val; |
| 186 | - if (iter != olist.end()) { | |
| 161 | + if (iter != frame->olist.end()) { | |
| 187 | 162 | val = *iter; |
| 188 | 163 | ++iter; |
| 189 | 164 | } else { |
| 190 | 165 | QTC::TC("qpdf", "QPDFParser no val for last key"); |
| 191 | 166 | warn( |
| 192 | - offset, | |
| 167 | + frame->offset, | |
| 193 | 168 | "dictionary ended prematurely; using null as value for last key"); |
| 194 | 169 | val = QPDF_Null::create(); |
| 195 | 170 | } |
| 196 | 171 | |
| 197 | 172 | dict[std::move(key)] = std::move(val); |
| 198 | 173 | } |
| 199 | - if (!frame.contents_string.empty() && dict.count("/Type") && | |
| 174 | + if (!frame->contents_string.empty() && dict.count("/Type") && | |
| 200 | 175 | dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && |
| 201 | 176 | dict.count("/Contents") && dict["/Contents"].isString()) { |
| 202 | - dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string); | |
| 203 | - dict["/Contents"].setParsedOffset(frame.contents_offset); | |
| 177 | + dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string); | |
| 178 | + dict["/Contents"].setParsedOffset(frame->contents_offset); | |
| 204 | 179 | } |
| 205 | 180 | object = QPDF_Dictionary::create(std::move(dict)); |
| 206 | - setDescription(object, offset - 2); | |
| 181 | + setDescription(object, frame->offset - 2); | |
| 207 | 182 | // The `offset` points to the next of "<<". Set the rewind offset to point to the |
| 208 | 183 | // beginning of "<<". This has been explicitly tested with whitespace surrounding |
| 209 | 184 | // the dictionary start delimiter. getLastOffset points to the dictionary end token |
| 210 | 185 | // and therefore can't be used here. |
| 211 | 186 | set_offset = true; |
| 212 | - state_stack.pop_back(); | |
| 213 | - state = state_stack.back(); | |
| 214 | 187 | stack.pop_back(); |
| 188 | + frame = &stack.back(); | |
| 215 | 189 | } else { |
| 216 | 190 | QTC::TC("qpdf", "QPDFParser bad dictionary close"); |
| 217 | 191 | warn("unexpected dictionary close token"); |
| ... | ... | @@ -229,11 +203,12 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 229 | 203 | warn("ignoring excessively deeply nested data structure"); |
| 230 | 204 | return {QPDF_Null::create()}; |
| 231 | 205 | } else { |
| 232 | - state_stack.push_back( | |
| 206 | + b_contents = false; | |
| 207 | + stack.emplace_back( | |
| 208 | + input, | |
| 233 | 209 | (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array |
| 234 | 210 | : st_dictionary); |
| 235 | - b_contents = false; | |
| 236 | - stack.emplace_back(input); | |
| 211 | + frame = &stack.back(); | |
| 237 | 212 | continue; |
| 238 | 213 | } |
| 239 | 214 | |
| ... | ... | @@ -243,7 +218,7 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 243 | 218 | |
| 244 | 219 | case QPDFTokenizer::tt_null: |
| 245 | 220 | is_null = true; |
| 246 | - ++frame.null_count; | |
| 221 | + ++frame->null_count; | |
| 247 | 222 | |
| 248 | 223 | break; |
| 249 | 224 | |
| ... | ... | @@ -271,23 +246,23 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 271 | 246 | case QPDFTokenizer::tt_word: |
| 272 | 247 | { |
| 273 | 248 | auto const& value = tokenizer.getValue(); |
| 274 | - auto size = olist.size(); | |
| 249 | + auto size = frame->olist.size(); | |
| 275 | 250 | if (content_stream) { |
| 276 | 251 | object = QPDF_Operator::create(value); |
| 277 | 252 | } else if ( |
| 278 | - value == "R" && state != st_top && size >= 2 && olist.back() && | |
| 279 | - olist.back()->getTypeCode() == ::ot_integer && | |
| 280 | - !olist.back()->getObjGen().isIndirect() && olist.at(size - 2) && | |
| 281 | - olist.at(size - 2)->getTypeCode() == ::ot_integer && | |
| 282 | - !olist.at(size - 2)->getObjGen().isIndirect()) { | |
| 253 | + value == "R" && frame->state != st_top && size >= 2 && frame->olist.back() && | |
| 254 | + frame->olist.back()->getTypeCode() == ::ot_integer && | |
| 255 | + !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) && | |
| 256 | + frame->olist.at(size - 2)->getTypeCode() == ::ot_integer && | |
| 257 | + !frame->olist.at(size - 2)->getObjGen().isIndirect()) { | |
| 283 | 258 | if (context == nullptr) { |
| 284 | 259 | QTC::TC("qpdf", "QPDFParser indirect without context"); |
| 285 | 260 | throw std::logic_error("QPDFObjectHandle::parse called without context on " |
| 286 | 261 | "an object with indirect references"); |
| 287 | 262 | } |
| 288 | 263 | auto ref_og = QPDFObjGen( |
| 289 | - QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(), | |
| 290 | - QPDFObjectHandle(olist.back()).getIntValueAsInt()); | |
| 264 | + QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(), | |
| 265 | + QPDFObjectHandle(frame->olist.back()).getIntValueAsInt()); | |
| 291 | 266 | if (ref_og.isIndirect()) { |
| 292 | 267 | // This action has the desirable side effect of causing dangling references |
| 293 | 268 | // (references to indirect objects that don't appear in the PDF) in any |
| ... | ... | @@ -298,9 +273,9 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 298 | 273 | QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); |
| 299 | 274 | is_null = true; |
| 300 | 275 | } |
| 301 | - olist.pop_back(); | |
| 302 | - olist.pop_back(); | |
| 303 | - } else if ((value == "endobj") && (state == st_top)) { | |
| 276 | + frame->olist.pop_back(); | |
| 277 | + frame->olist.pop_back(); | |
| 278 | + } else if ((value == "endobj") && (frame->state == st_top)) { | |
| 304 | 279 | // We just saw endobj without having read anything. Treat this as a null and do |
| 305 | 280 | // not move the input source's offset. |
| 306 | 281 | is_null = true; |
| ... | ... | @@ -322,8 +297,8 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 322 | 297 | auto const& val = tokenizer.getValue(); |
| 323 | 298 | if (decrypter) { |
| 324 | 299 | if (b_contents) { |
| 325 | - frame.contents_string = val; | |
| 326 | - frame.contents_offset = input->getLastOffset(); | |
| 300 | + frame->contents_string = val; | |
| 301 | + frame->contents_offset = input->getLastOffset(); | |
| 327 | 302 | b_contents = false; |
| 328 | 303 | } |
| 329 | 304 | std::string s{val}; |
| ... | ... | @@ -348,7 +323,7 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 348 | 323 | throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); |
| 349 | 324 | } |
| 350 | 325 | |
| 351 | - switch (state) { | |
| 326 | + switch (frame->state) { | |
| 352 | 327 | case st_dictionary: |
| 353 | 328 | case st_array: |
| 354 | 329 | if (is_null) { |
| ... | ... | @@ -358,7 +333,7 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 358 | 333 | setDescription(object, input->getLastOffset()); |
| 359 | 334 | } |
| 360 | 335 | set_offset = true; |
| 361 | - stack.back().olist.push_back(object); | |
| 336 | + frame->olist.push_back(object); | |
| 362 | 337 | break; |
| 363 | 338 | |
| 364 | 339 | case st_top: |
| ... | ... | @@ -371,7 +346,7 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 371 | 346 | object = QPDF_Null::create(); |
| 372 | 347 | } |
| 373 | 348 | if (!set_offset) { |
| 374 | - setDescription(object, offset); | |
| 349 | + setDescription(object, frame->offset); | |
| 375 | 350 | } |
| 376 | 351 | return object; |
| 377 | 352 | } | ... | ... |
libqpdf/qpdf/QPDFParser.hh
| ... | ... | @@ -31,8 +31,25 @@ class QPDFParser |
| 31 | 31 | QPDFObjectHandle parse(bool& empty, bool content_stream); |
| 32 | 32 | |
| 33 | 33 | private: |
| 34 | + struct StackFrame; | |
| 34 | 35 | enum parser_state_e { st_top, st_dictionary, st_array }; |
| 35 | 36 | |
| 37 | + struct StackFrame | |
| 38 | + { | |
| 39 | + StackFrame(std::shared_ptr<InputSource> const& input, parser_state_e state) : | |
| 40 | + state(state), | |
| 41 | + offset(input->tell()) | |
| 42 | + { | |
| 43 | + } | |
| 44 | + | |
| 45 | + std::vector<std::shared_ptr<QPDFObject>> olist; | |
| 46 | + parser_state_e state; | |
| 47 | + qpdf_offset_t offset; | |
| 48 | + std::string contents_string{""}; | |
| 49 | + qpdf_offset_t contents_offset{-1}; | |
| 50 | + int null_count{0}; | |
| 51 | + }; | |
| 52 | + | |
| 36 | 53 | bool tooManyBadTokens(); |
| 37 | 54 | void warn(qpdf_offset_t offset, std::string const& msg) const; |
| 38 | 55 | void warn(std::string const& msg) const; | ... | ... |