Commit db6ab9cbfabe9be32b7386ac92dbc2a3fabd83e5
1 parent: d904eab8
In QPDFParser::parse merge state and object stacks
Showing 2 changed files with 59 additions and 67 deletions
libqpdf/QPDFParser.cc
| @@ -21,22 +21,6 @@ | @@ -21,22 +21,6 @@ | ||
| 21 | 21 | ||
| 22 | #include <memory> | 22 | #include <memory> |
| 23 | 23 | ||
| 24 | -namespace | ||
| 25 | -{ | ||
| 26 | - struct StackFrame | ||
| 27 | - { | ||
| 28 | - StackFrame(std::shared_ptr<InputSource> input) : | ||
| 29 | - offset(input->tell()) | ||
| 30 | - { | ||
| 31 | - } | ||
| 32 | - | ||
| 33 | - std::vector<std::shared_ptr<QPDFObject>> olist; | ||
| 34 | - qpdf_offset_t offset; | ||
| 35 | - std::string contents_string{""}; | ||
| 36 | - qpdf_offset_t contents_offset{-1}; | ||
| 37 | - int null_count{0}; | ||
| 38 | - }; | ||
| 39 | -} // namespace | ||
| 40 | 24 | ||
| 41 | QPDFObjectHandle | 25 | QPDFObjectHandle |
| 42 | QPDFParser::parse(bool& empty, bool content_stream) | 26 | QPDFParser::parse(bool& empty, bool content_stream) |
| @@ -54,23 +38,15 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -54,23 +38,15 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 54 | std::shared_ptr<QPDFObject> object; | 38 | std::shared_ptr<QPDFObject> object; |
| 55 | bool set_offset = false; | 39 | bool set_offset = false; |
| 56 | 40 | ||
| 57 | - std::vector<StackFrame> stack; | ||
| 58 | - stack.emplace_back(input); | ||
| 59 | - std::vector<parser_state_e> state_stack; | ||
| 60 | - state_stack.push_back(st_top); | ||
| 61 | - qpdf_offset_t offset; | 41 | + std::vector<StackFrame> stack{{input, st_top}}; |
| 62 | bool done = false; | 42 | bool done = false; |
| 63 | bool b_contents = false; | 43 | bool b_contents = false; |
| 64 | bool is_null = false; | 44 | bool is_null = false; |
| 45 | + auto* frame = &stack.back(); | ||
| 65 | 46 | ||
| 66 | while (!done) { | 47 | while (!done) { |
| 67 | bool indirect_ref = false; | 48 | bool indirect_ref = false; |
| 68 | is_null = false; | 49 | is_null = false; |
| 69 | - auto& frame = stack.back(); | ||
| 70 | - auto& olist = frame.olist; | ||
| 71 | - parser_state_e state = state_stack.back(); | ||
| 72 | - offset = frame.offset; | ||
| 73 | - | ||
| 74 | object = nullptr; | 50 | object = nullptr; |
| 75 | set_offset = false; | 51 | set_offset = false; |
| 76 | 52 | ||
| @@ -81,7 +57,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -81,7 +57,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 81 | 57 | ||
| 82 | switch (tokenizer.getType()) { | 58 | switch (tokenizer.getType()) { |
| 83 | case QPDFTokenizer::tt_eof: | 59 | case QPDFTokenizer::tt_eof: |
| 84 | - if (state_stack.size() > 1) { | 60 | + if (stack.size() > 1) { |
| 85 | warn("parse error while reading object"); | 61 | warn("parse error while reading object"); |
| 86 | } | 62 | } |
| 87 | if (content_stream) { | 63 | if (content_stream) { |
| @@ -111,21 +87,20 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -111,21 +87,20 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 111 | break; | 87 | break; |
| 112 | 88 | ||
| 113 | case QPDFTokenizer::tt_array_close: | 89 | case QPDFTokenizer::tt_array_close: |
| 114 | - if (state == st_array) { | ||
| 115 | - if ((state_stack.size() < 2) || (stack.size() < 2)) { | 90 | + if (frame->state == st_array) { |
| 91 | + if (stack.size() < 2) { | ||
| 116 | throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " | 92 | throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " |
| 117 | "insufficient elements in stack"); | 93 | "insufficient elements in stack"); |
| 118 | } | 94 | } |
| 119 | - object = QPDF_Array::create(std::move(olist), frame.null_count > 100); | ||
| 120 | - setDescription(object, offset - 1); | 95 | + object = QPDF_Array::create(std::move(frame->olist), frame->null_count > 100); |
| 96 | + setDescription(object, frame->offset - 1); | ||
| 121 | // The `offset` points just past "[". Set the rewind offset to point to the | 97 | // The `offset` points just past "[". Set the rewind offset to point to the |
| 122 | // beginning of "[". This has been explicitly tested with whitespace surrounding the | 98 | // beginning of "[". This has been explicitly tested with whitespace surrounding the |
| 123 | // array start delimiter. getLastOffset points to the array end token and therefore | 99 | // array start delimiter. getLastOffset points to the array end token and therefore |
| 124 | // can't be used here. | 100 | // can't be used here. |
| 125 | set_offset = true; | 101 | set_offset = true; |
| 126 | - state_stack.pop_back(); | ||
| 127 | - state = state_stack.back(); | ||
| 128 | stack.pop_back(); | 102 | stack.pop_back(); |
| 103 | + frame = &stack.back(); | ||
| 129 | } else { | 104 | } else { |
| 130 | QTC::TC("qpdf", "QPDFParser bad array close"); | 105 | QTC::TC("qpdf", "QPDFParser bad array close"); |
| 131 | warn("treating unexpected array close token as null"); | 106 | warn("treating unexpected array close token as null"); |
| @@ -137,8 +112,8 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -137,8 +112,8 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 137 | break; | 112 | break; |
| 138 | 113 | ||
| 139 | case QPDFTokenizer::tt_dict_close: | 114 | case QPDFTokenizer::tt_dict_close: |
| 140 | - if (state == st_dictionary) { | ||
| 141 | - if ((state_stack.size() < 2) || (stack.size() < 2)) { | 115 | + if (frame->state == st_dictionary) { |
| 116 | + if (stack.size() < 2) { | ||
| 142 | throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " | 117 | throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " |
| 143 | "insufficient elements in stack"); | 118 | "insufficient elements in stack"); |
| 144 | } | 119 | } |
| @@ -146,7 +121,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -146,7 +121,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 146 | // Convert list to map. Alternating elements are keys. Attempt to recover more or | 121 | // Convert list to map. Alternating elements are keys. Attempt to recover more or |
| 147 | // less gracefully from invalid dictionaries. | 122 | // less gracefully from invalid dictionaries. |
| 148 | std::set<std::string> names; | 123 | std::set<std::string> names; |
| 149 | - for (auto& obj: olist) { | 124 | + for (auto& obj: frame->olist) { |
| 150 | if (obj) { | 125 | if (obj) { |
| 151 | if (obj->getTypeCode() == ::ot_name) { | 126 | if (obj->getTypeCode() == ::ot_name) { |
| 152 | names.insert(obj->getStringValue()); | 127 | names.insert(obj->getStringValue()); |
| @@ -156,7 +131,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -156,7 +131,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 156 | 131 | ||
| 157 | std::map<std::string, QPDFObjectHandle> dict; | 132 | std::map<std::string, QPDFObjectHandle> dict; |
| 158 | int next_fake_key = 1; | 133 | int next_fake_key = 1; |
| 159 | - for (auto iter = olist.begin(); iter != olist.end();) { | 134 | + for (auto iter = frame->olist.begin(); iter != frame->olist.end();) { |
| 160 | // Calculate key. | 135 | // Calculate key. |
| 161 | std::string key; | 136 | std::string key; |
| 162 | if (*iter && (*iter)->getTypeCode() == ::ot_name) { | 137 | if (*iter && (*iter)->getTypeCode() == ::ot_name) { |
| @@ -169,49 +144,48 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -169,49 +144,48 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 169 | QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); | 144 | QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); |
| 170 | } | 145 | } |
| 171 | warn( | 146 | warn( |
| 172 | - offset, | 147 | + frame->offset, |
| 173 | "expected dictionary key but found non-name object; inserting key " + | 148 | "expected dictionary key but found non-name object; inserting key " + |
| 174 | key); | 149 | key); |
| 175 | } | 150 | } |
| 176 | if (dict.count(key) > 0) { | 151 | if (dict.count(key) > 0) { |
| 177 | QTC::TC("qpdf", "QPDFParser duplicate dict key"); | 152 | QTC::TC("qpdf", "QPDFParser duplicate dict key"); |
| 178 | warn( | 153 | warn( |
| 179 | - offset, | 154 | + frame->offset, |
| 180 | "dictionary has duplicated key " + key + | 155 | "dictionary has duplicated key " + key + |
| 181 | "; last occurrence overrides earlier ones"); | 156 | "; last occurrence overrides earlier ones"); |
| 182 | } | 157 | } |
| 183 | 158 | ||
| 184 | // Calculate value. | 159 | // Calculate value. |
| 185 | std::shared_ptr<QPDFObject> val; | 160 | std::shared_ptr<QPDFObject> val; |
| 186 | - if (iter != olist.end()) { | 161 | + if (iter != frame->olist.end()) { |
| 187 | val = *iter; | 162 | val = *iter; |
| 188 | ++iter; | 163 | ++iter; |
| 189 | } else { | 164 | } else { |
| 190 | QTC::TC("qpdf", "QPDFParser no val for last key"); | 165 | QTC::TC("qpdf", "QPDFParser no val for last key"); |
| 191 | warn( | 166 | warn( |
| 192 | - offset, | 167 | + frame->offset, |
| 193 | "dictionary ended prematurely; using null as value for last key"); | 168 | "dictionary ended prematurely; using null as value for last key"); |
| 194 | val = QPDF_Null::create(); | 169 | val = QPDF_Null::create(); |
| 195 | } | 170 | } |
| 196 | 171 | ||
| 197 | dict[std::move(key)] = std::move(val); | 172 | dict[std::move(key)] = std::move(val); |
| 198 | } | 173 | } |
| 199 | - if (!frame.contents_string.empty() && dict.count("/Type") && | 174 | + if (!frame->contents_string.empty() && dict.count("/Type") && |
| 200 | dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && | 175 | dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && |
| 201 | dict.count("/Contents") && dict["/Contents"].isString()) { | 176 | dict.count("/Contents") && dict["/Contents"].isString()) { |
| 202 | - dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string); | ||
| 203 | - dict["/Contents"].setParsedOffset(frame.contents_offset); | 177 | + dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string); |
| 178 | + dict["/Contents"].setParsedOffset(frame->contents_offset); | ||
| 204 | } | 179 | } |
| 205 | object = QPDF_Dictionary::create(std::move(dict)); | 180 | object = QPDF_Dictionary::create(std::move(dict)); |
| 206 | - setDescription(object, offset - 2); | 181 | + setDescription(object, frame->offset - 2); |
| 207 | // The `offset` points just past "<<". Set the rewind offset to point to the | 182 | // The `offset` points just past "<<". Set the rewind offset to point to the |
| 208 | // beginning of "<<". This has been explicitly tested with whitespace surrounding | 183 | // beginning of "<<". This has been explicitly tested with whitespace surrounding |
| 209 | // the dictionary start delimiter. getLastOffset points to the dictionary end token | 184 | // the dictionary start delimiter. getLastOffset points to the dictionary end token |
| 210 | // and therefore can't be used here. | 185 | // and therefore can't be used here. |
| 211 | set_offset = true; | 186 | set_offset = true; |
| 212 | - state_stack.pop_back(); | ||
| 213 | - state = state_stack.back(); | ||
| 214 | stack.pop_back(); | 187 | stack.pop_back(); |
| 188 | + frame = &stack.back(); | ||
| 215 | } else { | 189 | } else { |
| 216 | QTC::TC("qpdf", "QPDFParser bad dictionary close"); | 190 | QTC::TC("qpdf", "QPDFParser bad dictionary close"); |
| 217 | warn("unexpected dictionary close token"); | 191 | warn("unexpected dictionary close token"); |
| @@ -229,11 +203,12 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -229,11 +203,12 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 229 | warn("ignoring excessively deeply nested data structure"); | 203 | warn("ignoring excessively deeply nested data structure"); |
| 230 | return {QPDF_Null::create()}; | 204 | return {QPDF_Null::create()}; |
| 231 | } else { | 205 | } else { |
| 232 | - state_stack.push_back( | 206 | + b_contents = false; |
| 207 | + stack.emplace_back( | ||
| 208 | + input, | ||
| 233 | (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array | 209 | (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array |
| 234 | : st_dictionary); | 210 | : st_dictionary); |
| 235 | - b_contents = false; | ||
| 236 | - stack.emplace_back(input); | 211 | + frame = &stack.back(); |
| 237 | continue; | 212 | continue; |
| 238 | } | 213 | } |
| 239 | 214 | ||
| @@ -243,7 +218,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -243,7 +218,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 243 | 218 | ||
| 244 | case QPDFTokenizer::tt_null: | 219 | case QPDFTokenizer::tt_null: |
| 245 | is_null = true; | 220 | is_null = true; |
| 246 | - ++frame.null_count; | 221 | + ++frame->null_count; |
| 247 | 222 | ||
| 248 | break; | 223 | break; |
| 249 | 224 | ||
| @@ -271,23 +246,23 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -271,23 +246,23 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 271 | case QPDFTokenizer::tt_word: | 246 | case QPDFTokenizer::tt_word: |
| 272 | { | 247 | { |
| 273 | auto const& value = tokenizer.getValue(); | 248 | auto const& value = tokenizer.getValue(); |
| 274 | - auto size = olist.size(); | 249 | + auto size = frame->olist.size(); |
| 275 | if (content_stream) { | 250 | if (content_stream) { |
| 276 | object = QPDF_Operator::create(value); | 251 | object = QPDF_Operator::create(value); |
| 277 | } else if ( | 252 | } else if ( |
| 278 | - value == "R" && state != st_top && size >= 2 && olist.back() && | ||
| 279 | - olist.back()->getTypeCode() == ::ot_integer && | ||
| 280 | - !olist.back()->getObjGen().isIndirect() && olist.at(size - 2) && | ||
| 281 | - olist.at(size - 2)->getTypeCode() == ::ot_integer && | ||
| 282 | - !olist.at(size - 2)->getObjGen().isIndirect()) { | 253 | + value == "R" && frame->state != st_top && size >= 2 && frame->olist.back() && |
| 254 | + frame->olist.back()->getTypeCode() == ::ot_integer && | ||
| 255 | + !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) && | ||
| 256 | + frame->olist.at(size - 2)->getTypeCode() == ::ot_integer && | ||
| 257 | + !frame->olist.at(size - 2)->getObjGen().isIndirect()) { | ||
| 283 | if (context == nullptr) { | 258 | if (context == nullptr) { |
| 284 | QTC::TC("qpdf", "QPDFParser indirect without context"); | 259 | QTC::TC("qpdf", "QPDFParser indirect without context"); |
| 285 | throw std::logic_error("QPDFObjectHandle::parse called without context on " | 260 | throw std::logic_error("QPDFObjectHandle::parse called without context on " |
| 286 | "an object with indirect references"); | 261 | "an object with indirect references"); |
| 287 | } | 262 | } |
| 288 | auto ref_og = QPDFObjGen( | 263 | auto ref_og = QPDFObjGen( |
| 289 | - QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(), | ||
| 290 | - QPDFObjectHandle(olist.back()).getIntValueAsInt()); | 264 | + QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(), |
| 265 | + QPDFObjectHandle(frame->olist.back()).getIntValueAsInt()); | ||
| 291 | if (ref_og.isIndirect()) { | 266 | if (ref_og.isIndirect()) { |
| 292 | // This action has the desirable side effect of causing dangling references | 267 | // This action has the desirable side effect of causing dangling references |
| 293 | // (references to indirect objects that don't appear in the PDF) in any | 268 | // (references to indirect objects that don't appear in the PDF) in any |
| @@ -298,9 +273,9 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -298,9 +273,9 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 298 | QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); | 273 | QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); |
| 299 | is_null = true; | 274 | is_null = true; |
| 300 | } | 275 | } |
| 301 | - olist.pop_back(); | ||
| 302 | - olist.pop_back(); | ||
| 303 | - } else if ((value == "endobj") && (state == st_top)) { | 276 | + frame->olist.pop_back(); |
| 277 | + frame->olist.pop_back(); | ||
| 278 | + } else if ((value == "endobj") && (frame->state == st_top)) { | ||
| 304 | // We just saw endobj without having read anything. Treat this as a null and do | 279 | // We just saw endobj without having read anything. Treat this as a null and do |
| 305 | // not move the input source's offset. | 280 | // not move the input source's offset. |
| 306 | is_null = true; | 281 | is_null = true; |
| @@ -322,8 +297,8 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -322,8 +297,8 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 322 | auto const& val = tokenizer.getValue(); | 297 | auto const& val = tokenizer.getValue(); |
| 323 | if (decrypter) { | 298 | if (decrypter) { |
| 324 | if (b_contents) { | 299 | if (b_contents) { |
| 325 | - frame.contents_string = val; | ||
| 326 | - frame.contents_offset = input->getLastOffset(); | 300 | + frame->contents_string = val; |
| 301 | + frame->contents_offset = input->getLastOffset(); | ||
| 327 | b_contents = false; | 302 | b_contents = false; |
| 328 | } | 303 | } |
| 329 | std::string s{val}; | 304 | std::string s{val}; |
| @@ -348,7 +323,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -348,7 +323,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 348 | throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); | 323 | throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); |
| 349 | } | 324 | } |
| 350 | 325 | ||
| 351 | - switch (state) { | 326 | + switch (frame->state) { |
| 352 | case st_dictionary: | 327 | case st_dictionary: |
| 353 | case st_array: | 328 | case st_array: |
| 354 | if (is_null) { | 329 | if (is_null) { |
| @@ -358,7 +333,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -358,7 +333,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 358 | setDescription(object, input->getLastOffset()); | 333 | setDescription(object, input->getLastOffset()); |
| 359 | } | 334 | } |
| 360 | set_offset = true; | 335 | set_offset = true; |
| 361 | - stack.back().olist.push_back(object); | 336 | + frame->olist.push_back(object); |
| 362 | break; | 337 | break; |
| 363 | 338 | ||
| 364 | case st_top: | 339 | case st_top: |
| @@ -371,7 +346,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -371,7 +346,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 371 | object = QPDF_Null::create(); | 346 | object = QPDF_Null::create(); |
| 372 | } | 347 | } |
| 373 | if (!set_offset) { | 348 | if (!set_offset) { |
| 374 | - setDescription(object, offset); | 349 | + setDescription(object, frame->offset); |
| 375 | } | 350 | } |
| 376 | return object; | 351 | return object; |
| 377 | } | 352 | } |
libqpdf/qpdf/QPDFParser.hh
| @@ -31,8 +31,25 @@ class QPDFParser | @@ -31,8 +31,25 @@ class QPDFParser | ||
| 31 | QPDFObjectHandle parse(bool& empty, bool content_stream); | 31 | QPDFObjectHandle parse(bool& empty, bool content_stream); |
| 32 | 32 | ||
| 33 | private: | 33 | private: |
| 34 | + struct StackFrame; | ||
| 34 | enum parser_state_e { st_top, st_dictionary, st_array }; | 35 | enum parser_state_e { st_top, st_dictionary, st_array }; |
| 35 | 36 | ||
| 37 | + struct StackFrame | ||
| 38 | + { | ||
| 39 | + StackFrame(std::shared_ptr<InputSource> const& input, parser_state_e state) : | ||
| 40 | + state(state), | ||
| 41 | + offset(input->tell()) | ||
| 42 | + { | ||
| 43 | + } | ||
| 44 | + | ||
| 45 | + std::vector<std::shared_ptr<QPDFObject>> olist; | ||
| 46 | + parser_state_e state; | ||
| 47 | + qpdf_offset_t offset; | ||
| 48 | + std::string contents_string{""}; | ||
| 49 | + qpdf_offset_t contents_offset{-1}; | ||
| 50 | + int null_count{0}; | ||
| 51 | + }; | ||
| 52 | + | ||
| 36 | bool tooManyBadTokens(); | 53 | bool tooManyBadTokens(); |
| 37 | void warn(qpdf_offset_t offset, std::string const& msg) const; | 54 | void warn(qpdf_offset_t offset, std::string const& msg) const; |
| 38 | void warn(std::string const& msg) const; | 55 | void warn(std::string const& msg) const; |