Commit 90829228b814c6fe3ea3192da34db90dc1e36843
1 parent 12837f14
In QPDFParser::parse refactor handling of dict_close tokens
Showing 2 changed files with 76 additions and 85 deletions
libqpdf/QPDFParser.cc
| ... | ... | @@ -138,7 +138,80 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 138 | 138 | |
| 139 | 139 | case QPDFTokenizer::tt_dict_close: |
| 140 | 140 | if (state == st_dictionary) { |
| 141 | - state = st_stop; | |
| 141 | + if ((state_stack.size() < 2) || (stack.size() < 2)) { | |
| 142 | + throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " | |
| 143 | + "insufficient elements in stack"); | |
| 144 | + } | |
| 145 | + | |
| 146 | + // Convert list to map. Alternating elements are keys. Attempt to recover more or | |
| 147 | + // less gracefully from invalid dictionaries. | |
| 148 | + std::set<std::string> names; | |
| 149 | + for (auto& obj: olist) { | |
| 150 | + if (obj) { | |
| 151 | + if (obj->getTypeCode() == ::ot_name) { | |
| 152 | + names.insert(obj->getStringValue()); | |
| 153 | + } | |
| 154 | + } | |
| 155 | + } | |
| 156 | + | |
| 157 | + std::map<std::string, QPDFObjectHandle> dict; | |
| 158 | + int next_fake_key = 1; | |
| 159 | + for (auto iter = olist.begin(); iter != olist.end();) { | |
| 160 | + // Calculate key. | |
| 161 | + std::string key; | |
| 162 | + if (*iter && (*iter)->getTypeCode() == ::ot_name) { | |
| 163 | + key = (*iter)->getStringValue(); | |
| 164 | + ++iter; | |
| 165 | + } else { | |
| 166 | + for (bool found_fake = false; !found_fake;) { | |
| 167 | + key = "/QPDFFake" + std::to_string(next_fake_key++); | |
| 168 | + found_fake = (names.count(key) == 0); | |
| 169 | + QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); | |
| 170 | + } | |
| 171 | + warn( | |
| 172 | + offset, | |
| 173 | + "expected dictionary key but found non-name object; inserting key " + | |
| 174 | + key); | |
| 175 | + } | |
| 176 | + if (dict.count(key) > 0) { | |
| 177 | + QTC::TC("qpdf", "QPDFParser duplicate dict key"); | |
| 178 | + warn( | |
| 179 | + offset, | |
| 180 | + "dictionary has duplicated key " + key + | |
| 181 | + "; last occurrence overrides earlier ones"); | |
| 182 | + } | |
| 183 | + | |
| 184 | + // Calculate value. | |
| 185 | + std::shared_ptr<QPDFObject> val; | |
| 186 | + if (iter != olist.end()) { | |
| 187 | + val = *iter; | |
| 188 | + ++iter; | |
| 189 | + } else { | |
| 190 | + QTC::TC("qpdf", "QPDFParser no val for last key"); | |
| 191 | + warn( | |
| 192 | + offset, | |
| 193 | + "dictionary ended prematurely; using null as value for last key"); | |
| 194 | + val = QPDF_Null::create(); | |
| 195 | + } | |
| 196 | + | |
| 197 | + dict[std::move(key)] = std::move(val); | |
| 198 | + } | |
| 199 | + if (!frame.contents_string.empty() && dict.count("/Type") && | |
| 200 | + dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && | |
| 201 | + dict.count("/Contents") && dict["/Contents"].isString()) { | |
| 202 | + dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string); | |
| 203 | + dict["/Contents"].setParsedOffset(frame.contents_offset); | |
| 204 | + } | |
| 205 | + object = QPDF_Dictionary::create(std::move(dict)); | |
| 206 | + setDescription(object, offset - 2); | |
| 207 | + // The `offset` points to the next of "<<". Set the rewind offset to point to the | |
| 208 | + // beginning of "<<". This has been explicitly tested with whitespace surrounding | |
| 209 | + // the dictionary start delimiter. getLastOffset points to the dictionary end token | |
| 210 | + // and therefore can't be used here. | |
| 211 | + set_offset = true; | |
| 212 | + state_stack.pop_back(); | |
| 213 | + state = state_stack.back(); | |
| 214 | + stack.pop_back(); | |
| 142 | 215 | } else { |
| 143 | 216 | QTC::TC("qpdf", "QPDFParser bad dictionary close"); |
| 144 | 217 | warn("unexpected dictionary close token"); |
| ... | ... | @@ -276,7 +349,7 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 276 | 349 | break; |
| 277 | 350 | } |
| 278 | 351 | |
| 279 | - if (object == nullptr && !is_null && (!(state == st_start || state == st_stop))) { | |
| 352 | + if (object == nullptr && !is_null && state != st_start) { | |
| 280 | 353 | throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); |
| 281 | 354 | } |
| 282 | 355 | |
| ... | ... | @@ -299,88 +372,6 @@ QPDFParser::parse(bool& empty, bool content_stream) |
| 299 | 372 | |
| 300 | 373 | case st_start: |
| 301 | 374 | break; |
| 302 | - | |
| 303 | - case st_stop: | |
| 304 | - if ((state_stack.size() < 2) || (stack.size() < 2)) { | |
| 305 | - throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " | |
| 306 | - "insufficient elements in stack"); | |
| 307 | - } | |
| 308 | - parser_state_e old_state = state_stack.back(); | |
| 309 | - state_stack.pop_back(); | |
| 310 | - if (old_state == st_dictionary) { | |
| 311 | - // Convert list to map. Alternating elements are keys. Attempt to recover more or | |
| 312 | - // less gracefully from invalid dictionaries. | |
| 313 | - std::set<std::string> names; | |
| 314 | - for (auto& obj: olist) { | |
| 315 | - if (obj) { | |
| 316 | - if (obj->getTypeCode() == ::ot_name) { | |
| 317 | - names.insert(obj->getStringValue()); | |
| 318 | - } | |
| 319 | - } | |
| 320 | - } | |
| 321 | - | |
| 322 | - std::map<std::string, QPDFObjectHandle> dict; | |
| 323 | - int next_fake_key = 1; | |
| 324 | - for (auto iter = olist.begin(); iter != olist.end();) { | |
| 325 | - // Calculate key. | |
| 326 | - std::string key; | |
| 327 | - if (*iter && (*iter)->getTypeCode() == ::ot_name) { | |
| 328 | - key = (*iter)->getStringValue(); | |
| 329 | - ++iter; | |
| 330 | - } else { | |
| 331 | - for (bool found_fake = false; !found_fake;) { | |
| 332 | - key = "/QPDFFake" + std::to_string(next_fake_key++); | |
| 333 | - found_fake = (names.count(key) == 0); | |
| 334 | - QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); | |
| 335 | - } | |
| 336 | - warn( | |
| 337 | - offset, | |
| 338 | - "expected dictionary key but found non-name object; inserting key " + | |
| 339 | - key); | |
| 340 | - } | |
| 341 | - if (dict.count(key) > 0) { | |
| 342 | - QTC::TC("qpdf", "QPDFParser duplicate dict key"); | |
| 343 | - warn( | |
| 344 | - offset, | |
| 345 | - "dictionary has duplicated key " + key + | |
| 346 | - "; last occurrence overrides earlier ones"); | |
| 347 | - } | |
| 348 | - | |
| 349 | - // Calculate value. | |
| 350 | - std::shared_ptr<QPDFObject> val; | |
| 351 | - if (iter != olist.end()) { | |
| 352 | - val = *iter; | |
| 353 | - ++iter; | |
| 354 | - } else { | |
| 355 | - QTC::TC("qpdf", "QPDFParser no val for last key"); | |
| 356 | - warn( | |
| 357 | - offset, | |
| 358 | - "dictionary ended prematurely; using null as value for last key"); | |
| 359 | - val = QPDF_Null::create(); | |
| 360 | - } | |
| 361 | - | |
| 362 | - dict[std::move(key)] = std::move(val); | |
| 363 | - } | |
| 364 | - if (!frame.contents_string.empty() && dict.count("/Type") && | |
| 365 | - dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && | |
| 366 | - dict.count("/Contents") && dict["/Contents"].isString()) { | |
| 367 | - dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string); | |
| 368 | - dict["/Contents"].setParsedOffset(frame.contents_offset); | |
| 369 | - } | |
| 370 | - object = QPDF_Dictionary::create(std::move(dict)); | |
| 371 | - setDescription(object, offset - 2); | |
| 372 | - // The `offset` points to the next of "<<". Set the rewind offset to point to the | |
| 373 | - // beginning of "<<". This has been explicitly tested with whitespace surrounding | |
| 374 | - // the dictionary start delimiter. getLastOffset points to the dictionary end token | |
| 375 | - // and therefore can't be used here. | |
| 376 | - set_offset = true; | |
| 377 | - } | |
| 378 | - stack.pop_back(); | |
| 379 | - if (state_stack.back() == st_top) { | |
| 380 | - done = true; | |
| 381 | - } else { | |
| 382 | - stack.back().olist.push_back(object); | |
| 383 | - } | |
| 384 | 375 | } |
| 385 | 376 | } |
| 386 | 377 | ... | ... |
libqpdf/qpdf/QPDFParser.hh
| ... | ... | @@ -31,7 +31,7 @@ class QPDFParser |
| 31 | 31 | QPDFObjectHandle parse(bool& empty, bool content_stream); |
| 32 | 32 | |
| 33 | 33 | private: |
| 34 | - enum parser_state_e { st_top, st_start, st_stop, st_dictionary, st_array }; | |
| 34 | + enum parser_state_e { st_top, st_start, st_dictionary, st_array }; | |
| 35 | 35 | |
| 36 | 36 | bool tooManyBadTokens(); |
| 37 | 37 | void warn(qpdf_offset_t offset, std::string const& msg) const; | ... | ... |