Commit 605b1429e8b58d7fada225acaf530cfe8e9954ac
1 parent
0328d872
In QPDFParser::parse create dictionaries on the fly
Also, don't search for /Contents name unless the result is used.
Showing 5 changed files with 80 additions and 65 deletions
libqpdf/QPDFParser.cc
| @@ -74,7 +74,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | @@ -74,7 +74,7 @@ QPDFParser::parse(bool& empty, bool content_stream) | ||
| 74 | stack.clear(); | 74 | stack.clear(); |
| 75 | stack.emplace_back( | 75 | stack.emplace_back( |
| 76 | input, | 76 | input, |
| 77 | - (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary); | 77 | + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key); |
| 78 | frame = &stack.back(); | 78 | frame = &stack.back(); |
| 79 | return parseRemainder(content_stream); | 79 | return parseRemainder(content_stream); |
| 80 | 80 | ||
| @@ -242,60 +242,44 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -242,60 +242,44 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 242 | continue; | 242 | continue; |
| 243 | 243 | ||
| 244 | case QPDFTokenizer::tt_dict_close: | 244 | case QPDFTokenizer::tt_dict_close: |
| 245 | - if (frame->state == st_dictionary) { | ||
| 246 | - // Convert list to map. Alternating elements are keys. Attempt to recover more or | ||
| 247 | - // less gracefully from invalid dictionaries. | ||
| 248 | - std::set<std::string> names; | ||
| 249 | - for (auto& obj: frame->olist) { | ||
| 250 | - if (obj) { | 245 | + if (frame->state <= st_dictionary_value) { |
| 246 | + // Attempt to recover more or less gracefully from invalid dictionaries. | ||
| 247 | + | ||
| 248 | + auto& dict = frame->dict; | ||
| 249 | + if (frame->state == st_dictionary_value) { | ||
| 250 | + QTC::TC("qpdf", "QPDFParser no val for last key"); | ||
| 251 | + warn( | ||
| 252 | + frame->offset, | ||
| 253 | + "dictionary ended prematurely; using null as value for last key"); | ||
| 254 | + dict[frame->key] = QPDF_Null::create(); | ||
| 255 | + } | ||
| 256 | + | ||
| 257 | + if (!frame->olist.empty()) { | ||
| 258 | + std::set<std::string> names; | ||
| 259 | + for (auto& obj: frame->olist) { | ||
| 251 | if (obj->getTypeCode() == ::ot_name) { | 260 | if (obj->getTypeCode() == ::ot_name) { |
| 252 | names.insert(obj->getStringValue()); | 261 | names.insert(obj->getStringValue()); |
| 253 | } | 262 | } |
| 254 | } | 263 | } |
| 255 | - } | ||
| 256 | - | ||
| 257 | - std::map<std::string, QPDFObjectHandle> dict; | ||
| 258 | - int next_fake_key = 1; | ||
| 259 | - for (auto iter = frame->olist.begin(); iter != frame->olist.end();) { | ||
| 260 | - // Calculate key. | ||
| 261 | - std::string key; | ||
| 262 | - if (*iter && (*iter)->getTypeCode() == ::ot_name) { | ||
| 263 | - key = (*iter)->getStringValue(); | ||
| 264 | - ++iter; | ||
| 265 | - } else { | ||
| 266 | - for (bool found_fake = false; !found_fake;) { | ||
| 267 | - key = "/QPDFFake" + std::to_string(next_fake_key++); | ||
| 268 | - found_fake = (names.count(key) == 0); | 264 | + int next_fake_key = 1; |
| 265 | + for (auto const& item: frame->olist) { | ||
| 266 | + while (true) { | ||
| 267 | + const std::string key = "/QPDFFake" + std::to_string(next_fake_key++); | ||
| 268 | + const bool found_fake = (dict.count(key) == 0 && names.count(key) == 0); | ||
| 269 | QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); | 269 | QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); |
| 270 | + if (found_fake) { | ||
| 271 | + warn( | ||
| 272 | + frame->offset, | ||
| 273 | + "expected dictionary key but found non-name object; inserting " | ||
| 274 | + "key " + | ||
| 275 | + key); | ||
| 276 | + dict[key] = item; | ||
| 277 | + break; | ||
| 278 | + } | ||
| 270 | } | 279 | } |
| 271 | - warn( | ||
| 272 | - frame->offset, | ||
| 273 | - "expected dictionary key but found non-name object; inserting key " + | ||
| 274 | - key); | ||
| 275 | - } | ||
| 276 | - if (dict.count(key) > 0) { | ||
| 277 | - QTC::TC("qpdf", "QPDFParser duplicate dict key"); | ||
| 278 | - warn( | ||
| 279 | - frame->offset, | ||
| 280 | - "dictionary has duplicated key " + key + | ||
| 281 | - "; last occurrence overrides earlier ones"); | ||
| 282 | - } | ||
| 283 | - | ||
| 284 | - // Calculate value. | ||
| 285 | - ObjectPtr val; | ||
| 286 | - if (iter != frame->olist.end()) { | ||
| 287 | - val = *iter; | ||
| 288 | - ++iter; | ||
| 289 | - } else { | ||
| 290 | - QTC::TC("qpdf", "QPDFParser no val for last key"); | ||
| 291 | - warn( | ||
| 292 | - frame->offset, | ||
| 293 | - "dictionary ended prematurely; using null as value for last key"); | ||
| 294 | - val = QPDF_Null::create(); | ||
| 295 | } | 280 | } |
| 296 | - | ||
| 297 | - dict[std::move(key)] = val; | ||
| 298 | } | 281 | } |
| 282 | + | ||
| 299 | if (!frame->contents_string.empty() && dict.count("/Type") && | 283 | if (!frame->contents_string.empty() && dict.count("/Type") && |
| 300 | dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && | 284 | dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && |
| 301 | dict.count("/Contents") && dict["/Contents"].isString()) { | 285 | dict.count("/Contents") && dict["/Contents"].isString()) { |
| @@ -335,7 +319,7 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -335,7 +319,7 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 335 | stack.emplace_back( | 319 | stack.emplace_back( |
| 336 | input, | 320 | input, |
| 337 | (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array | 321 | (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array |
| 338 | - : st_dictionary); | 322 | + : st_dictionary_key); |
| 339 | frame = &stack.back(); | 323 | frame = &stack.back(); |
| 340 | continue; | 324 | continue; |
| 341 | } | 325 | } |
| @@ -364,15 +348,13 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -364,15 +348,13 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 364 | continue; | 348 | continue; |
| 365 | 349 | ||
| 366 | case QPDFTokenizer::tt_name: | 350 | case QPDFTokenizer::tt_name: |
| 367 | - { | ||
| 368 | - auto const& name = tokenizer.getValue(); | ||
| 369 | - addScalar<QPDF_Name>(name); | ||
| 370 | - | ||
| 371 | - if (name == "/Contents") { | ||
| 372 | - b_contents = true; | ||
| 373 | - } else { | ||
| 374 | - b_contents = false; | ||
| 375 | - } | 351 | + if (frame->state == st_dictionary_key) { |
| 352 | + frame->key = tokenizer.getValue(); | ||
| 353 | + frame->state = st_dictionary_value; | ||
| 354 | + b_contents = decrypter && frame->key == "/Contents"; | ||
| 355 | + continue; | ||
| 356 | + } else { | ||
| 357 | + addScalar<QPDF_Name>(tokenizer.getValue()); | ||
| 376 | } | 358 | } |
| 377 | continue; | 359 | continue; |
| 378 | 360 | ||
| @@ -415,13 +397,21 @@ QPDFParser::parseRemainder(bool content_stream) | @@ -415,13 +397,21 @@ QPDFParser::parseRemainder(bool content_stream) | ||
| 415 | addNull(); | 397 | addNull(); |
| 416 | } | 398 | } |
| 417 | } | 399 | } |
| 418 | - return {}; // unreachable | ||
| 419 | } | 400 | } |
| 420 | 401 | ||
| 421 | void | 402 | void |
| 422 | QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) | 403 | QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) |
| 423 | { | 404 | { |
| 424 | - frame->olist.emplace_back(std::move(obj)); | 405 | + if (frame->state != st_dictionary_value) { |
| 406 | + // If state is st_dictionary_key then there is a missing key. Push onto olist for | ||
| 407 | + // processing once the tt_dict_close token has been found. | ||
| 408 | + frame->olist.emplace_back(std::move(obj)); | ||
| 409 | + } else { | ||
| 410 | + if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) { | ||
| 411 | + warnDuplicateKey(); | ||
| 412 | + } | ||
| 413 | + frame->state = st_dictionary_key; | ||
| 414 | + } | ||
| 425 | } | 415 | } |
| 426 | 416 | ||
| 427 | void | 417 | void |
| @@ -429,7 +419,16 @@ QPDFParser::addNull() | @@ -429,7 +419,16 @@ QPDFParser::addNull() | ||
| 429 | { | 419 | { |
| 430 | const static ObjectPtr null_obj = QPDF_Null::create(); | 420 | const static ObjectPtr null_obj = QPDF_Null::create(); |
| 431 | 421 | ||
| 432 | - frame->olist.emplace_back(null_obj); | 422 | + if (frame->state != st_dictionary_value) { |
| 423 | + // If state is st_dictionary_key then there is a missing key. Push onto olist for | ||
| 424 | + // processing once the tt_dict_close token has been found. | ||
| 425 | + frame->olist.emplace_back(null_obj); | ||
| 426 | + } else { | ||
| 427 | + if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) { | ||
| 428 | + warnDuplicateKey(); | ||
| 429 | + } | ||
| 430 | + frame->state = st_dictionary_key; | ||
| 431 | + } | ||
| 433 | ++frame->null_count; | 432 | ++frame->null_count; |
| 434 | } | 433 | } |
| 435 | 434 | ||
| @@ -496,6 +495,15 @@ QPDFParser::warn(QPDFExc const& e) const | @@ -496,6 +495,15 @@ QPDFParser::warn(QPDFExc const& e) const | ||
| 496 | } | 495 | } |
| 497 | 496 | ||
| 498 | void | 497 | void |
| 498 | +QPDFParser::warnDuplicateKey() | ||
| 499 | +{ | ||
| 500 | + QTC::TC("qpdf", "QPDFParser duplicate dict key"); | ||
| 501 | + warn( | ||
| 502 | + frame->offset, | ||
| 503 | + "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones"); | ||
| 504 | +} | ||
| 505 | + | ||
| 506 | +void | ||
| 499 | QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const | 507 | QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const |
| 500 | { | 508 | { |
| 501 | warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), object_description, offset, msg)); | 509 | warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), object_description, offset, msg)); |
libqpdf/qpdf/QPDFParser.hh
| @@ -31,8 +31,9 @@ class QPDFParser | @@ -31,8 +31,9 @@ class QPDFParser | ||
| 31 | QPDFObjectHandle parse(bool& empty, bool content_stream); | 31 | QPDFObjectHandle parse(bool& empty, bool content_stream); |
| 32 | 32 | ||
| 33 | private: | 33 | private: |
| 34 | - struct StackFrame; | ||
| 35 | - enum parser_state_e { st_dictionary, st_array }; | 34 | + // Parser state. Note: |
| 35 | + // state <= st_dictionary_value == (state == st_dictionary_key || state == st_dictionary_value) | ||
| 36 | + enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; | ||
| 36 | 37 | ||
| 37 | struct StackFrame | 38 | struct StackFrame |
| 38 | { | 39 | { |
| @@ -43,7 +44,9 @@ class QPDFParser | @@ -43,7 +44,9 @@ class QPDFParser | ||
| 43 | } | 44 | } |
| 44 | 45 | ||
| 45 | std::vector<std::shared_ptr<QPDFObject>> olist; | 46 | std::vector<std::shared_ptr<QPDFObject>> olist; |
| 47 | + std::map<std::string, QPDFObjectHandle> dict; | ||
| 46 | parser_state_e state; | 48 | parser_state_e state; |
| 49 | + std::string key; | ||
| 47 | qpdf_offset_t offset; | 50 | qpdf_offset_t offset; |
| 48 | std::string contents_string; | 51 | std::string contents_string; |
| 49 | qpdf_offset_t contents_offset{-1}; | 52 | qpdf_offset_t contents_offset{-1}; |
| @@ -57,6 +60,7 @@ class QPDFParser | @@ -57,6 +60,7 @@ class QPDFParser | ||
| 57 | template <typename T, typename... Args> | 60 | template <typename T, typename... Args> |
| 58 | void addScalar(Args&&... args); | 61 | void addScalar(Args&&... args); |
| 59 | bool tooManyBadTokens(); | 62 | bool tooManyBadTokens(); |
| 63 | + void warnDuplicateKey(); | ||
| 60 | void warn(qpdf_offset_t offset, std::string const& msg) const; | 64 | void warn(qpdf_offset_t offset, std::string const& msg) const; |
| 61 | void warn(std::string const& msg) const; | 65 | void warn(std::string const& msg) const; |
| 62 | void warn(QPDFExc const&) const; | 66 | void warn(QPDFExc const&) const; |
| @@ -83,7 +87,6 @@ class QPDFParser | @@ -83,7 +87,6 @@ class QPDFParser | ||
| 83 | int int_count = 0; | 87 | int int_count = 0; |
| 84 | long long int_buffer[2]{0, 0}; | 88 | long long int_buffer[2]{0, 0}; |
| 85 | qpdf_offset_t last_offset_buffer[2]{0, 0}; | 89 | qpdf_offset_t last_offset_buffer[2]{0, 0}; |
| 86 | - | ||
| 87 | }; | 90 | }; |
| 88 | 91 | ||
| 89 | #endif // QPDFPARSER_HH | 92 | #endif // QPDFPARSER_HH |
qpdf/qtest/qpdf/bad36-recover.out
| 1 | WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string | 1 | WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string |
| 2 | -WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 | ||
| 3 | WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key | 2 | WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key |
| 3 | +WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 | ||
| 4 | /QTest is implicit | 4 | /QTest is implicit |
| 5 | /QTest is direct and has type null (2) | 5 | /QTest is direct and has type null (2) |
| 6 | /QTest is null | 6 | /QTest is null |
qpdf/qtest/qpdf/bad36.out
| 1 | WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string | 1 | WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string |
| 2 | -WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 | ||
| 3 | WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key | 2 | WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key |
| 3 | +WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 | ||
| 4 | /QTest is implicit | 4 | /QTest is implicit |
| 5 | /QTest is direct and has type null (2) | 5 | /QTest is direct and has type null (2) |
| 6 | /QTest is null | 6 | /QTest is null |
qpdf/qtest/qpdf/issue-335a.out
| @@ -51,6 +51,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) | @@ -51,6 +51,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) | ||
| 51 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) | 51 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) |
| 52 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 | 52 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 |
| 53 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) | 53 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) |
| 54 | +WARNING: issue-335a.pdf (trailer, offset 134): dictionary has duplicated key /L | ||
| 54 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) | 55 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) |
| 55 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) | 56 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) |
| 56 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 | 57 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 |
| @@ -74,6 +75,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) | @@ -74,6 +75,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) | ||
| 74 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) | 75 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) |
| 75 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 | 76 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 |
| 76 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) | 77 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) |
| 78 | +WARNING: issue-335a.pdf (trailer, offset 164): dictionary has duplicated key /L | ||
| 77 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) | 79 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) |
| 78 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) | 80 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) |
| 79 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 | 81 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 |
| @@ -97,6 +99,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) | @@ -97,6 +99,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) | ||
| 97 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) | 99 | WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) |
| 98 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 | 100 | WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 |
| 99 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) | 101 | WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) |
| 102 | +WARNING: issue-335a.pdf (trailer, offset 231): dictionary has duplicated key /L | ||
| 100 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) | 103 | WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) |
| 101 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) | 104 | WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) |
| 102 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 | 105 | WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 |
| @@ -448,6 +451,7 @@ WARNING: issue-335a.pdf (trailer, offset 1168): unexpected ) | @@ -448,6 +451,7 @@ WARNING: issue-335a.pdf (trailer, offset 1168): unexpected ) | ||
| 448 | WARNING: issue-335a.pdf (trailer, offset 1328): unexpected ) | 451 | WARNING: issue-335a.pdf (trailer, offset 1328): unexpected ) |
| 449 | WARNING: issue-335a.pdf (trailer, offset 1329): name with stray # will not work with PDF >= 1.2 | 452 | WARNING: issue-335a.pdf (trailer, offset 1329): name with stray # will not work with PDF >= 1.2 |
| 450 | WARNING: issue-335a.pdf (trailer, offset 1332): unexpected ) | 453 | WARNING: issue-335a.pdf (trailer, offset 1332): unexpected ) |
| 454 | +WARNING: issue-335a.pdf (trailer, offset 1033): dictionary has duplicated key /L | ||
| 451 | WARNING: issue-335a.pdf (trailer, offset 1333): unexpected ) | 455 | WARNING: issue-335a.pdf (trailer, offset 1333): unexpected ) |
| 452 | WARNING: issue-335a.pdf (trailer, offset 1344): unexpected ) | 456 | WARNING: issue-335a.pdf (trailer, offset 1344): unexpected ) |
| 453 | WARNING: issue-335a.pdf (trailer, offset 1428): unexpected ) | 457 | WARNING: issue-335a.pdf (trailer, offset 1428): unexpected ) |