Commit 605b1429e8b58d7fada225acaf530cfe8e9954ac

Authored by m-holger
1 parent 0328d872

In QPDFParser::parse create dictionaries on the fly

Also, don't search for /Contents name unless the result is used.
libqpdf/QPDFParser.cc
... ... @@ -74,7 +74,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
74 74 stack.clear();
75 75 stack.emplace_back(
76 76 input,
77   - (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary);
  77 + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
78 78 frame = &stack.back();
79 79 return parseRemainder(content_stream);
80 80  
... ... @@ -242,60 +242,44 @@ QPDFParser::parseRemainder(bool content_stream)
242 242 continue;
243 243  
244 244 case QPDFTokenizer::tt_dict_close:
245   - if (frame->state == st_dictionary) {
246   - // Convert list to map. Alternating elements are keys. Attempt to recover more or
247   - // less gracefully from invalid dictionaries.
248   - std::set<std::string> names;
249   - for (auto& obj: frame->olist) {
250   - if (obj) {
  245 + if (frame->state <= st_dictionary_value) {
  246 + // Attempt to recover more or less gracefully from invalid dictionaries.
  247 +
  248 + auto& dict = frame->dict;
  249 + if (frame->state == st_dictionary_value) {
  250 + QTC::TC("qpdf", "QPDFParser no val for last key");
  251 + warn(
  252 + frame->offset,
  253 + "dictionary ended prematurely; using null as value for last key");
  254 + dict[frame->key] = QPDF_Null::create();
  255 + }
  256 +
  257 + if (!frame->olist.empty()) {
  258 + std::set<std::string> names;
  259 + for (auto& obj: frame->olist) {
251 260 if (obj->getTypeCode() == ::ot_name) {
252 261 names.insert(obj->getStringValue());
253 262 }
254 263 }
255   - }
256   -
257   - std::map<std::string, QPDFObjectHandle> dict;
258   - int next_fake_key = 1;
259   - for (auto iter = frame->olist.begin(); iter != frame->olist.end();) {
260   - // Calculate key.
261   - std::string key;
262   - if (*iter && (*iter)->getTypeCode() == ::ot_name) {
263   - key = (*iter)->getStringValue();
264   - ++iter;
265   - } else {
266   - for (bool found_fake = false; !found_fake;) {
267   - key = "/QPDFFake" + std::to_string(next_fake_key++);
268   - found_fake = (names.count(key) == 0);
  264 + int next_fake_key = 1;
  265 + for (auto const& item: frame->olist) {
  266 + while (true) {
  267 + const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
  268 + const bool found_fake = (dict.count(key) == 0 && names.count(key) == 0);
269 269 QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
  270 + if (found_fake) {
  271 + warn(
  272 + frame->offset,
  273 + "expected dictionary key but found non-name object; inserting "
  274 + "key " +
  275 + key);
  276 + dict[key] = item;
  277 + break;
  278 + }
270 279 }
271   - warn(
272   - frame->offset,
273   - "expected dictionary key but found non-name object; inserting key " +
274   - key);
275   - }
276   - if (dict.count(key) > 0) {
277   - QTC::TC("qpdf", "QPDFParser duplicate dict key");
278   - warn(
279   - frame->offset,
280   - "dictionary has duplicated key " + key +
281   - "; last occurrence overrides earlier ones");
282   - }
283   -
284   - // Calculate value.
285   - ObjectPtr val;
286   - if (iter != frame->olist.end()) {
287   - val = *iter;
288   - ++iter;
289   - } else {
290   - QTC::TC("qpdf", "QPDFParser no val for last key");
291   - warn(
292   - frame->offset,
293   - "dictionary ended prematurely; using null as value for last key");
294   - val = QPDF_Null::create();
295 280 }
296   -
297   - dict[std::move(key)] = val;
298 281 }
  282 +
299 283 if (!frame->contents_string.empty() && dict.count("/Type") &&
300 284 dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
301 285 dict.count("/Contents") && dict["/Contents"].isString()) {
... ... @@ -335,7 +319,7 @@ QPDFParser::parseRemainder(bool content_stream)
335 319 stack.emplace_back(
336 320 input,
337 321 (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
338   - : st_dictionary);
  322 + : st_dictionary_key);
339 323 frame = &stack.back();
340 324 continue;
341 325 }
... ... @@ -364,15 +348,13 @@ QPDFParser::parseRemainder(bool content_stream)
364 348 continue;
365 349  
366 350 case QPDFTokenizer::tt_name:
367   - {
368   - auto const& name = tokenizer.getValue();
369   - addScalar<QPDF_Name>(name);
370   -
371   - if (name == "/Contents") {
372   - b_contents = true;
373   - } else {
374   - b_contents = false;
375   - }
  351 + if (frame->state == st_dictionary_key) {
  352 + frame->key = tokenizer.getValue();
  353 + frame->state = st_dictionary_value;
  354 + b_contents = decrypter && frame->key == "/Contents";
  355 + continue;
  356 + } else {
  357 + addScalar<QPDF_Name>(tokenizer.getValue());
376 358 }
377 359 continue;
378 360  
... ... @@ -415,13 +397,21 @@ QPDFParser::parseRemainder(bool content_stream)
415 397 addNull();
416 398 }
417 399 }
418   - return {}; // unreachable
419 400 }
420 401  
421 402 void
422 403 QPDFParser::add(std::shared_ptr<QPDFObject>&& obj)
423 404 {
424   - frame->olist.emplace_back(std::move(obj));
  405 + if (frame->state != st_dictionary_value) {
  406 + // If state is st_dictionary_key then there is a missing key. Push onto olist for
  407 + // processing once the tt_dict_close token has been found.
  408 + frame->olist.emplace_back(std::move(obj));
  409 + } else {
  410 + if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) {
  411 + warnDuplicateKey();
  412 + }
  413 + frame->state = st_dictionary_key;
  414 + }
425 415 }
426 416  
427 417 void
... ... @@ -429,7 +419,16 @@ QPDFParser::addNull()
429 419 {
430 420 const static ObjectPtr null_obj = QPDF_Null::create();
431 421  
432   - frame->olist.emplace_back(null_obj);
  422 + if (frame->state != st_dictionary_value) {
  423 + // If state is st_dictionary_key then there is a missing key. Push onto olist for
  424 + // processing once the tt_dict_close token has been found.
  425 + frame->olist.emplace_back(null_obj);
  426 + } else {
  427 + if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) {
  428 + warnDuplicateKey();
  429 + }
  430 + frame->state = st_dictionary_key;
  431 + }
433 432 ++frame->null_count;
434 433 }
435 434  
... ... @@ -496,6 +495,15 @@ QPDFParser::warn(QPDFExc const& e) const
496 495 }
497 496  
498 497 void
  498 +QPDFParser::warnDuplicateKey()
  499 +{
  500 + QTC::TC("qpdf", "QPDFParser duplicate dict key");
  501 + warn(
  502 + frame->offset,
  503 + "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones");
  504 +}
  505 +
  506 +void
499 507 QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const
500 508 {
501 509 warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), object_description, offset, msg));
... ...
libqpdf/qpdf/QPDFParser.hh
... ... @@ -31,8 +31,9 @@ class QPDFParser
31 31 QPDFObjectHandle parse(bool& empty, bool content_stream);
32 32  
33 33 private:
34   - struct StackFrame;
35   - enum parser_state_e { st_dictionary, st_array };
  34 + // Parser state. Note:
  35 + // state <= st_dictionary_value == (state == st_dictionary_key || state == st_dictionary_value)
  36 + enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array };
36 37  
37 38 struct StackFrame
38 39 {
... ... @@ -43,7 +44,9 @@ class QPDFParser
43 44 }
44 45  
45 46 std::vector<std::shared_ptr<QPDFObject>> olist;
  47 + std::map<std::string, QPDFObjectHandle> dict;
46 48 parser_state_e state;
  49 + std::string key;
47 50 qpdf_offset_t offset;
48 51 std::string contents_string;
49 52 qpdf_offset_t contents_offset{-1};
... ... @@ -57,6 +60,7 @@ class QPDFParser
57 60 template <typename T, typename... Args>
58 61 void addScalar(Args&&... args);
59 62 bool tooManyBadTokens();
  63 + void warnDuplicateKey();
60 64 void warn(qpdf_offset_t offset, std::string const& msg) const;
61 65 void warn(std::string const& msg) const;
62 66 void warn(QPDFExc const&) const;
... ... @@ -83,7 +87,6 @@ class QPDFParser
83 87 int int_count = 0;
84 88 long long int_buffer[2]{0, 0};
85 89 qpdf_offset_t last_offset_buffer[2]{0, 0};
86   -
87 90 };
88 91  
89 92 #endif // QPDFPARSER_HH
... ...
qpdf/qtest/qpdf/bad36-recover.out
1 1 WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string
2   -WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
3 2 WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key
  3 +WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
4 4 /QTest is implicit
5 5 /QTest is direct and has type null (2)
6 6 /QTest is null
... ...
qpdf/qtest/qpdf/bad36.out
1 1 WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string
2   -WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
3 2 WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key
  3 +WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
4 4 /QTest is implicit
5 5 /QTest is direct and has type null (2)
6 6 /QTest is null
... ...
qpdf/qtest/qpdf/issue-335a.out
... ... @@ -51,6 +51,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
51 51 WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
52 52 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
53 53 WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
  54 +WARNING: issue-335a.pdf (trailer, offset 134): dictionary has duplicated key /L
54 55 WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
55 56 WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
56 57 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
... ... @@ -74,6 +75,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
74 75 WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
75 76 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
76 77 WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
  78 +WARNING: issue-335a.pdf (trailer, offset 164): dictionary has duplicated key /L
77 79 WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
78 80 WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
79 81 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
... ... @@ -97,6 +99,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
97 99 WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
98 100 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
99 101 WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
  102 +WARNING: issue-335a.pdf (trailer, offset 231): dictionary has duplicated key /L
100 103 WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
101 104 WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
102 105 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
... ... @@ -448,6 +451,7 @@ WARNING: issue-335a.pdf (trailer, offset 1168): unexpected )
448 451 WARNING: issue-335a.pdf (trailer, offset 1328): unexpected )
449 452 WARNING: issue-335a.pdf (trailer, offset 1329): name with stray # will not work with PDF >= 1.2
450 453 WARNING: issue-335a.pdf (trailer, offset 1332): unexpected )
  454 +WARNING: issue-335a.pdf (trailer, offset 1033): dictionary has duplicated key /L
451 455 WARNING: issue-335a.pdf (trailer, offset 1333): unexpected )
452 456 WARNING: issue-335a.pdf (trailer, offset 1344): unexpected )
453 457 WARNING: issue-335a.pdf (trailer, offset 1428): unexpected )
... ...