Commit 605b1429e8b58d7fada225acaf530cfe8e9954ac

Authored by m-holger
1 parent 0328d872

In QPDFParser::parse create dictionaries on the fly

Also, don't search for /Contents name unless the result is used.
libqpdf/QPDFParser.cc
@@ -74,7 +74,7 @@ QPDFParser::parse(bool& empty, bool content_stream) @@ -74,7 +74,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
74 stack.clear(); 74 stack.clear();
75 stack.emplace_back( 75 stack.emplace_back(
76 input, 76 input,
77 - (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary); 77 + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
78 frame = &stack.back(); 78 frame = &stack.back();
79 return parseRemainder(content_stream); 79 return parseRemainder(content_stream);
80 80
@@ -242,60 +242,44 @@ QPDFParser::parseRemainder(bool content_stream) @@ -242,60 +242,44 @@ QPDFParser::parseRemainder(bool content_stream)
242 continue; 242 continue;
243 243
244 case QPDFTokenizer::tt_dict_close: 244 case QPDFTokenizer::tt_dict_close:
245 - if (frame->state == st_dictionary) {  
246 - // Convert list to map. Alternating elements are keys. Attempt to recover more or  
247 - // less gracefully from invalid dictionaries.  
248 - std::set<std::string> names;  
249 - for (auto& obj: frame->olist) {  
250 - if (obj) { 245 + if (frame->state <= st_dictionary_value) {
  246 + // Attempt to recover more or less gracefully from invalid dictionaries.
  247 +
  248 + auto& dict = frame->dict;
  249 + if (frame->state == st_dictionary_value) {
  250 + QTC::TC("qpdf", "QPDFParser no val for last key");
  251 + warn(
  252 + frame->offset,
  253 + "dictionary ended prematurely; using null as value for last key");
  254 + dict[frame->key] = QPDF_Null::create();
  255 + }
  256 +
  257 + if (!frame->olist.empty()) {
  258 + std::set<std::string> names;
  259 + for (auto& obj: frame->olist) {
251 if (obj->getTypeCode() == ::ot_name) { 260 if (obj->getTypeCode() == ::ot_name) {
252 names.insert(obj->getStringValue()); 261 names.insert(obj->getStringValue());
253 } 262 }
254 } 263 }
255 - }  
256 -  
257 - std::map<std::string, QPDFObjectHandle> dict;  
258 - int next_fake_key = 1;  
259 - for (auto iter = frame->olist.begin(); iter != frame->olist.end();) {  
260 - // Calculate key.  
261 - std::string key;  
262 - if (*iter && (*iter)->getTypeCode() == ::ot_name) {  
263 - key = (*iter)->getStringValue();  
264 - ++iter;  
265 - } else {  
266 - for (bool found_fake = false; !found_fake;) {  
267 - key = "/QPDFFake" + std::to_string(next_fake_key++);  
268 - found_fake = (names.count(key) == 0); 264 + int next_fake_key = 1;
  265 + for (auto const& item: frame->olist) {
  266 + while (true) {
  267 + const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
  268 + const bool found_fake = (dict.count(key) == 0 && names.count(key) == 0);
269 QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); 269 QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
  270 + if (found_fake) {
  271 + warn(
  272 + frame->offset,
  273 + "expected dictionary key but found non-name object; inserting "
  274 + "key " +
  275 + key);
  276 + dict[key] = item;
  277 + break;
  278 + }
270 } 279 }
271 - warn(  
272 - frame->offset,  
273 - "expected dictionary key but found non-name object; inserting key " +  
274 - key);  
275 - }  
276 - if (dict.count(key) > 0) {  
277 - QTC::TC("qpdf", "QPDFParser duplicate dict key");  
278 - warn(  
279 - frame->offset,  
280 - "dictionary has duplicated key " + key +  
281 - "; last occurrence overrides earlier ones");  
282 - }  
283 -  
284 - // Calculate value.  
285 - ObjectPtr val;  
286 - if (iter != frame->olist.end()) {  
287 - val = *iter;  
288 - ++iter;  
289 - } else {  
290 - QTC::TC("qpdf", "QPDFParser no val for last key");  
291 - warn(  
292 - frame->offset,  
293 - "dictionary ended prematurely; using null as value for last key");  
294 - val = QPDF_Null::create();  
295 } 280 }
296 -  
297 - dict[std::move(key)] = val;  
298 } 281 }
  282 +
299 if (!frame->contents_string.empty() && dict.count("/Type") && 283 if (!frame->contents_string.empty() && dict.count("/Type") &&
300 dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && 284 dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
301 dict.count("/Contents") && dict["/Contents"].isString()) { 285 dict.count("/Contents") && dict["/Contents"].isString()) {
@@ -335,7 +319,7 @@ QPDFParser::parseRemainder(bool content_stream) @@ -335,7 +319,7 @@ QPDFParser::parseRemainder(bool content_stream)
335 stack.emplace_back( 319 stack.emplace_back(
336 input, 320 input,
337 (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array 321 (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
338 - : st_dictionary); 322 + : st_dictionary_key);
339 frame = &stack.back(); 323 frame = &stack.back();
340 continue; 324 continue;
341 } 325 }
@@ -364,15 +348,13 @@ QPDFParser::parseRemainder(bool content_stream) @@ -364,15 +348,13 @@ QPDFParser::parseRemainder(bool content_stream)
364 continue; 348 continue;
365 349
366 case QPDFTokenizer::tt_name: 350 case QPDFTokenizer::tt_name:
367 - {  
368 - auto const& name = tokenizer.getValue();  
369 - addScalar<QPDF_Name>(name);  
370 -  
371 - if (name == "/Contents") {  
372 - b_contents = true;  
373 - } else {  
374 - b_contents = false;  
375 - } 351 + if (frame->state == st_dictionary_key) {
  352 + frame->key = tokenizer.getValue();
  353 + frame->state = st_dictionary_value;
  354 + b_contents = decrypter && frame->key == "/Contents";
  355 + continue;
  356 + } else {
  357 + addScalar<QPDF_Name>(tokenizer.getValue());
376 } 358 }
377 continue; 359 continue;
378 360
@@ -415,13 +397,21 @@ QPDFParser::parseRemainder(bool content_stream) @@ -415,13 +397,21 @@ QPDFParser::parseRemainder(bool content_stream)
415 addNull(); 397 addNull();
416 } 398 }
417 } 399 }
418 - return {}; // unreachable  
419 } 400 }
420 401
421 void 402 void
422 QPDFParser::add(std::shared_ptr<QPDFObject>&& obj) 403 QPDFParser::add(std::shared_ptr<QPDFObject>&& obj)
423 { 404 {
424 - frame->olist.emplace_back(std::move(obj)); 405 + if (frame->state != st_dictionary_value) {
  406 + // If state is st_dictionary_key then there is a missing key. Push onto olist for
  407 + // processing once the tt_dict_close token has been found.
  408 + frame->olist.emplace_back(std::move(obj));
  409 + } else {
  410 + if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) {
  411 + warnDuplicateKey();
  412 + }
  413 + frame->state = st_dictionary_key;
  414 + }
425 } 415 }
426 416
427 void 417 void
@@ -429,7 +419,16 @@ QPDFParser::addNull() @@ -429,7 +419,16 @@ QPDFParser::addNull()
429 { 419 {
430 const static ObjectPtr null_obj = QPDF_Null::create(); 420 const static ObjectPtr null_obj = QPDF_Null::create();
431 421
432 - frame->olist.emplace_back(null_obj); 422 + if (frame->state != st_dictionary_value) {
  423 + // If state is st_dictionary_key then there is a missing key. Push onto olist for
  424 + // processing once the tt_dict_close token has been found.
  425 + frame->olist.emplace_back(null_obj);
  426 + } else {
  427 + if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) {
  428 + warnDuplicateKey();
  429 + }
  430 + frame->state = st_dictionary_key;
  431 + }
433 ++frame->null_count; 432 ++frame->null_count;
434 } 433 }
435 434
@@ -496,6 +495,15 @@ QPDFParser::warn(QPDFExc const& e) const @@ -496,6 +495,15 @@ QPDFParser::warn(QPDFExc const& e) const
496 } 495 }
497 496
498 void 497 void
  498 +QPDFParser::warnDuplicateKey()
  499 +{
  500 + QTC::TC("qpdf", "QPDFParser duplicate dict key");
  501 + warn(
  502 + frame->offset,
  503 + "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones");
  504 +}
  505 +
  506 +void
499 QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const 507 QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const
500 { 508 {
501 warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), object_description, offset, msg)); 509 warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), object_description, offset, msg));
libqpdf/qpdf/QPDFParser.hh
@@ -31,8 +31,9 @@ class QPDFParser @@ -31,8 +31,9 @@ class QPDFParser
31 QPDFObjectHandle parse(bool& empty, bool content_stream); 31 QPDFObjectHandle parse(bool& empty, bool content_stream);
32 32
33 private: 33 private:
34 - struct StackFrame;  
35 - enum parser_state_e { st_dictionary, st_array }; 34 + // Parser state. Note:
  35 + // state <= st_dictionary_value == (state == st_dictionary_key || state == st_dictionary_value)
  36 + enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array };
36 37
37 struct StackFrame 38 struct StackFrame
38 { 39 {
@@ -43,7 +44,9 @@ class QPDFParser @@ -43,7 +44,9 @@ class QPDFParser
43 } 44 }
44 45
45 std::vector<std::shared_ptr<QPDFObject>> olist; 46 std::vector<std::shared_ptr<QPDFObject>> olist;
  47 + std::map<std::string, QPDFObjectHandle> dict;
46 parser_state_e state; 48 parser_state_e state;
  49 + std::string key;
47 qpdf_offset_t offset; 50 qpdf_offset_t offset;
48 std::string contents_string; 51 std::string contents_string;
49 qpdf_offset_t contents_offset{-1}; 52 qpdf_offset_t contents_offset{-1};
@@ -57,6 +60,7 @@ class QPDFParser @@ -57,6 +60,7 @@ class QPDFParser
57 template <typename T, typename... Args> 60 template <typename T, typename... Args>
58 void addScalar(Args&&... args); 61 void addScalar(Args&&... args);
59 bool tooManyBadTokens(); 62 bool tooManyBadTokens();
  63 + void warnDuplicateKey();
60 void warn(qpdf_offset_t offset, std::string const& msg) const; 64 void warn(qpdf_offset_t offset, std::string const& msg) const;
61 void warn(std::string const& msg) const; 65 void warn(std::string const& msg) const;
62 void warn(QPDFExc const&) const; 66 void warn(QPDFExc const&) const;
@@ -83,7 +87,6 @@ class QPDFParser @@ -83,7 +87,6 @@ class QPDFParser
83 int int_count = 0; 87 int int_count = 0;
84 long long int_buffer[2]{0, 0}; 88 long long int_buffer[2]{0, 0};
85 qpdf_offset_t last_offset_buffer[2]{0, 0}; 89 qpdf_offset_t last_offset_buffer[2]{0, 0};
86 -  
87 }; 90 };
88 91
89 #endif // QPDFPARSER_HH 92 #endif // QPDFPARSER_HH
qpdf/qtest/qpdf/bad36-recover.out
1 WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string 1 WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string
2 -WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2  
3 WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key 2 WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key
  3 +WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
4 /QTest is implicit 4 /QTest is implicit
5 /QTest is direct and has type null (2) 5 /QTest is direct and has type null (2)
6 /QTest is null 6 /QTest is null
qpdf/qtest/qpdf/bad36.out
1 WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string 1 WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string
2 -WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2  
3 WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key 2 WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key
  3 +WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
4 /QTest is implicit 4 /QTest is implicit
5 /QTest is direct and has type null (2) 5 /QTest is direct and has type null (2)
6 /QTest is null 6 /QTest is null
qpdf/qtest/qpdf/issue-335a.out
@@ -51,6 +51,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) @@ -51,6 +51,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
51 WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) 51 WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
52 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 52 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
53 WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) 53 WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
  54 +WARNING: issue-335a.pdf (trailer, offset 134): dictionary has duplicated key /L
54 WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) 55 WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
55 WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) 56 WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
56 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 57 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
@@ -74,6 +75,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) @@ -74,6 +75,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
74 WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) 75 WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
75 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 76 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
76 WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) 77 WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
  78 +WARNING: issue-335a.pdf (trailer, offset 164): dictionary has duplicated key /L
77 WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) 79 WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
78 WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) 80 WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
79 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 81 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
@@ -97,6 +99,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) @@ -97,6 +99,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
97 WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) 99 WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
98 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 100 WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
99 WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) 101 WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
  102 +WARNING: issue-335a.pdf (trailer, offset 231): dictionary has duplicated key /L
100 WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) 103 WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
101 WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) 104 WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
102 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 105 WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
@@ -448,6 +451,7 @@ WARNING: issue-335a.pdf (trailer, offset 1168): unexpected ) @@ -448,6 +451,7 @@ WARNING: issue-335a.pdf (trailer, offset 1168): unexpected )
448 WARNING: issue-335a.pdf (trailer, offset 1328): unexpected ) 451 WARNING: issue-335a.pdf (trailer, offset 1328): unexpected )
449 WARNING: issue-335a.pdf (trailer, offset 1329): name with stray # will not work with PDF >= 1.2 452 WARNING: issue-335a.pdf (trailer, offset 1329): name with stray # will not work with PDF >= 1.2
450 WARNING: issue-335a.pdf (trailer, offset 1332): unexpected ) 453 WARNING: issue-335a.pdf (trailer, offset 1332): unexpected )
  454 +WARNING: issue-335a.pdf (trailer, offset 1033): dictionary has duplicated key /L
451 WARNING: issue-335a.pdf (trailer, offset 1333): unexpected ) 455 WARNING: issue-335a.pdf (trailer, offset 1333): unexpected )
452 WARNING: issue-335a.pdf (trailer, offset 1344): unexpected ) 456 WARNING: issue-335a.pdf (trailer, offset 1344): unexpected )
453 WARNING: issue-335a.pdf (trailer, offset 1428): unexpected ) 457 WARNING: issue-335a.pdf (trailer, offset 1428): unexpected )