Commit 8b4fb5b2dfc6e42cdf5a59a6f141d2a98ffcba67

Authored by m-holger
1 parent eca6d7ab

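Refactor `ResourceFinder`: streamline logic and optimize data structure handling (store name offsets in `std::vector<size_t>` instead of `std::set<size_t>`, define the getters inline in the header, and update `ResourceReplacer` and `QPDFPageObjectHelper::removeUnreferencedResourcesHelper` to match).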

libqpdf/QPDFAcroFormDocumentHelper.cc
@@ -489,7 +489,7 @@ namespace @@ -489,7 +489,7 @@ namespace
489 public: 489 public:
490 ResourceReplacer( 490 ResourceReplacer(
491 std::map<std::string, std::map<std::string, std::string>> const& dr_map, 491 std::map<std::string, std::map<std::string, std::string>> const& dr_map,
492 - std::map<std::string, std::map<std::string, std::set<size_t>>> const& rnames); 492 + std::map<std::string, std::map<std::string, std::vector<size_t>>> const& rnames);
493 ~ResourceReplacer() override = default; 493 ~ResourceReplacer() override = default;
494 void handleToken(QPDFTokenizer::Token const&) override; 494 void handleToken(QPDFTokenizer::Token const&) override;
495 495
@@ -501,7 +501,7 @@ namespace @@ -501,7 +501,7 @@ namespace
501 501
502 ResourceReplacer::ResourceReplacer( 502 ResourceReplacer::ResourceReplacer(
503 std::map<std::string, std::map<std::string, std::string>> const& dr_map, 503 std::map<std::string, std::map<std::string, std::string>> const& dr_map,
504 - std::map<std::string, std::map<std::string, std::set<size_t>>> const& rnames) 504 + std::map<std::string, std::map<std::string, std::vector<size_t>>> const& rnames)
505 { 505 {
506 // We have: 506 // We have:
507 // * dr_map[resource_type][key] == new_key 507 // * dr_map[resource_type][key] == new_key
@@ -510,22 +510,18 @@ ResourceReplacer::ResourceReplacer( @@ -510,22 +510,18 @@ ResourceReplacer::ResourceReplacer(
510 // We want: 510 // We want:
511 // * to_replace[key][offset] = new_key 511 // * to_replace[key][offset] = new_key
512 512
513 - for (auto const& rn_iter: rnames) {  
514 - std::string const& rtype = rn_iter.first; 513 + for (auto const& [rtype, key_offsets]: rnames) {
515 auto dr_map_rtype = dr_map.find(rtype); 514 auto dr_map_rtype = dr_map.find(rtype);
516 if (dr_map_rtype == dr_map.end()) { 515 if (dr_map_rtype == dr_map.end()) {
517 continue; 516 continue;
518 } 517 }
519 - auto const& key_offsets = rn_iter.second;  
520 - for (auto const& ko_iter: key_offsets) {  
521 - std::string const& old_key = ko_iter.first; 518 + for (auto const& [old_key, offsets]: key_offsets) {
522 auto dr_map_rtype_old = dr_map_rtype->second.find(old_key); 519 auto dr_map_rtype_old = dr_map_rtype->second.find(old_key);
523 if (dr_map_rtype_old == dr_map_rtype->second.end()) { 520 if (dr_map_rtype_old == dr_map_rtype->second.end()) {
524 continue; 521 continue;
525 } 522 }
526 - auto const& offsets = ko_iter.second;  
527 - for (auto const& o_iter: offsets) {  
528 - to_replace[old_key][o_iter] = dr_map_rtype_old->second; 523 + for (auto const& offs: offsets) {
  524 + to_replace[old_key][offs] = dr_map_rtype_old->second;
529 } 525 }
530 } 526 }
531 } 527 }
@@ -534,19 +530,20 @@ ResourceReplacer::ResourceReplacer( @@ -534,19 +530,20 @@ ResourceReplacer::ResourceReplacer(
534 void 530 void
535 ResourceReplacer::handleToken(QPDFTokenizer::Token const& token) 531 ResourceReplacer::handleToken(QPDFTokenizer::Token const& token)
536 { 532 {
537 - bool wrote = false;  
538 if (token.getType() == QPDFTokenizer::tt_name) { 533 if (token.getType() == QPDFTokenizer::tt_name) {
539 - std::string name = QPDFObjectHandle::newName(token.getValue()).getName();  
540 - if (to_replace.contains(name) && to_replace[name].contains(offset)) {  
541 - QTC::TC("qpdf", "QPDFAcroFormDocumentHelper replaced DA token");  
542 - write(to_replace[name][offset]);  
543 - wrote = true; 534 + auto it1 = to_replace.find(token.getValue());
  535 + if (it1 != to_replace.end()) {
  536 + auto it2 = it1->second.find(offset);
  537 + if (it2 != it1->second.end()) {
  538 + QTC::TC("qpdf", "QPDFAcroFormDocumentHelper replaced DA token");
  539 + write(it2->second);
  540 + offset += token.getRawValue().length();
  541 + return;
  542 + }
544 } 543 }
545 } 544 }
546 offset += token.getRawValue().length(); 545 offset += token.getRawValue().length();
547 - if (!wrote) {  
548 - writeToken(token);  
549 - } 546 + writeToken(token);
550 } 547 }
551 548
552 void 549 void
libqpdf/QPDFPageObjectHelper.cc
@@ -543,14 +543,14 @@ bool @@ -543,14 +543,14 @@ bool
543 QPDFPageObjectHelper::removeUnreferencedResourcesHelper( 543 QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
544 QPDFPageObjectHelper ph, std::set<std::string>& unresolved) 544 QPDFPageObjectHelper ph, std::set<std::string>& unresolved)
545 { 545 {
546 - bool is_page = (!ph.oh().isFormXObject()); 546 + const bool is_page = !ph.oh().isFormXObject();
547 if (!is_page) { 547 if (!is_page) {
548 QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject"); 548 QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject");
549 } 549 }
550 550
551 ResourceFinder rf; 551 ResourceFinder rf;
552 try { 552 try {
553 - auto q = ph.oh().getOwningQPDF(); 553 + auto q = ph.qpdf();
554 size_t before_nw = (q ? q->numWarnings() : 0); 554 size_t before_nw = (q ? q->numWarnings() : 0);
555 ph.parseContents(&rf); 555 ph.parseContents(&rf);
556 size_t after_nw = (q ? q->numWarnings() : 0); 556 size_t after_nw = (q ? q->numWarnings() : 0);
@@ -575,23 +575,25 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( @@ -575,23 +575,25 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
575 QPDFObjectHandle resources = ph.getAttribute("/Resources", true); 575 QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
576 std::vector<QPDFObjectHandle> rdicts; 576 std::vector<QPDFObjectHandle> rdicts;
577 std::set<std::string> known_names; 577 std::set<std::string> known_names;
578 - std::vector<std::string> to_filter = {"/Font", "/XObject"};  
579 if (resources.isDictionary()) { 578 if (resources.isDictionary()) {
580 - for (auto const& iter: to_filter) { 579 + for (auto const& iter: {"/Font", "/XObject"}) {
581 QPDFObjectHandle dict = resources.getKey(iter); 580 QPDFObjectHandle dict = resources.getKey(iter);
582 if (dict.isDictionary()) { 581 if (dict.isDictionary()) {
583 dict = resources.replaceKeyAndGetNew(iter, dict.shallowCopy()); 582 dict = resources.replaceKeyAndGetNew(iter, dict.shallowCopy());
584 rdicts.push_back(dict); 583 rdicts.push_back(dict);
585 - auto keys = dict.getKeys();  
586 - known_names.insert(keys.begin(), keys.end()); 584 + known_names.merge(dict.getKeys());
587 } 585 }
588 } 586 }
589 } 587 }
590 588
591 std::set<std::string> local_unresolved; 589 std::set<std::string> local_unresolved;
592 - auto names_by_rtype = rf.getNamesByResourceType();  
593 - for (auto const& i1: to_filter) {  
594 - for (auto const& n_iter: names_by_rtype[i1]) { 590 + auto const& names_by_rtype = rf.getNamesByResourceType();
  591 + for (auto const& i1: {"/Font", "/XObject"}) {
  592 + auto it = names_by_rtype.find(i1);
  593 + if (it == names_by_rtype.end()) {
  594 + continue;
  595 + }
  596 + for (auto const& n_iter: it->second) {
595 std::string const& name = n_iter.first; 597 std::string const& name = n_iter.first;
596 if (!known_names.contains(name)) { 598 if (!known_names.contains(name)) {
597 unresolved.insert(name); 599 unresolved.insert(name);
@@ -610,7 +612,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( @@ -610,7 +612,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
610 // unresolved names, and for page objects, we avoid removing any such names found in nested form 612 // unresolved names, and for page objects, we avoid removing any such names found in nested form
611 // XObjects. 613 // XObjects.
612 614
613 - if ((!local_unresolved.empty()) && resources.isDictionary()) { 615 + if (!local_unresolved.empty() && resources.isDictionary()) {
614 // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only 616 // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only
615 // looking at names that are referencing fonts and XObjects, but until we're certain that we 617 // looking at names that are referencing fonts and XObjects, but until we're certain that we
616 // know the meaning of every name in a content stream, we don't want to give warnings that 618 // know the meaning of every name in a content stream, we don't want to give warnings that
libqpdf/ResourceFinder.cc
@@ -3,8 +3,8 @@ @@ -3,8 +3,8 @@
3 void 3 void
4 ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t) 4 ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t)
5 { 5 {
6 - if (obj.isOperator() && (!this->last_name.empty())) {  
7 - static std::map<std::string, std::string> op_to_rtype = { 6 + if (obj.isOperator() && !last_name.empty()) {
  7 + static const std::map<std::string, std::string> op_to_rtype{
8 {"CS", "/ColorSpace"}, 8 {"CS", "/ColorSpace"},
9 {"cs", "/ColorSpace"}, 9 {"cs", "/ColorSpace"},
10 {"gs", "/ExtGState"}, 10 {"gs", "/ExtGState"},
@@ -17,19 +17,15 @@ ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t) @@ -17,19 +17,15 @@ ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t)
17 {"Do", "/XObject"}, 17 {"Do", "/XObject"},
18 }; 18 };
19 std::string op = obj.getOperatorValue(); 19 std::string op = obj.getOperatorValue();
20 - std::string resource_type;  
21 auto iter = op_to_rtype.find(op); 20 auto iter = op_to_rtype.find(op);
22 - if (iter != op_to_rtype.end()) {  
23 - resource_type = iter->second;  
24 - }  
25 - if (!resource_type.empty()) {  
26 - this->names.insert(this->last_name);  
27 - this->names_by_resource_type[resource_type][this->last_name].insert(  
28 - this->last_name_offset); 21 + if (iter == op_to_rtype.end()) {
  22 + return;
29 } 23 }
  24 + names.insert(last_name);
  25 + names_by_resource_type[iter->second][last_name].push_back(last_name_offset);
30 } else if (obj.isName()) { 26 } else if (obj.isName()) {
31 - this->last_name = obj.getName();  
32 - this->last_name_offset = offset; 27 + last_name = obj.getName();
  28 + last_name_offset = offset;
33 } 29 }
34 } 30 }
35 31
@@ -37,15 +33,3 @@ void @@ -37,15 +33,3 @@ void
37 ResourceFinder::handleEOF() 33 ResourceFinder::handleEOF()
38 { 34 {
39 } 35 }
40 -  
41 -std::set<std::string> const&  
42 -ResourceFinder::getNames() const  
43 -{  
44 - return this->names;  
45 -}  
46 -  
47 -std::map<std::string, std::map<std::string, std::set<size_t>>> const&  
48 -ResourceFinder::getNamesByResourceType() const  
49 -{  
50 - return this->names_by_resource_type;  
51 -}  
libqpdf/qpdf/ResourceFinder.hh
@@ -10,15 +10,22 @@ class ResourceFinder final: public QPDFObjectHandle::ParserCallbacks @@ -10,15 +10,22 @@ class ResourceFinder final: public QPDFObjectHandle::ParserCallbacks
10 ~ResourceFinder() final = default; 10 ~ResourceFinder() final = default;
11 void handleObject(QPDFObjectHandle, size_t, size_t) final; 11 void handleObject(QPDFObjectHandle, size_t, size_t) final;
12 void handleEOF() final; 12 void handleEOF() final;
13 - std::set<std::string> const& getNames() const;  
14 - std::map<std::string, std::map<std::string, std::set<size_t>>> const&  
15 - getNamesByResourceType() const; 13 + std::set<std::string> const&
  14 + getNames() const
  15 + {
  16 + return names;
  17 + }
  18 + std::map<std::string, std::map<std::string, std::vector<size_t>>> const&
  19 + getNamesByResourceType() const
  20 + {
  21 + return names_by_resource_type;
  22 + }
16 23
17 private: 24 private:
18 std::string last_name; 25 std::string last_name;
19 size_t last_name_offset{0}; 26 size_t last_name_offset{0};
20 std::set<std::string> names; 27 std::set<std::string> names;
21 - std::map<std::string, std::map<std::string, std::set<size_t>>> names_by_resource_type; 28 + std::map<std::string, std::map<std::string, std::vector<size_t>>> names_by_resource_type;
22 }; 29 };
23 30
24 #endif // RESOURCEFINDER_HH 31 #endif // RESOURCEFINDER_HH