Commit 8b4fb5b2dfc6e42cdf5a59a6f141d2a98ffcba67
1 parent
eca6d7ab
Refactor `ResourceFinder`: streamline logic and optimize data structure handling.
Showing 4 changed files with 47 additions and 57 deletions
libqpdf/QPDFAcroFormDocumentHelper.cc
| @@ -489,7 +489,7 @@ namespace | @@ -489,7 +489,7 @@ namespace | ||
| 489 | public: | 489 | public: |
| 490 | ResourceReplacer( | 490 | ResourceReplacer( |
| 491 | std::map<std::string, std::map<std::string, std::string>> const& dr_map, | 491 | std::map<std::string, std::map<std::string, std::string>> const& dr_map, |
| 492 | - std::map<std::string, std::map<std::string, std::set<size_t>>> const& rnames); | 492 | + std::map<std::string, std::map<std::string, std::vector<size_t>>> const& rnames); |
| 493 | ~ResourceReplacer() override = default; | 493 | ~ResourceReplacer() override = default; |
| 494 | void handleToken(QPDFTokenizer::Token const&) override; | 494 | void handleToken(QPDFTokenizer::Token const&) override; |
| 495 | 495 | ||
| @@ -501,7 +501,7 @@ namespace | @@ -501,7 +501,7 @@ namespace | ||
| 501 | 501 | ||
| 502 | ResourceReplacer::ResourceReplacer( | 502 | ResourceReplacer::ResourceReplacer( |
| 503 | std::map<std::string, std::map<std::string, std::string>> const& dr_map, | 503 | std::map<std::string, std::map<std::string, std::string>> const& dr_map, |
| 504 | - std::map<std::string, std::map<std::string, std::set<size_t>>> const& rnames) | 504 | + std::map<std::string, std::map<std::string, std::vector<size_t>>> const& rnames) |
| 505 | { | 505 | { |
| 506 | // We have: | 506 | // We have: |
| 507 | // * dr_map[resource_type][key] == new_key | 507 | // * dr_map[resource_type][key] == new_key |
| @@ -510,22 +510,18 @@ ResourceReplacer::ResourceReplacer( | @@ -510,22 +510,18 @@ ResourceReplacer::ResourceReplacer( | ||
| 510 | // We want: | 510 | // We want: |
| 511 | // * to_replace[key][offset] = new_key | 511 | // * to_replace[key][offset] = new_key |
| 512 | 512 | ||
| 513 | - for (auto const& rn_iter: rnames) { | ||
| 514 | - std::string const& rtype = rn_iter.first; | 513 | + for (auto const& [rtype, key_offsets]: rnames) { |
| 515 | auto dr_map_rtype = dr_map.find(rtype); | 514 | auto dr_map_rtype = dr_map.find(rtype); |
| 516 | if (dr_map_rtype == dr_map.end()) { | 515 | if (dr_map_rtype == dr_map.end()) { |
| 517 | continue; | 516 | continue; |
| 518 | } | 517 | } |
| 519 | - auto const& key_offsets = rn_iter.second; | ||
| 520 | - for (auto const& ko_iter: key_offsets) { | ||
| 521 | - std::string const& old_key = ko_iter.first; | 518 | + for (auto const& [old_key, offsets]: key_offsets) { |
| 522 | auto dr_map_rtype_old = dr_map_rtype->second.find(old_key); | 519 | auto dr_map_rtype_old = dr_map_rtype->second.find(old_key); |
| 523 | if (dr_map_rtype_old == dr_map_rtype->second.end()) { | 520 | if (dr_map_rtype_old == dr_map_rtype->second.end()) { |
| 524 | continue; | 521 | continue; |
| 525 | } | 522 | } |
| 526 | - auto const& offsets = ko_iter.second; | ||
| 527 | - for (auto const& o_iter: offsets) { | ||
| 528 | - to_replace[old_key][o_iter] = dr_map_rtype_old->second; | 523 | + for (auto const& offs: offsets) { |
| 524 | + to_replace[old_key][offs] = dr_map_rtype_old->second; | ||
| 529 | } | 525 | } |
| 530 | } | 526 | } |
| 531 | } | 527 | } |
| @@ -534,19 +530,20 @@ ResourceReplacer::ResourceReplacer( | @@ -534,19 +530,20 @@ ResourceReplacer::ResourceReplacer( | ||
| 534 | void | 530 | void |
| 535 | ResourceReplacer::handleToken(QPDFTokenizer::Token const& token) | 531 | ResourceReplacer::handleToken(QPDFTokenizer::Token const& token) |
| 536 | { | 532 | { |
| 537 | - bool wrote = false; | ||
| 538 | if (token.getType() == QPDFTokenizer::tt_name) { | 533 | if (token.getType() == QPDFTokenizer::tt_name) { |
| 539 | - std::string name = QPDFObjectHandle::newName(token.getValue()).getName(); | ||
| 540 | - if (to_replace.contains(name) && to_replace[name].contains(offset)) { | ||
| 541 | - QTC::TC("qpdf", "QPDFAcroFormDocumentHelper replaced DA token"); | ||
| 542 | - write(to_replace[name][offset]); | ||
| 543 | - wrote = true; | 534 | + auto it1 = to_replace.find(token.getValue()); |
| 535 | + if (it1 != to_replace.end()) { | ||
| 536 | + auto it2 = it1->second.find(offset); | ||
| 537 | + if (it2 != it1->second.end()) { | ||
| 538 | + QTC::TC("qpdf", "QPDFAcroFormDocumentHelper replaced DA token"); | ||
| 539 | + write(it2->second); | ||
| 540 | + offset += token.getRawValue().length(); | ||
| 541 | + return; | ||
| 542 | + } | ||
| 544 | } | 543 | } |
| 545 | } | 544 | } |
| 546 | offset += token.getRawValue().length(); | 545 | offset += token.getRawValue().length(); |
| 547 | - if (!wrote) { | ||
| 548 | - writeToken(token); | ||
| 549 | - } | 546 | + writeToken(token); |
| 550 | } | 547 | } |
| 551 | 548 | ||
| 552 | void | 549 | void |
libqpdf/QPDFPageObjectHelper.cc
| @@ -543,14 +543,14 @@ bool | @@ -543,14 +543,14 @@ bool | ||
| 543 | QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | 543 | QPDFPageObjectHelper::removeUnreferencedResourcesHelper( |
| 544 | QPDFPageObjectHelper ph, std::set<std::string>& unresolved) | 544 | QPDFPageObjectHelper ph, std::set<std::string>& unresolved) |
| 545 | { | 545 | { |
| 546 | - bool is_page = (!ph.oh().isFormXObject()); | 546 | + const bool is_page = !ph.oh().isFormXObject(); |
| 547 | if (!is_page) { | 547 | if (!is_page) { |
| 548 | QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject"); | 548 | QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject"); |
| 549 | } | 549 | } |
| 550 | 550 | ||
| 551 | ResourceFinder rf; | 551 | ResourceFinder rf; |
| 552 | try { | 552 | try { |
| 553 | - auto q = ph.oh().getOwningQPDF(); | 553 | + auto q = ph.qpdf(); |
| 554 | size_t before_nw = (q ? q->numWarnings() : 0); | 554 | size_t before_nw = (q ? q->numWarnings() : 0); |
| 555 | ph.parseContents(&rf); | 555 | ph.parseContents(&rf); |
| 556 | size_t after_nw = (q ? q->numWarnings() : 0); | 556 | size_t after_nw = (q ? q->numWarnings() : 0); |
| @@ -575,23 +575,25 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | @@ -575,23 +575,25 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | ||
| 575 | QPDFObjectHandle resources = ph.getAttribute("/Resources", true); | 575 | QPDFObjectHandle resources = ph.getAttribute("/Resources", true); |
| 576 | std::vector<QPDFObjectHandle> rdicts; | 576 | std::vector<QPDFObjectHandle> rdicts; |
| 577 | std::set<std::string> known_names; | 577 | std::set<std::string> known_names; |
| 578 | - std::vector<std::string> to_filter = {"/Font", "/XObject"}; | ||
| 579 | if (resources.isDictionary()) { | 578 | if (resources.isDictionary()) { |
| 580 | - for (auto const& iter: to_filter) { | 579 | + for (auto const& iter: {"/Font", "/XObject"}) { |
| 581 | QPDFObjectHandle dict = resources.getKey(iter); | 580 | QPDFObjectHandle dict = resources.getKey(iter); |
| 582 | if (dict.isDictionary()) { | 581 | if (dict.isDictionary()) { |
| 583 | dict = resources.replaceKeyAndGetNew(iter, dict.shallowCopy()); | 582 | dict = resources.replaceKeyAndGetNew(iter, dict.shallowCopy()); |
| 584 | rdicts.push_back(dict); | 583 | rdicts.push_back(dict); |
| 585 | - auto keys = dict.getKeys(); | ||
| 586 | - known_names.insert(keys.begin(), keys.end()); | 584 | + known_names.merge(dict.getKeys()); |
| 587 | } | 585 | } |
| 588 | } | 586 | } |
| 589 | } | 587 | } |
| 590 | 588 | ||
| 591 | std::set<std::string> local_unresolved; | 589 | std::set<std::string> local_unresolved; |
| 592 | - auto names_by_rtype = rf.getNamesByResourceType(); | ||
| 593 | - for (auto const& i1: to_filter) { | ||
| 594 | - for (auto const& n_iter: names_by_rtype[i1]) { | 590 | + auto const& names_by_rtype = rf.getNamesByResourceType(); |
| 591 | + for (auto const& i1: {"/Font", "/XObject"}) { | ||
| 592 | + auto it = names_by_rtype.find(i1); | ||
| 593 | + if (it == names_by_rtype.end()) { | ||
| 594 | + continue; | ||
| 595 | + } | ||
| 596 | + for (auto const& n_iter: it->second) { | ||
| 595 | std::string const& name = n_iter.first; | 597 | std::string const& name = n_iter.first; |
| 596 | if (!known_names.contains(name)) { | 598 | if (!known_names.contains(name)) { |
| 597 | unresolved.insert(name); | 599 | unresolved.insert(name); |
| @@ -610,7 +612,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | @@ -610,7 +612,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( | ||
| 610 | // unresolved names, and for page objects, we avoid removing any such names found in nested form | 612 | // unresolved names, and for page objects, we avoid removing any such names found in nested form |
| 611 | // XObjects. | 613 | // XObjects. |
| 612 | 614 | ||
| 613 | - if ((!local_unresolved.empty()) && resources.isDictionary()) { | 615 | + if (!local_unresolved.empty() && resources.isDictionary()) { |
| 614 | // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only | 616 | // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only |
| 615 | // looking at names that are referencing fonts and XObjects, but until we're certain that we | 617 | // looking at names that are referencing fonts and XObjects, but until we're certain that we |
| 616 | // know the meaning of every name in a content stream, we don't want to give warnings that | 618 | // know the meaning of every name in a content stream, we don't want to give warnings that |
libqpdf/ResourceFinder.cc
| @@ -3,8 +3,8 @@ | @@ -3,8 +3,8 @@ | ||
| 3 | void | 3 | void |
| 4 | ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t) | 4 | ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t) |
| 5 | { | 5 | { |
| 6 | - if (obj.isOperator() && (!this->last_name.empty())) { | ||
| 7 | - static std::map<std::string, std::string> op_to_rtype = { | 6 | + if (obj.isOperator() && !last_name.empty()) { |
| 7 | + static const std::map<std::string, std::string> op_to_rtype{ | ||
| 8 | {"CS", "/ColorSpace"}, | 8 | {"CS", "/ColorSpace"}, |
| 9 | {"cs", "/ColorSpace"}, | 9 | {"cs", "/ColorSpace"}, |
| 10 | {"gs", "/ExtGState"}, | 10 | {"gs", "/ExtGState"}, |
| @@ -17,19 +17,15 @@ ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t) | @@ -17,19 +17,15 @@ ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t) | ||
| 17 | {"Do", "/XObject"}, | 17 | {"Do", "/XObject"}, |
| 18 | }; | 18 | }; |
| 19 | std::string op = obj.getOperatorValue(); | 19 | std::string op = obj.getOperatorValue(); |
| 20 | - std::string resource_type; | ||
| 21 | auto iter = op_to_rtype.find(op); | 20 | auto iter = op_to_rtype.find(op); |
| 22 | - if (iter != op_to_rtype.end()) { | ||
| 23 | - resource_type = iter->second; | ||
| 24 | - } | ||
| 25 | - if (!resource_type.empty()) { | ||
| 26 | - this->names.insert(this->last_name); | ||
| 27 | - this->names_by_resource_type[resource_type][this->last_name].insert( | ||
| 28 | - this->last_name_offset); | 21 | + if (iter == op_to_rtype.end()) { |
| 22 | + return; | ||
| 29 | } | 23 | } |
| 24 | + names.insert(last_name); | ||
| 25 | + names_by_resource_type[iter->second][last_name].push_back(last_name_offset); | ||
| 30 | } else if (obj.isName()) { | 26 | } else if (obj.isName()) { |
| 31 | - this->last_name = obj.getName(); | ||
| 32 | - this->last_name_offset = offset; | 27 | + last_name = obj.getName(); |
| 28 | + last_name_offset = offset; | ||
| 33 | } | 29 | } |
| 34 | } | 30 | } |
| 35 | 31 | ||
| @@ -37,15 +33,3 @@ void | @@ -37,15 +33,3 @@ void | ||
| 37 | ResourceFinder::handleEOF() | 33 | ResourceFinder::handleEOF() |
| 38 | { | 34 | { |
| 39 | } | 35 | } |
| 40 | - | ||
| 41 | -std::set<std::string> const& | ||
| 42 | -ResourceFinder::getNames() const | ||
| 43 | -{ | ||
| 44 | - return this->names; | ||
| 45 | -} | ||
| 46 | - | ||
| 47 | -std::map<std::string, std::map<std::string, std::set<size_t>>> const& | ||
| 48 | -ResourceFinder::getNamesByResourceType() const | ||
| 49 | -{ | ||
| 50 | - return this->names_by_resource_type; | ||
| 51 | -} |
libqpdf/qpdf/ResourceFinder.hh
| @@ -10,15 +10,22 @@ class ResourceFinder final: public QPDFObjectHandle::ParserCallbacks | @@ -10,15 +10,22 @@ class ResourceFinder final: public QPDFObjectHandle::ParserCallbacks | ||
| 10 | ~ResourceFinder() final = default; | 10 | ~ResourceFinder() final = default; |
| 11 | void handleObject(QPDFObjectHandle, size_t, size_t) final; | 11 | void handleObject(QPDFObjectHandle, size_t, size_t) final; |
| 12 | void handleEOF() final; | 12 | void handleEOF() final; |
| 13 | - std::set<std::string> const& getNames() const; | ||
| 14 | - std::map<std::string, std::map<std::string, std::set<size_t>>> const& | ||
| 15 | - getNamesByResourceType() const; | 13 | + std::set<std::string> const& |
| 14 | + getNames() const | ||
| 15 | + { | ||
| 16 | + return names; | ||
| 17 | + } | ||
| 18 | + std::map<std::string, std::map<std::string, std::vector<size_t>>> const& | ||
| 19 | + getNamesByResourceType() const | ||
| 20 | + { | ||
| 21 | + return names_by_resource_type; | ||
| 22 | + } | ||
| 16 | 23 | ||
| 17 | private: | 24 | private: |
| 18 | std::string last_name; | 25 | std::string last_name; |
| 19 | size_t last_name_offset{0}; | 26 | size_t last_name_offset{0}; |
| 20 | std::set<std::string> names; | 27 | std::set<std::string> names; |
| 21 | - std::map<std::string, std::map<std::string, std::set<size_t>>> names_by_resource_type; | 28 | + std::map<std::string, std::map<std::string, std::vector<size_t>>> names_by_resource_type; |
| 22 | }; | 29 | }; |
| 23 | 30 | ||
| 24 | #endif // RESOURCEFINDER_HH | 31 | #endif // RESOURCEFINDER_HH |