Commit 8b4fb5b2dfc6e42cdf5a59a6f141d2a98ffcba67
1 parent: eca6d7ab
Refactor `ResourceFinder`: streamline logic and optimize data structure handling.
Showing 4 changed files with 47 additions and 57 deletions
libqpdf/QPDFAcroFormDocumentHelper.cc
| ... | ... | @@ -489,7 +489,7 @@ namespace |
| 489 | 489 | public: |
| 490 | 490 | ResourceReplacer( |
| 491 | 491 | std::map<std::string, std::map<std::string, std::string>> const& dr_map, |
| 492 | - std::map<std::string, std::map<std::string, std::set<size_t>>> const& rnames); | |
| 492 | + std::map<std::string, std::map<std::string, std::vector<size_t>>> const& rnames); | |
| 493 | 493 | ~ResourceReplacer() override = default; |
| 494 | 494 | void handleToken(QPDFTokenizer::Token const&) override; |
| 495 | 495 | |
| ... | ... | @@ -501,7 +501,7 @@ namespace |
| 501 | 501 | |
| 502 | 502 | ResourceReplacer::ResourceReplacer( |
| 503 | 503 | std::map<std::string, std::map<std::string, std::string>> const& dr_map, |
| 504 | - std::map<std::string, std::map<std::string, std::set<size_t>>> const& rnames) | |
| 504 | + std::map<std::string, std::map<std::string, std::vector<size_t>>> const& rnames) | |
| 505 | 505 | { |
| 506 | 506 | // We have: |
| 507 | 507 | // * dr_map[resource_type][key] == new_key |
| ... | ... | @@ -510,22 +510,18 @@ ResourceReplacer::ResourceReplacer( |
| 510 | 510 | // We want: |
| 511 | 511 | // * to_replace[key][offset] = new_key |
| 512 | 512 | |
| 513 | - for (auto const& rn_iter: rnames) { | |
| 514 | - std::string const& rtype = rn_iter.first; | |
| 513 | + for (auto const& [rtype, key_offsets]: rnames) { | |
| 515 | 514 | auto dr_map_rtype = dr_map.find(rtype); |
| 516 | 515 | if (dr_map_rtype == dr_map.end()) { |
| 517 | 516 | continue; |
| 518 | 517 | } |
| 519 | - auto const& key_offsets = rn_iter.second; | |
| 520 | - for (auto const& ko_iter: key_offsets) { | |
| 521 | - std::string const& old_key = ko_iter.first; | |
| 518 | + for (auto const& [old_key, offsets]: key_offsets) { | |
| 522 | 519 | auto dr_map_rtype_old = dr_map_rtype->second.find(old_key); |
| 523 | 520 | if (dr_map_rtype_old == dr_map_rtype->second.end()) { |
| 524 | 521 | continue; |
| 525 | 522 | } |
| 526 | - auto const& offsets = ko_iter.second; | |
| 527 | - for (auto const& o_iter: offsets) { | |
| 528 | - to_replace[old_key][o_iter] = dr_map_rtype_old->second; | |
| 523 | + for (auto const& offs: offsets) { | |
| 524 | + to_replace[old_key][offs] = dr_map_rtype_old->second; | |
| 529 | 525 | } |
| 530 | 526 | } |
| 531 | 527 | } |
| ... | ... | @@ -534,19 +530,20 @@ ResourceReplacer::ResourceReplacer( |
| 534 | 530 | void |
| 535 | 531 | ResourceReplacer::handleToken(QPDFTokenizer::Token const& token) |
| 536 | 532 | { |
| 537 | - bool wrote = false; | |
| 538 | 533 | if (token.getType() == QPDFTokenizer::tt_name) { |
| 539 | - std::string name = QPDFObjectHandle::newName(token.getValue()).getName(); | |
| 540 | - if (to_replace.contains(name) && to_replace[name].contains(offset)) { | |
| 541 | - QTC::TC("qpdf", "QPDFAcroFormDocumentHelper replaced DA token"); | |
| 542 | - write(to_replace[name][offset]); | |
| 543 | - wrote = true; | |
| 534 | + auto it1 = to_replace.find(token.getValue()); | |
| 535 | + if (it1 != to_replace.end()) { | |
| 536 | + auto it2 = it1->second.find(offset); | |
| 537 | + if (it2 != it1->second.end()) { | |
| 538 | + QTC::TC("qpdf", "QPDFAcroFormDocumentHelper replaced DA token"); | |
| 539 | + write(it2->second); | |
| 540 | + offset += token.getRawValue().length(); | |
| 541 | + return; | |
| 542 | + } | |
| 544 | 543 | } |
| 545 | 544 | } |
| 546 | 545 | offset += token.getRawValue().length(); |
| 547 | - if (!wrote) { | |
| 548 | - writeToken(token); | |
| 549 | - } | |
| 546 | + writeToken(token); | |
| 550 | 547 | } |
| 551 | 548 | |
| 552 | 549 | void | ... | ... |
libqpdf/QPDFPageObjectHelper.cc
| ... | ... | @@ -543,14 +543,14 @@ bool |
| 543 | 543 | QPDFPageObjectHelper::removeUnreferencedResourcesHelper( |
| 544 | 544 | QPDFPageObjectHelper ph, std::set<std::string>& unresolved) |
| 545 | 545 | { |
| 546 | - bool is_page = (!ph.oh().isFormXObject()); | |
| 546 | + const bool is_page = !ph.oh().isFormXObject(); | |
| 547 | 547 | if (!is_page) { |
| 548 | 548 | QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject"); |
| 549 | 549 | } |
| 550 | 550 | |
| 551 | 551 | ResourceFinder rf; |
| 552 | 552 | try { |
| 553 | - auto q = ph.oh().getOwningQPDF(); | |
| 553 | + auto q = ph.qpdf(); | |
| 554 | 554 | size_t before_nw = (q ? q->numWarnings() : 0); |
| 555 | 555 | ph.parseContents(&rf); |
| 556 | 556 | size_t after_nw = (q ? q->numWarnings() : 0); |
| ... | ... | @@ -575,23 +575,25 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( |
| 575 | 575 | QPDFObjectHandle resources = ph.getAttribute("/Resources", true); |
| 576 | 576 | std::vector<QPDFObjectHandle> rdicts; |
| 577 | 577 | std::set<std::string> known_names; |
| 578 | - std::vector<std::string> to_filter = {"/Font", "/XObject"}; | |
| 579 | 578 | if (resources.isDictionary()) { |
| 580 | - for (auto const& iter: to_filter) { | |
| 579 | + for (auto const& iter: {"/Font", "/XObject"}) { | |
| 581 | 580 | QPDFObjectHandle dict = resources.getKey(iter); |
| 582 | 581 | if (dict.isDictionary()) { |
| 583 | 582 | dict = resources.replaceKeyAndGetNew(iter, dict.shallowCopy()); |
| 584 | 583 | rdicts.push_back(dict); |
| 585 | - auto keys = dict.getKeys(); | |
| 586 | - known_names.insert(keys.begin(), keys.end()); | |
| 584 | + known_names.merge(dict.getKeys()); | |
| 587 | 585 | } |
| 588 | 586 | } |
| 589 | 587 | } |
| 590 | 588 | |
| 591 | 589 | std::set<std::string> local_unresolved; |
| 592 | - auto names_by_rtype = rf.getNamesByResourceType(); | |
| 593 | - for (auto const& i1: to_filter) { | |
| 594 | - for (auto const& n_iter: names_by_rtype[i1]) { | |
| 590 | + auto const& names_by_rtype = rf.getNamesByResourceType(); | |
| 591 | + for (auto const& i1: {"/Font", "/XObject"}) { | |
| 592 | + auto it = names_by_rtype.find(i1); | |
| 593 | + if (it == names_by_rtype.end()) { | |
| 594 | + continue; | |
| 595 | + } | |
| 596 | + for (auto const& n_iter: it->second) { | |
| 595 | 597 | std::string const& name = n_iter.first; |
| 596 | 598 | if (!known_names.contains(name)) { |
| 597 | 599 | unresolved.insert(name); |
| ... | ... | @@ -610,7 +612,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( |
| 610 | 612 | // unresolved names, and for page objects, we avoid removing any such names found in nested form |
| 611 | 613 | // XObjects. |
| 612 | 614 | |
| 613 | - if ((!local_unresolved.empty()) && resources.isDictionary()) { | |
| 615 | + if (!local_unresolved.empty() && resources.isDictionary()) { | |
| 614 | 616 | // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only |
| 615 | 617 | // looking at names that are referencing fonts and XObjects, but until we're certain that we |
| 616 | 618 | // know the meaning of every name in a content stream, we don't want to give warnings that | ... | ... |
libqpdf/ResourceFinder.cc
| ... | ... | @@ -3,8 +3,8 @@ |
| 3 | 3 | void |
| 4 | 4 | ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t) |
| 5 | 5 | { |
| 6 | - if (obj.isOperator() && (!this->last_name.empty())) { | |
| 7 | - static std::map<std::string, std::string> op_to_rtype = { | |
| 6 | + if (obj.isOperator() && !last_name.empty()) { | |
| 7 | + static const std::map<std::string, std::string> op_to_rtype{ | |
| 8 | 8 | {"CS", "/ColorSpace"}, |
| 9 | 9 | {"cs", "/ColorSpace"}, |
| 10 | 10 | {"gs", "/ExtGState"}, |
| ... | ... | @@ -17,19 +17,15 @@ ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t) |
| 17 | 17 | {"Do", "/XObject"}, |
| 18 | 18 | }; |
| 19 | 19 | std::string op = obj.getOperatorValue(); |
| 20 | - std::string resource_type; | |
| 21 | 20 | auto iter = op_to_rtype.find(op); |
| 22 | - if (iter != op_to_rtype.end()) { | |
| 23 | - resource_type = iter->second; | |
| 24 | - } | |
| 25 | - if (!resource_type.empty()) { | |
| 26 | - this->names.insert(this->last_name); | |
| 27 | - this->names_by_resource_type[resource_type][this->last_name].insert( | |
| 28 | - this->last_name_offset); | |
| 21 | + if (iter == op_to_rtype.end()) { | |
| 22 | + return; | |
| 29 | 23 | } |
| 24 | + names.insert(last_name); | |
| 25 | + names_by_resource_type[iter->second][last_name].push_back(last_name_offset); | |
| 30 | 26 | } else if (obj.isName()) { |
| 31 | - this->last_name = obj.getName(); | |
| 32 | - this->last_name_offset = offset; | |
| 27 | + last_name = obj.getName(); | |
| 28 | + last_name_offset = offset; | |
| 33 | 29 | } |
| 34 | 30 | } |
| 35 | 31 | |
| ... | ... | @@ -37,15 +33,3 @@ void |
| 37 | 33 | ResourceFinder::handleEOF() |
| 38 | 34 | { |
| 39 | 35 | } |
| 40 | - | |
| 41 | -std::set<std::string> const& | |
| 42 | -ResourceFinder::getNames() const | |
| 43 | -{ | |
| 44 | - return this->names; | |
| 45 | -} | |
| 46 | - | |
| 47 | -std::map<std::string, std::map<std::string, std::set<size_t>>> const& | |
| 48 | -ResourceFinder::getNamesByResourceType() const | |
| 49 | -{ | |
| 50 | - return this->names_by_resource_type; | |
| 51 | -} | ... | ... |
libqpdf/qpdf/ResourceFinder.hh
| ... | ... | @@ -10,15 +10,22 @@ class ResourceFinder final: public QPDFObjectHandle::ParserCallbacks |
| 10 | 10 | ~ResourceFinder() final = default; |
| 11 | 11 | void handleObject(QPDFObjectHandle, size_t, size_t) final; |
| 12 | 12 | void handleEOF() final; |
| 13 | - std::set<std::string> const& getNames() const; | |
| 14 | - std::map<std::string, std::map<std::string, std::set<size_t>>> const& | |
| 15 | - getNamesByResourceType() const; | |
| 13 | + std::set<std::string> const& | |
| 14 | + getNames() const | |
| 15 | + { | |
| 16 | + return names; | |
| 17 | + } | |
| 18 | + std::map<std::string, std::map<std::string, std::vector<size_t>>> const& | |
| 19 | + getNamesByResourceType() const | |
| 20 | + { | |
| 21 | + return names_by_resource_type; | |
| 22 | + } | |
| 16 | 23 | |
| 17 | 24 | private: |
| 18 | 25 | std::string last_name; |
| 19 | 26 | size_t last_name_offset{0}; |
| 20 | 27 | std::set<std::string> names; |
| 21 | - std::map<std::string, std::map<std::string, std::set<size_t>>> names_by_resource_type; | |
| 28 | + std::map<std::string, std::map<std::string, std::vector<size_t>>> names_by_resource_type; | |
| 22 | 29 | }; |
| 23 | 30 | |
| 24 | 31 | #endif // RESOURCEFINDER_HH | ... | ... |