From 8b4fb5b2dfc6e42cdf5a59a6f141d2a98ffcba67 Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 8 Aug 2025 15:18:16 +0100 Subject: [PATCH] Refactor `ResourceFinder`: streamline logic and optimize data structure handling. --- libqpdf/QPDFAcroFormDocumentHelper.cc | 35 ++++++++++++++++------------------- libqpdf/QPDFPageObjectHelper.cc | 22 ++++++++++++---------- libqpdf/ResourceFinder.cc | 32 ++++++++------------------------ libqpdf/qpdf/ResourceFinder.hh | 15 +++++++++++---- 4 files changed, 47 insertions(+), 57 deletions(-) diff --git a/libqpdf/QPDFAcroFormDocumentHelper.cc b/libqpdf/QPDFAcroFormDocumentHelper.cc index aa54d81..18997e8 100644 --- a/libqpdf/QPDFAcroFormDocumentHelper.cc +++ b/libqpdf/QPDFAcroFormDocumentHelper.cc @@ -489,7 +489,7 @@ namespace public: ResourceReplacer( std::map> const& dr_map, - std::map>> const& rnames); + std::map>> const& rnames); ~ResourceReplacer() override = default; void handleToken(QPDFTokenizer::Token const&) override; @@ -501,7 +501,7 @@ namespace ResourceReplacer::ResourceReplacer( std::map> const& dr_map, - std::map>> const& rnames) + std::map>> const& rnames) { // We have: // * dr_map[resource_type][key] == new_key @@ -510,22 +510,18 @@ ResourceReplacer::ResourceReplacer( // We want: // * to_replace[key][offset] = new_key - for (auto const& rn_iter: rnames) { - std::string const& rtype = rn_iter.first; + for (auto const& [rtype, key_offsets]: rnames) { auto dr_map_rtype = dr_map.find(rtype); if (dr_map_rtype == dr_map.end()) { continue; } - auto const& key_offsets = rn_iter.second; - for (auto const& ko_iter: key_offsets) { - std::string const& old_key = ko_iter.first; + for (auto const& [old_key, offsets]: key_offsets) { auto dr_map_rtype_old = dr_map_rtype->second.find(old_key); if (dr_map_rtype_old == dr_map_rtype->second.end()) { continue; } - auto const& offsets = ko_iter.second; - for (auto const& o_iter: offsets) { - to_replace[old_key][o_iter] = dr_map_rtype_old->second; + for (auto const& offs: offsets) { + to_replace[old_key][offs] = dr_map_rtype_old->second; } } } @@ -534,19 +530,20 @@ ResourceReplacer::ResourceReplacer( void ResourceReplacer::handleToken(QPDFTokenizer::Token const& token) { - bool wrote = false; if (token.getType() == QPDFTokenizer::tt_name) { - std::string name = QPDFObjectHandle::newName(token.getValue()).getName(); - if (to_replace.contains(name) && to_replace[name].contains(offset)) { - QTC::TC("qpdf", "QPDFAcroFormDocumentHelper replaced DA token"); - write(to_replace[name][offset]); - wrote = true; + auto it1 = to_replace.find(token.getValue()); + if (it1 != to_replace.end()) { + auto it2 = it1->second.find(offset); + if (it2 != it1->second.end()) { + QTC::TC("qpdf", "QPDFAcroFormDocumentHelper replaced DA token"); + write(it2->second); + offset += token.getRawValue().length(); + return; + } } } offset += token.getRawValue().length(); - if (!wrote) { - writeToken(token); - } + writeToken(token); } void diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc index 67badaa..6a095d3 100644 --- a/libqpdf/QPDFPageObjectHelper.cc +++ b/libqpdf/QPDFPageObjectHelper.cc @@ -543,14 +543,14 @@ bool QPDFPageObjectHelper::removeUnreferencedResourcesHelper( QPDFPageObjectHelper ph, std::set& unresolved) { - bool is_page = (!ph.oh().isFormXObject()); + const bool is_page = !ph.oh().isFormXObject(); if (!is_page) { QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject"); } ResourceFinder rf; try { - auto q = ph.oh().getOwningQPDF(); + auto q = ph.qpdf(); size_t before_nw = (q ? q->numWarnings() : 0); ph.parseContents(&rf); size_t after_nw = (q ? q->numWarnings() : 0); @@ -575,23 +575,25 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( QPDFObjectHandle resources = ph.getAttribute("/Resources", true); std::vector rdicts; std::set known_names; - std::vector to_filter = {"/Font", "/XObject"}; if (resources.isDictionary()) { - for (auto const& iter: to_filter) { + for (auto const& iter: {"/Font", "/XObject"}) { QPDFObjectHandle dict = resources.getKey(iter); if (dict.isDictionary()) { dict = resources.replaceKeyAndGetNew(iter, dict.shallowCopy()); rdicts.push_back(dict); - auto keys = dict.getKeys(); - known_names.insert(keys.begin(), keys.end()); + known_names.merge(dict.getKeys()); } } } std::set local_unresolved; - auto names_by_rtype = rf.getNamesByResourceType(); - for (auto const& i1: to_filter) { - for (auto const& n_iter: names_by_rtype[i1]) { + auto const& names_by_rtype = rf.getNamesByResourceType(); + for (auto const& i1: {"/Font", "/XObject"}) { + auto it = names_by_rtype.find(i1); + if (it == names_by_rtype.end()) { + continue; + } + for (auto const& n_iter: it->second) { std::string const& name = n_iter.first; if (!known_names.contains(name)) { unresolved.insert(name); @@ -610,7 +612,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( // unresolved names, and for page objects, we avoid removing any such names found in nested form // XObjects. - if ((!local_unresolved.empty()) && resources.isDictionary()) { + if (!local_unresolved.empty() && resources.isDictionary()) { // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only // looking at names that are referencing fonts and XObjects, but until we're certain that we // know the meaning of every name in a content stream, we don't want to give warnings that diff --git a/libqpdf/ResourceFinder.cc b/libqpdf/ResourceFinder.cc index 06d37e1..cd27597 100644 --- a/libqpdf/ResourceFinder.cc +++ b/libqpdf/ResourceFinder.cc @@ -3,8 +3,8 @@ void ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t) { - if (obj.isOperator() && (!this->last_name.empty())) { - static std::map op_to_rtype = { + if (obj.isOperator() && !last_name.empty()) { + static const std::map op_to_rtype{ {"CS", "/ColorSpace"}, {"cs", "/ColorSpace"}, {"gs", "/ExtGState"}, @@ -17,19 +17,15 @@ ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t) {"Do", "/XObject"}, }; std::string op = obj.getOperatorValue(); - std::string resource_type; auto iter = op_to_rtype.find(op); - if (iter != op_to_rtype.end()) { - resource_type = iter->second; - } - if (!resource_type.empty()) { - this->names.insert(this->last_name); - this->names_by_resource_type[resource_type][this->last_name].insert( - this->last_name_offset); + if (iter == op_to_rtype.end()) { + return; } + names.insert(last_name); + names_by_resource_type[iter->second][last_name].push_back(last_name_offset); } else if (obj.isName()) { - this->last_name = obj.getName(); - this->last_name_offset = offset; + last_name = obj.getName(); + last_name_offset = offset; } } @@ -37,15 +33,3 @@ void ResourceFinder::handleEOF() { } - -std::set const& -ResourceFinder::getNames() const -{ - return this->names; -} - -std::map>> const& -ResourceFinder::getNamesByResourceType() const -{ - return this->names_by_resource_type; -} diff --git a/libqpdf/qpdf/ResourceFinder.hh b/libqpdf/qpdf/ResourceFinder.hh index 04614d7..1aa8fc5 100644 --- a/libqpdf/qpdf/ResourceFinder.hh +++ b/libqpdf/qpdf/ResourceFinder.hh @@ -10,15 +10,22 @@ class ResourceFinder final: public QPDFObjectHandle::ParserCallbacks ~ResourceFinder() final = default; void handleObject(QPDFObjectHandle, size_t, size_t) final; void handleEOF() final; - std::set const& getNames() const; - std::map>> const& - getNamesByResourceType() const; + std::set const& + getNames() const + { + return names; + } + std::map>> const& + getNamesByResourceType() const + { + return names_by_resource_type; + } private: std::string last_name; size_t last_name_offset{0}; std::set names; - std::map>> names_by_resource_type; + std::map>> names_by_resource_type; }; #endif // RESOURCEFINDER_HH -- libgit2 0.21.4