Commit 8b4fb5b2dfc6e42cdf5a59a6f141d2a98ffcba67

Authored by m-holger
1 parent eca6d7ab

Refactor `ResourceFinder`: streamline logic and store name offsets in `std::vector` instead of `std::set`.

libqpdf/QPDFAcroFormDocumentHelper.cc
... ... @@ -489,7 +489,7 @@ namespace
489 489 public:
490 490 ResourceReplacer(
491 491 std::map<std::string, std::map<std::string, std::string>> const& dr_map,
492   - std::map<std::string, std::map<std::string, std::set<size_t>>> const& rnames);
  492 + std::map<std::string, std::map<std::string, std::vector<size_t>>> const& rnames);
493 493 ~ResourceReplacer() override = default;
494 494 void handleToken(QPDFTokenizer::Token const&) override;
495 495  
... ... @@ -501,7 +501,7 @@ namespace
501 501  
502 502 ResourceReplacer::ResourceReplacer(
503 503 std::map<std::string, std::map<std::string, std::string>> const& dr_map,
504   - std::map<std::string, std::map<std::string, std::set<size_t>>> const& rnames)
  504 + std::map<std::string, std::map<std::string, std::vector<size_t>>> const& rnames)
505 505 {
506 506 // We have:
507 507 // * dr_map[resource_type][key] == new_key
... ... @@ -510,22 +510,18 @@ ResourceReplacer::ResourceReplacer(
510 510 // We want:
511 511 // * to_replace[key][offset] = new_key
512 512  
513   - for (auto const& rn_iter: rnames) {
514   - std::string const& rtype = rn_iter.first;
  513 + for (auto const& [rtype, key_offsets]: rnames) {
515 514 auto dr_map_rtype = dr_map.find(rtype);
516 515 if (dr_map_rtype == dr_map.end()) {
517 516 continue;
518 517 }
519   - auto const& key_offsets = rn_iter.second;
520   - for (auto const& ko_iter: key_offsets) {
521   - std::string const& old_key = ko_iter.first;
  518 + for (auto const& [old_key, offsets]: key_offsets) {
522 519 auto dr_map_rtype_old = dr_map_rtype->second.find(old_key);
523 520 if (dr_map_rtype_old == dr_map_rtype->second.end()) {
524 521 continue;
525 522 }
526   - auto const& offsets = ko_iter.second;
527   - for (auto const& o_iter: offsets) {
528   - to_replace[old_key][o_iter] = dr_map_rtype_old->second;
  523 + for (auto const& offs: offsets) {
  524 + to_replace[old_key][offs] = dr_map_rtype_old->second;
529 525 }
530 526 }
531 527 }
... ... @@ -534,19 +530,20 @@ ResourceReplacer::ResourceReplacer(
534 530 void
535 531 ResourceReplacer::handleToken(QPDFTokenizer::Token const& token)
536 532 {
537   - bool wrote = false;
538 533 if (token.getType() == QPDFTokenizer::tt_name) {
539   - std::string name = QPDFObjectHandle::newName(token.getValue()).getName();
540   - if (to_replace.contains(name) && to_replace[name].contains(offset)) {
541   - QTC::TC("qpdf", "QPDFAcroFormDocumentHelper replaced DA token");
542   - write(to_replace[name][offset]);
543   - wrote = true;
  534 + auto it1 = to_replace.find(token.getValue());
  535 + if (it1 != to_replace.end()) {
  536 + auto it2 = it1->second.find(offset);
  537 + if (it2 != it1->second.end()) {
  538 + QTC::TC("qpdf", "QPDFAcroFormDocumentHelper replaced DA token");
  539 + write(it2->second);
  540 + offset += token.getRawValue().length();
  541 + return;
  542 + }
544 543 }
545 544 }
546 545 offset += token.getRawValue().length();
547   - if (!wrote) {
548   - writeToken(token);
549   - }
  546 + writeToken(token);
550 547 }
551 548  
552 549 void
... ...
libqpdf/QPDFPageObjectHelper.cc
... ... @@ -543,14 +543,14 @@ bool
543 543 QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
544 544 QPDFPageObjectHelper ph, std::set<std::string>& unresolved)
545 545 {
546   - bool is_page = (!ph.oh().isFormXObject());
  546 + const bool is_page = !ph.oh().isFormXObject();
547 547 if (!is_page) {
548 548 QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject");
549 549 }
550 550  
551 551 ResourceFinder rf;
552 552 try {
553   - auto q = ph.oh().getOwningQPDF();
  553 + auto q = ph.qpdf();
554 554 size_t before_nw = (q ? q->numWarnings() : 0);
555 555 ph.parseContents(&rf);
556 556 size_t after_nw = (q ? q->numWarnings() : 0);
... ... @@ -575,23 +575,25 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
575 575 QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
576 576 std::vector<QPDFObjectHandle> rdicts;
577 577 std::set<std::string> known_names;
578   - std::vector<std::string> to_filter = {"/Font", "/XObject"};
579 578 if (resources.isDictionary()) {
580   - for (auto const& iter: to_filter) {
  579 + for (auto const& iter: {"/Font", "/XObject"}) {
581 580 QPDFObjectHandle dict = resources.getKey(iter);
582 581 if (dict.isDictionary()) {
583 582 dict = resources.replaceKeyAndGetNew(iter, dict.shallowCopy());
584 583 rdicts.push_back(dict);
585   - auto keys = dict.getKeys();
586   - known_names.insert(keys.begin(), keys.end());
  584 + known_names.merge(dict.getKeys());
587 585 }
588 586 }
589 587 }
590 588  
591 589 std::set<std::string> local_unresolved;
592   - auto names_by_rtype = rf.getNamesByResourceType();
593   - for (auto const& i1: to_filter) {
594   - for (auto const& n_iter: names_by_rtype[i1]) {
  590 + auto const& names_by_rtype = rf.getNamesByResourceType();
  591 + for (auto const& i1: {"/Font", "/XObject"}) {
  592 + auto it = names_by_rtype.find(i1);
  593 + if (it == names_by_rtype.end()) {
  594 + continue;
  595 + }
  596 + for (auto const& n_iter: it->second) {
595 597 std::string const& name = n_iter.first;
596 598 if (!known_names.contains(name)) {
597 599 unresolved.insert(name);
... ... @@ -610,7 +612,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
610 612 // unresolved names, and for page objects, we avoid removing any such names found in nested form
611 613 // XObjects.
612 614  
613   - if ((!local_unresolved.empty()) && resources.isDictionary()) {
  615 + if (!local_unresolved.empty() && resources.isDictionary()) {
614 616 // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only
615 617 // looking at names that are referencing fonts and XObjects, but until we're certain that we
616 618 // know the meaning of every name in a content stream, we don't want to give warnings that
... ...
libqpdf/ResourceFinder.cc
... ... @@ -3,8 +3,8 @@
3 3 void
4 4 ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t)
5 5 {
6   - if (obj.isOperator() && (!this->last_name.empty())) {
7   - static std::map<std::string, std::string> op_to_rtype = {
  6 + if (obj.isOperator() && !last_name.empty()) {
  7 + static const std::map<std::string, std::string> op_to_rtype{
8 8 {"CS", "/ColorSpace"},
9 9 {"cs", "/ColorSpace"},
10 10 {"gs", "/ExtGState"},
... ... @@ -17,19 +17,15 @@ ResourceFinder::handleObject(QPDFObjectHandle obj, size_t offset, size_t)
17 17 {"Do", "/XObject"},
18 18 };
19 19 std::string op = obj.getOperatorValue();
20   - std::string resource_type;
21 20 auto iter = op_to_rtype.find(op);
22   - if (iter != op_to_rtype.end()) {
23   - resource_type = iter->second;
24   - }
25   - if (!resource_type.empty()) {
26   - this->names.insert(this->last_name);
27   - this->names_by_resource_type[resource_type][this->last_name].insert(
28   - this->last_name_offset);
  21 + if (iter == op_to_rtype.end()) {
  22 + return;
29 23 }
  24 + names.insert(last_name);
  25 + names_by_resource_type[iter->second][last_name].push_back(last_name_offset);
30 26 } else if (obj.isName()) {
31   - this->last_name = obj.getName();
32   - this->last_name_offset = offset;
  27 + last_name = obj.getName();
  28 + last_name_offset = offset;
33 29 }
34 30 }
35 31  
... ... @@ -37,15 +33,3 @@ void
37 33 ResourceFinder::handleEOF()
38 34 {
39 35 }
40   -
41   -std::set<std::string> const&
42   -ResourceFinder::getNames() const
43   -{
44   - return this->names;
45   -}
46   -
47   -std::map<std::string, std::map<std::string, std::set<size_t>>> const&
48   -ResourceFinder::getNamesByResourceType() const
49   -{
50   - return this->names_by_resource_type;
51   -}
... ...
libqpdf/qpdf/ResourceFinder.hh
... ... @@ -10,15 +10,22 @@ class ResourceFinder final: public QPDFObjectHandle::ParserCallbacks
10 10 ~ResourceFinder() final = default;
11 11 void handleObject(QPDFObjectHandle, size_t, size_t) final;
12 12 void handleEOF() final;
13   - std::set<std::string> const& getNames() const;
14   - std::map<std::string, std::map<std::string, std::set<size_t>>> const&
15   - getNamesByResourceType() const;
  13 + std::set<std::string> const&
  14 + getNames() const
  15 + {
  16 + return names;
  17 + }
  18 + std::map<std::string, std::map<std::string, std::vector<size_t>>> const&
  19 + getNamesByResourceType() const
  20 + {
  21 + return names_by_resource_type;
  22 + }
16 23  
17 24 private:
18 25 std::string last_name;
19 26 size_t last_name_offset{0};
20 27 std::set<std::string> names;
21   - std::map<std::string, std::map<std::string, std::set<size_t>>> names_by_resource_type;
  28 + std::map<std::string, std::map<std::string, std::vector<size_t>>> names_by_resource_type;
22 29 };
23 30  
24 31 #endif // RESOURCEFINDER_HH
... ...