Commit d7ffdfa994e3648a1e03fd1c1796c6e24bf00693

Authored by Jay Berkenbilt
1 parent e17585c2

Add optional conflict detection to mergeResources

Also improve behavior around direct vs. indirect resources.
ChangeLog
  1 +2021-03-03 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Add QPDFObjectHandle::makeResourcesIndirect
  4 +
  5 +2021-03-02 Jay Berkenbilt <ejb@ql.org>
  6 +
  7 + * Add an optional resource_names argument to getUniqueResourceName
  8 + for added efficiency.
  9 +
  10 + * Add conflict detection QPDFObjectHandle::mergeResources.
  11 +
1 12 2021-03-01 Jay Berkenbilt <ejb@ql.org>
2 13  
3 14 * Improve code that finds unreferenced resources to ignore names
... ...
include/qpdf/QPDFObjectHandle.hh
... ... @@ -731,13 +731,27 @@ class QPDFObjectHandle
731 731 QPDF_DLL
732 732 bool isOrHasName(std::string const&);
733 733  
734   - // Merge resource dictionaries. Assumes resource dictionaries have
735   - // the property that the collection of keys of all first-level
736   - // dictionary members contains no duplicates. This method does
737   - // nothing if both this object and the other object are not
738   - // dictionaries. Otherwise, it has following behavior, where
739   - // "object" refers to the object whose method is invoked, and
740   - // "other" refers to the argument:
  734 + // Make all resources in a resource dictionary indirect. This just
  735 + // goes through all entries of top-level subdictionaries and
  736 + // converts any direct objects to indirect objects. This can be
  737 + // useful to call before mergeResources if it is going to be
  738 + // called multiple times to prevent resources from being copied
  739 + // multiple times.
  740 + QPDF_DLL
  741 + void makeResourcesIndirect(QPDF& owning_qpdf);
  742 +
  743 + // Merge resource dictionaries. If the "conflicts" parameter is
  744 + // provided, conflicts in dictionary subitems are resolved, and
  745 + // "conflicts" is initialized to a map such that
  746 + // conflicts[resource_type][old_key] == [new_key]
  747 + //
  748 + // See also makeResourcesIndirect, which can be useful to call
  749 + // before calling this.
  750 + //
  751 + // This method does nothing if both this object and the other
  752 + // object are not dictionaries. Otherwise, it has following
  753 + // behavior, where "object" refers to the object whose method is
  754 + // invoked, and "other" refers to the argument:
741 755 //
742 756 // * For each key in "other" whose value is an array:
743 757 // * If "object" does not have that entry, shallow copy it.
... ... @@ -747,20 +761,32 @@ class QPDFObjectHandle
747 761 // * For each key in "other" whose value is a dictionary:
748 762 // * If "object" does not have that entry, shallow copy it.
749 763 // * Otherwise, for each key in the subdictionary:
750   - // * If key is not present in "object"'s entry, shallow copy it.
751   - // * Otherwise, ignore. Conflicts are not detected.
  764 + // * If key is not present in "object"'s entry, shallow copy
  765 + // it if direct or just add it if indirect.
  766 + // * Otherwise, if conflicts are being detected:
  767 + // * If there is a key (oldkey) already in the dictionary
  768 + // that points to the same indirect destination as key,
  769 + // indicate that key was replaced by oldkey. This would
  770 + // happen if these two resource dictionaries have
  771 + // previously been merged.
  772 + // * Otherwise pick a new key (newkey) that is unique within
  773 + // the resource dictionary, store that in the resource
  774 + // dictionary with key's destination as its destination,
  775 + // and indicate that key was replaced by newkey.
752 776 //
753 777 // The primary purpose of this method is to facilitate merging of
754 778 // resource dictionaries that are supposed to have the same scope
755 779 // as each other. For example, this can be used to merge a form
756   - // XObject's /Resources dictionary with a form field's /DR.
757   - // Conflicts are not detected. If, in the future, there should be
758   - // a need to detect conflicts, this method could detect them and
759   - // return a mapping from old to new names. This mapping could be
760   - // used for filtering the stream. This would be necessary, for
761   - // example, to merge a form XObject's resources with a page's
762   - // resources with the intention of concatenating the content
763   - // streams.
  780 + // XObject's /Resources dictionary with a form field's /DR or to
  781 + // merge two /DR dictionaries. The "conflicts" parameter may be
  782 + // previously initialized. This method adds to whatever is already
  783 + // there, which can be useful when merging with multiple things.
  784 + QPDF_DLL
  785 + void mergeResources(
  786 + QPDFObjectHandle other,
  787 + std::map<std::string, std::map<std::string, std::string>>* conflicts);
  788 + // ABI: eliminate version without conflicts and make conflicts
  789 + // default to nullptr.
764 790 QPDF_DLL
765 791 void mergeResources(QPDFObjectHandle other);
766 792  
... ... @@ -779,7 +805,19 @@ class QPDFObjectHandle
779 805 // increase efficiency if adding multiple items with the same
780 806 // prefix. (Why doesn't it set min_suffix to the next number?
781 807 // Well, maybe you aren't going to actually use the name it
782   - // returns.)
  808 + // returns.) If you are calling this multiple times on the same
  809 + // resource dictionary, you can initialize resource_names by
  810 + // calling getResourceNames(), incrementally update it as you add
  811 + // resources, and keep passing it in so that getUniqueResourceName
  812 + // doesn't have to traverse the resource dictionary each time it's
  813 + // called.
  814 + QPDF_DLL
  815 + std::string getUniqueResourceName(
  816 + std::string const& prefix,
  817 + int& min_suffix,
  818 + std::set<std::string>* resource_names);
  819 + // ABI: remove this version and make resource_names default to
  820 + // nullptr.
783 821 QPDF_DLL
784 822 std::string getUniqueResourceName(std::string const& prefix,
785 823 int& min_suffix);
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -1057,59 +1057,142 @@ QPDFObjectHandle::isOrHasName(std::string const&amp; value)
1057 1057 }
1058 1058  
1059 1059 void
  1060 +QPDFObjectHandle::makeResourcesIndirect(QPDF& owning_qpdf)
  1061 +{
  1062 + if (! isDictionary())
  1063 + {
  1064 + return;
  1065 + }
  1066 + for (auto const& i1: ditems())
  1067 + {
  1068 + QPDFObjectHandle sub = i1.second;
  1069 + if (! sub.isDictionary())
  1070 + {
  1071 + continue;
  1072 + }
  1073 + for (auto i2: sub.ditems())
  1074 + {
  1075 + std::string const& key = i2.first;
  1076 + QPDFObjectHandle val = i2.second;
  1077 + if (! val.isIndirect())
  1078 + {
  1079 + sub.replaceKey(key, owning_qpdf.makeIndirectObject(val));
  1080 + }
  1081 + }
  1082 + }
  1083 +}
  1084 +
  1085 +void
1060 1086 QPDFObjectHandle::mergeResources(QPDFObjectHandle other)
1061 1087 {
  1088 + mergeResources(other, nullptr);
  1089 +}
  1090 +
  1091 +void
  1092 +QPDFObjectHandle::mergeResources(
  1093 + QPDFObjectHandle other,
  1094 + std::map<std::string, std::map<std::string, std::string>>* conflicts)
  1095 +{
1062 1096 if (! (isDictionary() && other.isDictionary()))
1063 1097 {
1064 1098 QTC::TC("qpdf", "QPDFObjectHandle merge top type mismatch");
1065 1099 return;
1066 1100 }
1067   - std::set<std::string> other_keys = other.getKeys();
1068   - for (std::set<std::string>::iterator iter = other_keys.begin();
1069   - iter != other_keys.end(); ++iter)
  1101 +
  1102 + auto make_og_to_name = [](
  1103 + QPDFObjectHandle& dict,
  1104 + std::map<QPDFObjGen, std::string>& og_to_name)
1070 1105 {
1071   - std::string const& key = *iter;
1072   - QPDFObjectHandle other_val = other.getKey(key);
1073   - if (hasKey(key))
  1106 + for (auto i: dict.ditems())
1074 1107 {
1075   - QPDFObjectHandle this_val = getKey(key);
  1108 + if (i.second.isIndirect())
  1109 + {
  1110 + og_to_name[i.second.getObjGen()] = i.first;
  1111 + }
  1112 + }
  1113 + };
  1114 +
  1115 + // This algorithm is described in comments in QPDFObjectHandle.hh
  1116 + // above the declaration of mergeResources.
  1117 + for (auto o_top: other.ditems())
  1118 + {
  1119 + std::string const& rtype = o_top.first;
  1120 + QPDFObjectHandle other_val = o_top.second;
  1121 + if (hasKey(rtype))
  1122 + {
  1123 + QPDFObjectHandle this_val = getKey(rtype);
1076 1124 if (this_val.isDictionary() && other_val.isDictionary())
1077 1125 {
1078 1126 if (this_val.isIndirect())
1079 1127 {
  1128 + // Do this even if there are no keys. Various
  1129 + // places in the code call mergeResources with
  1130 + // resource dictionaries that contain empty
  1131 + // subdictionaries just to get this shallow copy
  1132 + // functionality.
1080 1133 QTC::TC("qpdf", "QPDFObjectHandle replace with copy");
1081 1134 this_val = this_val.shallowCopy();
1082   - replaceKey(key, this_val);
  1135 + replaceKey(rtype, this_val);
1083 1136 }
1084   - std::set<std::string> other_val_keys = other_val.getKeys();
1085   - for (std::set<std::string>::iterator i2 =
1086   - other_val_keys.begin();
1087   - i2 != other_val_keys.end(); ++i2)
  1137 + std::map<QPDFObjGen, std::string> og_to_name;
  1138 + std::set<std::string> rnames;
  1139 + int min_suffix = 1;
  1140 + bool initialized_maps = false;
  1141 + for (auto ov_iter: other_val.ditems())
1088 1142 {
1089   - if (! this_val.hasKey(*i2))
  1143 + std::string const& key = ov_iter.first;
  1144 + QPDFObjectHandle rval = ov_iter.second;
  1145 + if (! this_val.hasKey(key))
1090 1146 {
1091   - QTC::TC("qpdf", "QPDFObjectHandle merge shallow copy");
1092   - this_val.replaceKey(
1093   - *i2, other_val.getKey(*i2).shallowCopy());
  1147 + if (! rval.isIndirect())
  1148 + {
  1149 + QTC::TC("qpdf", "QPDFObjectHandle merge shallow copy");
  1150 + rval = rval.shallowCopy();
  1151 + }
  1152 + this_val.replaceKey(key, rval);
  1153 + }
  1154 + else if (conflicts)
  1155 + {
  1156 + if (! initialized_maps)
  1157 + {
  1158 + make_og_to_name(this_val, og_to_name);
  1159 + rnames = this_val.getResourceNames();
  1160 + initialized_maps = true;
  1161 + }
  1162 + auto rval_og = rval.getObjGen();
  1163 + if (rval.isIndirect() &&
  1164 + og_to_name.count(rval_og))
  1165 + {
  1166 + QTC::TC("qpdf", "QPDFObjectHandle merge reuse");
  1167 + auto new_key = og_to_name[rval_og];
  1168 + if (new_key != key)
  1169 + {
  1170 + (*conflicts)[rtype][key] = new_key;
  1171 + }
  1172 + }
  1173 + else
  1174 + {
  1175 + QTC::TC("qpdf", "QPDFObjectHandle merge generate");
  1176 + std::string new_key = getUniqueResourceName(
  1177 + key + "_", min_suffix, &rnames);
  1178 + (*conflicts)[rtype][key] = new_key;
  1179 + this_val.replaceKey(new_key, rval);
  1180 + }
1094 1181 }
1095 1182 }
1096 1183 }
1097 1184 else if (this_val.isArray() && other_val.isArray())
1098 1185 {
1099 1186 std::set<std::string> scalars;
1100   - int n = this_val.getArrayNItems();
1101   - for (int i = 0; i < n; ++i)
  1187 + for (auto this_item: this_val.aitems())
1102 1188 {
1103   - QPDFObjectHandle this_item = this_val.getArrayItem(i);
1104 1189 if (this_item.isScalar())
1105 1190 {
1106 1191 scalars.insert(this_item.unparse());
1107 1192 }
1108 1193 }
1109   - n = other_val.getArrayNItems();
1110   - for (int i = 0; i < n; ++i)
  1194 + for (auto other_item: other_val.aitems())
1111 1195 {
1112   - QPDFObjectHandle other_item = other_val.getArrayItem(i);
1113 1196 if (other_item.isScalar())
1114 1197 {
1115 1198 if (scalars.count(other_item.unparse()) == 0)
... ... @@ -1128,7 +1211,7 @@ QPDFObjectHandle::mergeResources(QPDFObjectHandle other)
1128 1211 else
1129 1212 {
1130 1213 QTC::TC("qpdf", "QPDFObjectHandle merge copy from other");
1131   - replaceKey(key, other_val.shallowCopy());
  1214 + replaceKey(rtype, other_val.shallowCopy());
1132 1215 }
1133 1216 }
1134 1217 }
... ... @@ -1165,7 +1248,16 @@ std::string
1165 1248 QPDFObjectHandle::getUniqueResourceName(std::string const& prefix,
1166 1249 int& min_suffix)
1167 1250 {
1168   - std::set<std::string> names = getResourceNames();
  1251 + return getUniqueResourceName(prefix, min_suffix, nullptr);
  1252 +}
  1253 +
  1254 +std::string
  1255 +QPDFObjectHandle::getUniqueResourceName(std::string const& prefix,
  1256 + int& min_suffix,
  1257 + std::set<std::string>* namesp)
  1258 +
  1259 +{
  1260 + std::set<std::string> names = (namesp ? *namesp : getResourceNames());
1169 1261 int max_suffix = min_suffix + QIntC::to_int(names.size());
1170 1262 while (min_suffix <= max_suffix)
1171 1263 {
... ...
qpdf/qpdf.testcov
... ... @@ -581,3 +581,5 @@ qpdf copy form fields in pages 0
581 581 qpdf keep some fields in pages 0
582 582 qpdf pages keeping field from original 0
583 583 qpdf no more fields in pages 0
  584 +QPDFObjectHandle merge reuse 0
  585 +QPDFObjectHandle merge generate 0
... ...
qpdf/qtest/qpdf.test
... ... @@ -1598,7 +1598,7 @@ $td-&gt;runtest(&quot;merge dictionary&quot;,
1598 1598 $td->NORMALIZE_NEWLINES);
1599 1599 $td->runtest("unique resource name",
1600 1600 {$td->COMMAND => "test_driver 60 minimal.pdf"},
1601   - {$td->STRING => "test 60 done\n", $td->EXIT_STATUS => 0},
  1601 + {$td->FILE => "test60.out", $td->EXIT_STATUS => 0},
1602 1602 $td->NORMALIZE_NEWLINES);
1603 1603 $td->runtest("check output",
1604 1604 {$td->FILE => "a.pdf"},
... ...
qpdf/qtest/qpdf/form-filled-by-acrobat-out.pdf
No preview for this file type
qpdf/qtest/qpdf/test60.out 0 โ†’ 100644
  1 +first merge
  2 +/Y:
  3 + /F3 -> /F3_1
  4 +/Z:
  5 + /F2 -> /F2_1
  6 +second merge
  7 +/Y:
  8 + /F3 -> /F3_1
  9 + /F5 -> /F5_1
  10 +/Z:
  11 + /F2 -> /F2_1
  12 +third merge
  13 +/Y:
  14 + /F3 -> /F3_1
  15 + /F5 -> /F5_1
  16 +/Z:
  17 + /F2 -> /F2_1
  18 +fourth merge
  19 +/Y:
  20 + /F3 -> /F3_1
  21 + /F5 -> /F5_1
  22 +/Z:
  23 + /F2 -> /F2_1
  24 +test 60 done
... ...
qpdf/qtest/qpdf/unique-resources.pdf
1 1 Binary files a/qpdf/qtest/qpdf/unique-resources.pdf and b/qpdf/qtest/qpdf/unique-resources.pdf differ
... ...
qpdf/test_driver.cc
... ... @@ -2362,7 +2362,9 @@ void runtest(int n, char const* filename1, char const* arg2)
2362 2362 }
2363 2363 else if (n == 60)
2364 2364 {
2365   - // Boundary condition testing for getUniqueResourceName
  2365 + // Boundary condition testing for getUniqueResourceName;
  2366 + // additional testing of mergeResources with conflict
  2367 + // detection
2366 2368 QPDFObjectHandle r1 = QPDFObjectHandle::newDictionary();
2367 2369 int min_suffix = 1;
2368 2370 for (int i = 1; i < 3; ++i)
... ... @@ -2372,8 +2374,69 @@ void runtest(int n, char const* filename1, char const* arg2)
2372 2374 r1.getKey("/Z").replaceKey(
2373 2375 name, QPDFObjectHandle::newString("moo"));
2374 2376 }
2375   - pdf.getTrailer().replaceKey("/QTest", r1);
  2377 + auto make_resource = [&](QPDFObjectHandle& dict,
  2378 + std::string const& key,
  2379 + std::string const& str) {
  2380 + auto o1 = QPDFObjectHandle::newArray();
  2381 + o1.appendItem(QPDFObjectHandle::newString(str));
  2382 + dict.replaceKey(key, pdf.makeIndirectObject(o1));
  2383 + };
  2384 +
  2385 + auto z = r1.getKey("/Z");
  2386 + r1.replaceKey("/Y", QPDFObjectHandle::newDictionary());
  2387 + auto y = r1.getKey("/Y");
  2388 + make_resource(z, "/F1", "r1.Z.F1");
  2389 + make_resource(z, "/F2", "r1.Z.F2");
  2390 + make_resource(y, "/F2", "r1.Y.F2");
  2391 + make_resource(y, "/F3", "r1.Y.F3");
  2392 + QPDFObjectHandle r2 =
  2393 + QPDFObjectHandle::parse("<< /Z << >> /Y << >> >>");
  2394 + z = r2.getKey("/Z");
  2395 + y = r2.getKey("/Y");
  2396 + make_resource(z, "/F2", "r2.Z.F2");
  2397 + make_resource(y, "/F3", "r2.Y.F3");
  2398 + make_resource(y, "/F4", "r2.Y.F4");
  2399 + // Add a direct object
  2400 + y.replaceKey("/F5", QPDFObjectHandle::newString("direct r2.Y.F5"));
  2401 +
  2402 + std::map<std::string, std::map<std::string, std::string>> conflicts;
  2403 + auto show_conflicts = [&](std::string const& msg) {
  2404 + std::cout << msg << std::endl;
  2405 + for (auto const& i1: conflicts)
  2406 + {
  2407 + std::cout << i1.first << ":" << std::endl;
  2408 + for (auto const& i2: i1.second)
  2409 + {
  2410 + std::cout << " " << i2.first << " -> " << i2.second
  2411 + << std::endl;
  2412 + }
  2413 + }
  2414 + };
  2415 +
  2416 + r1.mergeResources(r2, &conflicts);
  2417 + show_conflicts("first merge");
  2418 + auto r3 = r1.shallowCopy();
  2419 + // Merge again. The direct object gets recopied. Everything
  2420 + // else is the same.
  2421 + r1.mergeResources(r2, &conflicts);
  2422 + show_conflicts("second merge");
  2423 +
  2424 + // Make all resources in r2 direct. Then merge two more times.
  2425 + // We should get the one previously direct object copied one
  2426 + // time as an indirect object.
  2427 + r2.makeResourcesIndirect(pdf);
  2428 + r1.mergeResources(r2, &conflicts);
  2429 + show_conflicts("third merge");
  2430 + r1.mergeResources(r2, &conflicts);
  2431 + show_conflicts("fourth merge");
  2432 +
  2433 + // The only differences between /QTest and /QTest3 should be
  2434 + // the direct objects merged from r2.
  2435 + pdf.getTrailer().replaceKey("/QTest1", r1);
  2436 + pdf.getTrailer().replaceKey("/QTest2", r2);
  2437 + pdf.getTrailer().replaceKey("/QTest3", r3);
2376 2438 QPDFWriter w(pdf, "a.pdf");
  2439 + w.setQDFMode(true);
2377 2440 w.setStaticID(true);
2378 2441 w.write();
2379 2442 }
... ...