Commit d7ffdfa994e3648a1e03fd1c1796c6e24bf00693
1 parent
e17585c2
Add optional conflict detection to mergeResources
Also improve behavior around direct vs. indirect resources.
Showing
9 changed files
with
557 additions
and
408 deletions
ChangeLog
| 1 | +2021-03-03 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * Add QPDFObjectHandle::makeResourcesIndirect | |
| 4 | + | |
| 5 | +2021-03-02 Jay Berkenbilt <ejb@ql.org> | |
| 6 | + | |
| 7 | + * Add an optional resource_names argument to getUniqueResourceName | |
| 8 | + for added efficiency. | |
| 9 | + | |
| 10 | + * Add conflict detection QPDFObjectHandle::mergeResources. | |
| 11 | + | |
| 1 | 12 | 2021-03-01 Jay Berkenbilt <ejb@ql.org> |
| 2 | 13 | |
| 3 | 14 | * Improve code that finds unreferenced resources to ignore names | ... | ... |
include/qpdf/QPDFObjectHandle.hh
| ... | ... | @@ -731,13 +731,27 @@ class QPDFObjectHandle |
| 731 | 731 | QPDF_DLL |
| 732 | 732 | bool isOrHasName(std::string const&); |
| 733 | 733 | |
| 734 | - // Merge resource dictionaries. Assumes resource dictionaries have | |
| 735 | - // the property that the collection of keys of all first-level | |
| 736 | - // dictionary members contains no duplicates. This method does | |
| 737 | - // nothing if both this object and the other object are not | |
| 738 | - // dictionaries. Otherwise, it has following behavior, where | |
| 739 | - // "object" refers to the object whose method is invoked, and | |
| 740 | - // "other" refers to the argument: | |
| 734 | + // Make all resources in a resource dictionary indirect. This just | |
| 735 | + // goes through all entries of top-level subdictionaries and | |
| 736 | + // converts any direct objects to indirect objects. This can be | |
| 737 | + // useful to call before mergeResources if it is going to be | |
| 738 | + // called multiple times to prevent resources from being copied | |
| 739 | + // multiple times. | |
| 740 | + QPDF_DLL | |
| 741 | + void makeResourcesIndirect(QPDF& owning_qpdf); | |
| 742 | + | |
| 743 | + // Merge resource dictionaries. If the "conflicts" parameter is | |
| 744 | + // provided, conflicts in dictionary subitems are resolved, and | |
| 745 | + // "conflicts" is initialized to a map such that | |
| 746 | + // conflicts[resource_type][old_key] == [new_key] | |
| 747 | + // | |
| 748 | + // See also makeResourcesIndirect, which can be useful to call | |
| 749 | + // before calling this. | |
| 750 | + // | |
| 751 | + // This method does nothing if both this object and the other | |
| 752 | + // object are not dictionaries. Otherwise, it has following | |
| 753 | + // behavior, where "object" refers to the object whose method is | |
| 754 | + // invoked, and "other" refers to the argument: | |
| 741 | 755 | // |
| 742 | 756 | // * For each key in "other" whose value is an array: |
| 743 | 757 | // * If "object" does not have that entry, shallow copy it. |
| ... | ... | @@ -747,20 +761,32 @@ class QPDFObjectHandle |
| 747 | 761 | // * For each key in "other" whose value is a dictionary: |
| 748 | 762 | // * If "object" does not have that entry, shallow copy it. |
| 749 | 763 | // * Otherwise, for each key in the subdictionary: |
| 750 | - // * If key is not present in "object"'s entry, shallow copy it. | |
| 751 | - // * Otherwise, ignore. Conflicts are not detected. | |
| 764 | + // * If key is not present in "object"'s entry, shallow copy | |
| 765 | + // it if direct or just add it if indirect. | |
| 766 | + // * Otherwise, if conflicts are being detected: | |
| 767 | + // * If there is a key (oldkey) already in the dictionary | |
| 768 | + // that points to the same indirect destination as key, | |
| 769 | + // indicate that key was replaced by oldkey. This would | |
| 770 | + // happen if these two resource dictionaries have | |
| 771 | + // previously been merged. | |
| 772 | + // * Otherwise pick a new key (newkey) that is unique within | |
| 773 | + // the resource dictionary, store that in the resource | |
| 774 | + // dictionary with key's destination as its destination, | |
| 775 | + // and indicate that key was replaced by newkey. | |
| 752 | 776 | // |
| 753 | 777 | // The primary purpose of this method is to facilitate merging of |
| 754 | 778 | // resource dictionaries that are supposed to have the same scope |
| 755 | 779 | // as each other. For example, this can be used to merge a form |
| 756 | - // XObject's /Resources dictionary with a form field's /DR. | |
| 757 | - // Conflicts are not detected. If, in the future, there should be | |
| 758 | - // a need to detect conflicts, this method could detect them and | |
| 759 | - // return a mapping from old to new names. This mapping could be | |
| 760 | - // used for filtering the stream. This would be necessary, for | |
| 761 | - // example, to merge a form XObject's resources with a page's | |
| 762 | - // resources with the intention of concatenating the content | |
| 763 | - // streams. | |
| 780 | + // XObject's /Resources dictionary with a form field's /DR or to | |
| 781 | + // merge two /DR dictionaries. The "conflicts" parameter may be | |
| 782 | + // previously initialized. This method adds to whatever is already | |
| 783 | + // there, which can be useful when merging with multiple things. | |
| 784 | + QPDF_DLL | |
| 785 | + void mergeResources( | |
| 786 | + QPDFObjectHandle other, | |
| 787 | + std::map<std::string, std::map<std::string, std::string>>* conflicts); | |
| 788 | + // ABI: eliminate version without conflicts and make conflicts | |
| 789 | + // default to nullptr. | |
| 764 | 790 | QPDF_DLL |
| 765 | 791 | void mergeResources(QPDFObjectHandle other); |
| 766 | 792 | |
| ... | ... | @@ -779,7 +805,19 @@ class QPDFObjectHandle |
| 779 | 805 | // increase efficiency if adding multiple items with the same |
| 780 | 806 | // prefix. (Why doesn't it set min_suffix to the next number? |
| 781 | 807 | // Well, maybe you aren't going to actually use the name it |
| 782 | - // returns.) | |
| 808 | + // returns.) If you are calling this multiple times on the same | |
| 809 | + // resource dictionary, you can initialize resource_names by | |
| 810 | + // calling getResourceNames(), incrementally update it as you add | |
| 811 | + // resources, and keep passing it in so that getUniqueResourceName | |
| 812 | + // doesn't have to traverse the resource dictionary each time it's | |
| 813 | + // called. | |
| 814 | + QPDF_DLL | |
| 815 | + std::string getUniqueResourceName( | |
| 816 | + std::string const& prefix, | |
| 817 | + int& min_suffix, | |
| 818 | + std::set<std::string>* resource_names); | |
| 819 | + // ABI: remove this version and make resource_names default to | |
| 820 | + // nullptr. | |
| 783 | 821 | QPDF_DLL |
| 784 | 822 | std::string getUniqueResourceName(std::string const& prefix, |
| 785 | 823 | int& min_suffix); | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -1057,59 +1057,142 @@ QPDFObjectHandle::isOrHasName(std::string const& value) |
| 1057 | 1057 | } |
| 1058 | 1058 | |
| 1059 | 1059 | void |
| 1060 | +QPDFObjectHandle::makeResourcesIndirect(QPDF& owning_qpdf) | |
| 1061 | +{ | |
| 1062 | + if (! isDictionary()) | |
| 1063 | + { | |
| 1064 | + return; | |
| 1065 | + } | |
| 1066 | + for (auto const& i1: ditems()) | |
| 1067 | + { | |
| 1068 | + QPDFObjectHandle sub = i1.second; | |
| 1069 | + if (! sub.isDictionary()) | |
| 1070 | + { | |
| 1071 | + continue; | |
| 1072 | + } | |
| 1073 | + for (auto i2: sub.ditems()) | |
| 1074 | + { | |
| 1075 | + std::string const& key = i2.first; | |
| 1076 | + QPDFObjectHandle val = i2.second; | |
| 1077 | + if (! val.isIndirect()) | |
| 1078 | + { | |
| 1079 | + sub.replaceKey(key, owning_qpdf.makeIndirectObject(val)); | |
| 1080 | + } | |
| 1081 | + } | |
| 1082 | + } | |
| 1083 | +} | |
| 1084 | + | |
| 1085 | +void | |
| 1060 | 1086 | QPDFObjectHandle::mergeResources(QPDFObjectHandle other) |
| 1061 | 1087 | { |
| 1088 | + mergeResources(other, nullptr); | |
| 1089 | +} | |
| 1090 | + | |
| 1091 | +void | |
| 1092 | +QPDFObjectHandle::mergeResources( | |
| 1093 | + QPDFObjectHandle other, | |
| 1094 | + std::map<std::string, std::map<std::string, std::string>>* conflicts) | |
| 1095 | +{ | |
| 1062 | 1096 | if (! (isDictionary() && other.isDictionary())) |
| 1063 | 1097 | { |
| 1064 | 1098 | QTC::TC("qpdf", "QPDFObjectHandle merge top type mismatch"); |
| 1065 | 1099 | return; |
| 1066 | 1100 | } |
| 1067 | - std::set<std::string> other_keys = other.getKeys(); | |
| 1068 | - for (std::set<std::string>::iterator iter = other_keys.begin(); | |
| 1069 | - iter != other_keys.end(); ++iter) | |
| 1101 | + | |
| 1102 | + auto make_og_to_name = []( | |
| 1103 | + QPDFObjectHandle& dict, | |
| 1104 | + std::map<QPDFObjGen, std::string>& og_to_name) | |
| 1070 | 1105 | { |
| 1071 | - std::string const& key = *iter; | |
| 1072 | - QPDFObjectHandle other_val = other.getKey(key); | |
| 1073 | - if (hasKey(key)) | |
| 1106 | + for (auto i: dict.ditems()) | |
| 1074 | 1107 | { |
| 1075 | - QPDFObjectHandle this_val = getKey(key); | |
| 1108 | + if (i.second.isIndirect()) | |
| 1109 | + { | |
| 1110 | + og_to_name[i.second.getObjGen()] = i.first; | |
| 1111 | + } | |
| 1112 | + } | |
| 1113 | + }; | |
| 1114 | + | |
| 1115 | + // This algorithm is described in comments in QPDFObjectHandle.hh | |
| 1116 | + // above the declaration of mergeResources. | |
| 1117 | + for (auto o_top: other.ditems()) | |
| 1118 | + { | |
| 1119 | + std::string const& rtype = o_top.first; | |
| 1120 | + QPDFObjectHandle other_val = o_top.second; | |
| 1121 | + if (hasKey(rtype)) | |
| 1122 | + { | |
| 1123 | + QPDFObjectHandle this_val = getKey(rtype); | |
| 1076 | 1124 | if (this_val.isDictionary() && other_val.isDictionary()) |
| 1077 | 1125 | { |
| 1078 | 1126 | if (this_val.isIndirect()) |
| 1079 | 1127 | { |
| 1128 | + // Do this even if there are no keys. Various | |
| 1129 | + // places in the code call mergeResources with | |
| 1130 | + // resource dictionaries that contain empty | |
| 1131 | + // subdictionaries just to get this shallow copy | |
| 1132 | + // functionality. | |
| 1080 | 1133 | QTC::TC("qpdf", "QPDFObjectHandle replace with copy"); |
| 1081 | 1134 | this_val = this_val.shallowCopy(); |
| 1082 | - replaceKey(key, this_val); | |
| 1135 | + replaceKey(rtype, this_val); | |
| 1083 | 1136 | } |
| 1084 | - std::set<std::string> other_val_keys = other_val.getKeys(); | |
| 1085 | - for (std::set<std::string>::iterator i2 = | |
| 1086 | - other_val_keys.begin(); | |
| 1087 | - i2 != other_val_keys.end(); ++i2) | |
| 1137 | + std::map<QPDFObjGen, std::string> og_to_name; | |
| 1138 | + std::set<std::string> rnames; | |
| 1139 | + int min_suffix = 1; | |
| 1140 | + bool initialized_maps = false; | |
| 1141 | + for (auto ov_iter: other_val.ditems()) | |
| 1088 | 1142 | { |
| 1089 | - if (! this_val.hasKey(*i2)) | |
| 1143 | + std::string const& key = ov_iter.first; | |
| 1144 | + QPDFObjectHandle rval = ov_iter.second; | |
| 1145 | + if (! this_val.hasKey(key)) | |
| 1090 | 1146 | { |
| 1091 | - QTC::TC("qpdf", "QPDFObjectHandle merge shallow copy"); | |
| 1092 | - this_val.replaceKey( | |
| 1093 | - *i2, other_val.getKey(*i2).shallowCopy()); | |
| 1147 | + if (! rval.isIndirect()) | |
| 1148 | + { | |
| 1149 | + QTC::TC("qpdf", "QPDFObjectHandle merge shallow copy"); | |
| 1150 | + rval = rval.shallowCopy(); | |
| 1151 | + } | |
| 1152 | + this_val.replaceKey(key, rval); | |
| 1153 | + } | |
| 1154 | + else if (conflicts) | |
| 1155 | + { | |
| 1156 | + if (! initialized_maps) | |
| 1157 | + { | |
| 1158 | + make_og_to_name(this_val, og_to_name); | |
| 1159 | + rnames = this_val.getResourceNames(); | |
| 1160 | + initialized_maps = true; | |
| 1161 | + } | |
| 1162 | + auto rval_og = rval.getObjGen(); | |
| 1163 | + if (rval.isIndirect() && | |
| 1164 | + og_to_name.count(rval_og)) | |
| 1165 | + { | |
| 1166 | + QTC::TC("qpdf", "QPDFObjectHandle merge reuse"); | |
| 1167 | + auto new_key = og_to_name[rval_og]; | |
| 1168 | + if (new_key != key) | |
| 1169 | + { | |
| 1170 | + (*conflicts)[rtype][key] = new_key; | |
| 1171 | + } | |
| 1172 | + } | |
| 1173 | + else | |
| 1174 | + { | |
| 1175 | + QTC::TC("qpdf", "QPDFObjectHandle merge generate"); | |
| 1176 | + std::string new_key = getUniqueResourceName( | |
| 1177 | + key + "_", min_suffix, &rnames); | |
| 1178 | + (*conflicts)[rtype][key] = new_key; | |
| 1179 | + this_val.replaceKey(new_key, rval); | |
| 1180 | + } | |
| 1094 | 1181 | } |
| 1095 | 1182 | } |
| 1096 | 1183 | } |
| 1097 | 1184 | else if (this_val.isArray() && other_val.isArray()) |
| 1098 | 1185 | { |
| 1099 | 1186 | std::set<std::string> scalars; |
| 1100 | - int n = this_val.getArrayNItems(); | |
| 1101 | - for (int i = 0; i < n; ++i) | |
| 1187 | + for (auto this_item: this_val.aitems()) | |
| 1102 | 1188 | { |
| 1103 | - QPDFObjectHandle this_item = this_val.getArrayItem(i); | |
| 1104 | 1189 | if (this_item.isScalar()) |
| 1105 | 1190 | { |
| 1106 | 1191 | scalars.insert(this_item.unparse()); |
| 1107 | 1192 | } |
| 1108 | 1193 | } |
| 1109 | - n = other_val.getArrayNItems(); | |
| 1110 | - for (int i = 0; i < n; ++i) | |
| 1194 | + for (auto other_item: other_val.aitems()) | |
| 1111 | 1195 | { |
| 1112 | - QPDFObjectHandle other_item = other_val.getArrayItem(i); | |
| 1113 | 1196 | if (other_item.isScalar()) |
| 1114 | 1197 | { |
| 1115 | 1198 | if (scalars.count(other_item.unparse()) == 0) |
| ... | ... | @@ -1128,7 +1211,7 @@ QPDFObjectHandle::mergeResources(QPDFObjectHandle other) |
| 1128 | 1211 | else |
| 1129 | 1212 | { |
| 1130 | 1213 | QTC::TC("qpdf", "QPDFObjectHandle merge copy from other"); |
| 1131 | - replaceKey(key, other_val.shallowCopy()); | |
| 1214 | + replaceKey(rtype, other_val.shallowCopy()); | |
| 1132 | 1215 | } |
| 1133 | 1216 | } |
| 1134 | 1217 | } |
| ... | ... | @@ -1165,7 +1248,16 @@ std::string |
| 1165 | 1248 | QPDFObjectHandle::getUniqueResourceName(std::string const& prefix, |
| 1166 | 1249 | int& min_suffix) |
| 1167 | 1250 | { |
| 1168 | - std::set<std::string> names = getResourceNames(); | |
| 1251 | + return getUniqueResourceName(prefix, min_suffix, nullptr); | |
| 1252 | +} | |
| 1253 | + | |
| 1254 | +std::string | |
| 1255 | +QPDFObjectHandle::getUniqueResourceName(std::string const& prefix, | |
| 1256 | + int& min_suffix, | |
| 1257 | + std::set<std::string>* namesp) | |
| 1258 | + | |
| 1259 | +{ | |
| 1260 | + std::set<std::string> names = (namesp ? *namesp : getResourceNames()); | |
| 1169 | 1261 | int max_suffix = min_suffix + QIntC::to_int(names.size()); |
| 1170 | 1262 | while (min_suffix <= max_suffix) |
| 1171 | 1263 | { | ... | ... |
qpdf/qpdf.testcov
qpdf/qtest/qpdf.test
| ... | ... | @@ -1598,7 +1598,7 @@ $td->runtest("merge dictionary", |
| 1598 | 1598 | $td->NORMALIZE_NEWLINES); |
| 1599 | 1599 | $td->runtest("unique resource name", |
| 1600 | 1600 | {$td->COMMAND => "test_driver 60 minimal.pdf"}, |
| 1601 | - {$td->STRING => "test 60 done\n", $td->EXIT_STATUS => 0}, | |
| 1601 | + {$td->FILE => "test60.out", $td->EXIT_STATUS => 0}, | |
| 1602 | 1602 | $td->NORMALIZE_NEWLINES); |
| 1603 | 1603 | $td->runtest("check output", |
| 1604 | 1604 | {$td->FILE => "a.pdf"}, | ... | ... |
qpdf/qtest/qpdf/form-filled-by-acrobat-out.pdf
No preview for this file type
qpdf/qtest/qpdf/test60.out
0 โ 100644
| 1 | +first merge | |
| 2 | +/Y: | |
| 3 | + /F3 -> /F3_1 | |
| 4 | +/Z: | |
| 5 | + /F2 -> /F2_1 | |
| 6 | +second merge | |
| 7 | +/Y: | |
| 8 | + /F3 -> /F3_1 | |
| 9 | + /F5 -> /F5_1 | |
| 10 | +/Z: | |
| 11 | + /F2 -> /F2_1 | |
| 12 | +third merge | |
| 13 | +/Y: | |
| 14 | + /F3 -> /F3_1 | |
| 15 | + /F5 -> /F5_1 | |
| 16 | +/Z: | |
| 17 | + /F2 -> /F2_1 | |
| 18 | +fourth merge | |
| 19 | +/Y: | |
| 20 | + /F3 -> /F3_1 | |
| 21 | + /F5 -> /F5_1 | |
| 22 | +/Z: | |
| 23 | + /F2 -> /F2_1 | |
| 24 | +test 60 done | ... | ... |
qpdf/qtest/qpdf/unique-resources.pdf
qpdf/test_driver.cc
| ... | ... | @@ -2362,7 +2362,9 @@ void runtest(int n, char const* filename1, char const* arg2) |
| 2362 | 2362 | } |
| 2363 | 2363 | else if (n == 60) |
| 2364 | 2364 | { |
| 2365 | - // Boundary condition testing for getUniqueResourceName | |
| 2365 | + // Boundary condition testing for getUniqueResourceName; | |
| 2366 | + // additional testing of mergeResources with conflict | |
| 2367 | + // detection | |
| 2366 | 2368 | QPDFObjectHandle r1 = QPDFObjectHandle::newDictionary(); |
| 2367 | 2369 | int min_suffix = 1; |
| 2368 | 2370 | for (int i = 1; i < 3; ++i) |
| ... | ... | @@ -2372,8 +2374,69 @@ void runtest(int n, char const* filename1, char const* arg2) |
| 2372 | 2374 | r1.getKey("/Z").replaceKey( |
| 2373 | 2375 | name, QPDFObjectHandle::newString("moo")); |
| 2374 | 2376 | } |
| 2375 | - pdf.getTrailer().replaceKey("/QTest", r1); | |
| 2377 | + auto make_resource = [&](QPDFObjectHandle& dict, | |
| 2378 | + std::string const& key, | |
| 2379 | + std::string const& str) { | |
| 2380 | + auto o1 = QPDFObjectHandle::newArray(); | |
| 2381 | + o1.appendItem(QPDFObjectHandle::newString(str)); | |
| 2382 | + dict.replaceKey(key, pdf.makeIndirectObject(o1)); | |
| 2383 | + }; | |
| 2384 | + | |
| 2385 | + auto z = r1.getKey("/Z"); | |
| 2386 | + r1.replaceKey("/Y", QPDFObjectHandle::newDictionary()); | |
| 2387 | + auto y = r1.getKey("/Y"); | |
| 2388 | + make_resource(z, "/F1", "r1.Z.F1"); | |
| 2389 | + make_resource(z, "/F2", "r1.Z.F2"); | |
| 2390 | + make_resource(y, "/F2", "r1.Y.F2"); | |
| 2391 | + make_resource(y, "/F3", "r1.Y.F3"); | |
| 2392 | + QPDFObjectHandle r2 = | |
| 2393 | + QPDFObjectHandle::parse("<< /Z << >> /Y << >> >>"); | |
| 2394 | + z = r2.getKey("/Z"); | |
| 2395 | + y = r2.getKey("/Y"); | |
| 2396 | + make_resource(z, "/F2", "r2.Z.F2"); | |
| 2397 | + make_resource(y, "/F3", "r2.Y.F3"); | |
| 2398 | + make_resource(y, "/F4", "r2.Y.F4"); | |
| 2399 | + // Add a direct object | |
| 2400 | + y.replaceKey("/F5", QPDFObjectHandle::newString("direct r2.Y.F5")); | |
| 2401 | + | |
| 2402 | + std::map<std::string, std::map<std::string, std::string>> conflicts; | |
| 2403 | + auto show_conflicts = [&](std::string const& msg) { | |
| 2404 | + std::cout << msg << std::endl; | |
| 2405 | + for (auto const& i1: conflicts) | |
| 2406 | + { | |
| 2407 | + std::cout << i1.first << ":" << std::endl; | |
| 2408 | + for (auto const& i2: i1.second) | |
| 2409 | + { | |
| 2410 | + std::cout << " " << i2.first << " -> " << i2.second | |
| 2411 | + << std::endl; | |
| 2412 | + } | |
| 2413 | + } | |
| 2414 | + }; | |
| 2415 | + | |
| 2416 | + r1.mergeResources(r2, &conflicts); | |
| 2417 | + show_conflicts("first merge"); | |
| 2418 | + auto r3 = r1.shallowCopy(); | |
| 2419 | + // Merge again. The direct object gets recopied. Everything | |
| 2420 | + // else is the same. | |
| 2421 | + r1.mergeResources(r2, &conflicts); | |
| 2422 | + show_conflicts("second merge"); | |
| 2423 | + | |
| 2424 | + // Make all resources in r2 direct. Then merge two more times. | |
| 2425 | + // We should get the one previously direct object copied one | |
| 2426 | + // time as an indirect object. | |
| 2427 | + r2.makeResourcesIndirect(pdf); | |
| 2428 | + r1.mergeResources(r2, &conflicts); | |
| 2429 | + show_conflicts("third merge"); | |
| 2430 | + r1.mergeResources(r2, &conflicts); | |
| 2431 | + show_conflicts("fourth merge"); | |
| 2432 | + | |
| 2433 | + // The only differences between /QTest and /QTest3 should be | |
| 2434 | + // the direct objects merged from r2. | |
| 2435 | + pdf.getTrailer().replaceKey("/QTest1", r1); | |
| 2436 | + pdf.getTrailer().replaceKey("/QTest2", r2); | |
| 2437 | + pdf.getTrailer().replaceKey("/QTest3", r3); | |
| 2376 | 2438 | QPDFWriter w(pdf, "a.pdf"); |
| 2439 | + w.setQDFMode(true); | |
| 2377 | 2440 | w.setStaticID(true); |
| 2378 | 2441 | w.write(); |
| 2379 | 2442 | } | ... | ... |