Commit 95d6b17a8904b110c3625e66538368c433f48042
1 parent
104fd6da
Add QPDFObjectHandle::mergeDictionary()
Showing
8 changed files
with
347 additions
and
0 deletions
ChangeLog
| 1 | 2018-12-31 Jay Berkenbilt <ejb@ql.org> | 1 | 2018-12-31 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | ||
| 3 | + * Add method QPDFObjectHandle::mergeDictionary(), which | ||
| 4 | + recursively merges dictionaries with semantics designed for | ||
| 5 | + merging resource dictionaries. See detailed description in | ||
| 6 | + QPDFObjectHandle.hh. | ||
| 7 | + | ||
| 3 | * Add QPDFObjectHandle::Matrix, similar to | 8 | * Add QPDFObjectHandle::Matrix, similar to |
| 4 | QPDFObjectHandle::Rectangle, as a convenience class for | 9 | QPDFObjectHandle::Rectangle, as a convenience class for |
| 5 | six-element arrays that are used as matrices. | 10 | six-element arrays that are used as matrices. |
include/qpdf/QPDFObjectHandle.hh
| @@ -559,6 +559,28 @@ class QPDFObjectHandle | @@ -559,6 +559,28 @@ class QPDFObjectHandle | ||
| 559 | QPDF_DLL | 559 | QPDF_DLL |
| 560 | bool isOrHasName(std::string const&); | 560 | bool isOrHasName(std::string const&); |
| 561 | 561 | ||
| 562 | + // Merge dictionaries with the following behavior, where "object" | ||
| 563 | + // refers to the object whose method is invoked, and "other" | ||
| 564 | + // refers to the argument: | ||
| 565 | + // * If either object or other is not a dictionary, do nothing | ||
| 566 | + // * Otherwise | ||
| 567 | + // * For each key in other | ||
| 568 | + // * If key is absent in object, insert it | ||
| 569 | + // * If key is present in object | ||
| 570 | + // * If both values are dictionaries, merge the dictionary from | ||
| 571 | + // other into the one from object | ||
| 572 | + // * If both values are arrays, append scalar elements from | ||
| 573 | + // other's that are not present in object's onto object's, | ||
| 574 | + // and ignore non-scalar elements in other's | ||
| 575 | + // * Otherwise ignore | ||
| 576 | + // The primary purpose of this method is to facilitate merging of | ||
| 577 | + // resource dictionaries. Conflicts are ignored. If needed, a | ||
| 578 | + // future version of qpdf may provide some mechanism for conflict | ||
| 579 | + // resolution, such as providing a handler that is invoked with | ||
| 580 | + // the path to the conflict. | ||
| 581 | + QPDF_DLL | ||
| 582 | + void mergeDictionary(QPDFObjectHandle other); | ||
| 583 | + | ||
| 562 | // Return the QPDF object that owns an indirect object. Returns | 584 | // Return the QPDF object that owns an indirect object. Returns |
| 563 | // null for a direct object. | 585 | // null for a direct object. |
| 564 | QPDF_DLL | 586 | QPDF_DLL |
| @@ -970,6 +992,10 @@ class QPDFObjectHandle | @@ -970,6 +992,10 @@ class QPDFObjectHandle | ||
| 970 | ParserCallbacks* callbacks); | 992 | ParserCallbacks* callbacks); |
| 971 | std::vector<QPDFObjectHandle> arrayOrStreamToStreamArray( | 993 | std::vector<QPDFObjectHandle> arrayOrStreamToStreamArray( |
| 972 | std::string const& description, std::string& all_description); | 994 | std::string const& description, std::string& all_description); |
| 995 | + void mergeDictionaryInternal( | ||
| 996 | + QPDFObjectHandle other, | ||
| 997 | + std::set<QPDFObjGen>& visiting, | ||
| 998 | + int depth); | ||
| 973 | static void warn(QPDF*, QPDFExc const&); | 999 | static void warn(QPDF*, QPDFExc const&); |
| 974 | 1000 | ||
| 975 | class Members | 1001 | class Members |
libqpdf/QPDFObjectHandle.cc
| @@ -825,6 +825,109 @@ QPDFObjectHandle::isOrHasName(std::string const& value) | @@ -825,6 +825,109 @@ QPDFObjectHandle::isOrHasName(std::string const& value) | ||
| 825 | return false; | 825 | return false; |
| 826 | } | 826 | } |
| 827 | 827 | ||
| 828 | +void | ||
| 829 | +QPDFObjectHandle::mergeDictionary(QPDFObjectHandle other) | ||
| 830 | +{ | ||
| 831 | + std::set<QPDFObjGen> visiting; | ||
| 832 | + mergeDictionaryInternal(other, visiting, 0); | ||
| 833 | +} | ||
| 834 | + | ||
| 835 | +void | ||
| 836 | +QPDFObjectHandle::mergeDictionaryInternal( | ||
| 837 | + QPDFObjectHandle other, | ||
| 838 | + std::set<QPDFObjGen>& visiting, | ||
| 839 | + int depth) | ||
| 840 | +{ | ||
| 841 | + if (depth > 100) | ||
| 842 | + { | ||
| 843 | + // Arbitrarily limit depth to avoid stack overflow | ||
| 844 | + return; | ||
| 845 | + } | ||
| 846 | + if (! (isDictionary() && other.isDictionary())) | ||
| 847 | + { | ||
| 848 | + QTC::TC("qpdf", "QPDFObjectHandle merge top type mismatch"); | ||
| 849 | + return; | ||
| 850 | + } | ||
| 851 | + std::set<std::string> other_keys = other.getKeys(); | ||
| 852 | + for (std::set<std::string>::iterator iter = other_keys.begin(); | ||
| 853 | + iter != other_keys.end(); ++iter) | ||
| 854 | + { | ||
| 855 | + std::string const& key = *iter; | ||
| 856 | + QPDFObjectHandle other_val = other.getKey(key); | ||
| 857 | + if (hasKey(key)) | ||
| 858 | + { | ||
| 859 | + QPDFObjectHandle this_val = getKey(key); | ||
| 860 | + if (this_val.isDictionary() && other_val.isDictionary()) | ||
| 861 | + { | ||
| 862 | + if (this_val.isIndirect() && other_val.isIndirect() && | ||
| 863 | + (this_val.getObjGen() == other_val.getObjGen())) | ||
| 864 | + { | ||
| 865 | + QTC::TC("qpdf", "QPDFObjectHandle merge equal indirect"); | ||
| 866 | + } | ||
| 867 | + else if (this_val.isIndirect() && | ||
| 868 | + (visiting.count(this_val.getObjGen()))) | ||
| 869 | + { | ||
| 870 | + QTC::TC("qpdf", "QPDFObjectHandle merge loop"); | ||
| 871 | + } | ||
| 872 | + else | ||
| 873 | + { | ||
| 874 | + QPDFObjGen loop; | ||
| 875 | + if (this_val.isIndirect()) | ||
| 876 | + { | ||
| 877 | + loop = this_val.getObjGen(); | ||
| 878 | + visiting.insert(loop); | ||
| 879 | + QTC::TC("qpdf", "QPDFObjectHandle merge shallow copy"); | ||
| 880 | + this_val = this_val.shallowCopy(); | ||
| 881 | + replaceKey(key, this_val); | ||
| 882 | + } | ||
| 883 | + QTC::TC("qpdf", "QPDFObjectHandle nested merge"); | ||
| 884 | + this_val.mergeDictionaryInternal( | ||
| 885 | + other_val, visiting, 1 + depth); | ||
| 886 | + if (loop.getObj()) | ||
| 887 | + { | ||
| 888 | + visiting.erase(loop); | ||
| 889 | + } | ||
| 890 | + } | ||
| 891 | + } | ||
| 892 | + else if (this_val.isArray() && other_val.isArray()) | ||
| 893 | + { | ||
| 894 | + std::set<std::string> scalars; | ||
| 895 | + int n = this_val.getArrayNItems(); | ||
| 896 | + for (int i = 0; i < n; ++i) | ||
| 897 | + { | ||
| 898 | + QPDFObjectHandle this_item = this_val.getArrayItem(i); | ||
| 899 | + if (this_item.isScalar()) | ||
| 900 | + { | ||
| 901 | + scalars.insert(this_item.unparse()); | ||
| 902 | + } | ||
| 903 | + } | ||
| 904 | + n = other_val.getArrayNItems(); | ||
| 905 | + for (int i = 0; i < n; ++i) | ||
| 906 | + { | ||
| 907 | + QPDFObjectHandle other_item = other_val.getArrayItem(i); | ||
| 908 | + if (other_item.isScalar()) | ||
| 909 | + { | ||
| 910 | + if (scalars.count(other_item.unparse()) == 0) | ||
| 911 | + { | ||
| 912 | + QTC::TC("qpdf", "QPDFObjectHandle merge array"); | ||
| 913 | + this_val.appendItem(other_item); | ||
| 914 | + } | ||
| 915 | + else | ||
| 916 | + { | ||
| 917 | + QTC::TC("qpdf", "QPDFObjectHandle merge array dup"); | ||
| 918 | + } | ||
| 919 | + } | ||
| 920 | + } | ||
| 921 | + } | ||
| 922 | + } | ||
| 923 | + else | ||
| 924 | + { | ||
| 925 | + QTC::TC("qpdf", "QPDFObjectHandle merge copy from other"); | ||
| 926 | + replaceKey(key, other_val); | ||
| 927 | + } | ||
| 928 | + } | ||
| 929 | +} | ||
| 930 | + | ||
| 828 | // Indirect object accessors | 931 | // Indirect object accessors |
| 829 | QPDF* | 932 | QPDF* |
| 830 | QPDFObjectHandle::getOwningQPDF() | 933 | QPDFObjectHandle::getOwningQPDF() |
qpdf/qpdf.testcov
| @@ -369,3 +369,11 @@ QPDFOutlineDocumentHelper string named dest 0 | @@ -369,3 +369,11 @@ QPDFOutlineDocumentHelper string named dest 0 | ||
| 369 | QPDFOutlineObjectHelper loop 0 | 369 | QPDFOutlineObjectHelper loop 0 |
| 370 | qpdf required parameter 0 | 370 | qpdf required parameter 0 |
| 371 | qpdf required choices 0 | 371 | qpdf required choices 0 |
| 372 | +QPDFObjectHandle merge top type mismatch 0 | ||
| 373 | +QPDFObjectHandle merge shallow copy 0 | ||
| 374 | +QPDFObjectHandle nested merge 0 | ||
| 375 | +QPDFObjectHandle merge array 0 | ||
| 376 | +QPDFObjectHandle merge array dup 0 | ||
| 377 | +QPDFObjectHandle merge copy from other 0 | ||
| 378 | +QPDFObjectHandle merge loop 0 | ||
| 379 | +QPDFObjectHandle merge equal indirect 0 |
qpdf/qtest/qpdf.test
| @@ -895,6 +895,16 @@ $td->runtest("detect foreign object in write", | @@ -895,6 +895,16 @@ $td->runtest("detect foreign object in write", | ||
| 895 | 895 | ||
| 896 | show_ntests(); | 896 | show_ntests(); |
| 897 | # ---------- | 897 | # ---------- |
| 898 | +$td->notify("--- Merge Dictionary ---"); | ||
| 899 | +$n_tests += 1; | ||
| 900 | + | ||
| 901 | +$td->runtest("merge dictionary", | ||
| 902 | + {$td->COMMAND => "test_driver 50 merge-dict.pdf"}, | ||
| 903 | + {$td->FILE => "merge-dict.out", $td->EXIT_STATUS => 0}, | ||
| 904 | + $td->NORMALIZE_NEWLINES); | ||
| 905 | + | ||
| 906 | +show_ntests(); | ||
| 907 | +# ---------- | ||
| 898 | $td->notify("--- Parsing ---"); | 908 | $td->notify("--- Parsing ---"); |
| 899 | $n_tests += 17; | 909 | $n_tests += 17; |
| 900 | 910 |
qpdf/qtest/qpdf/merge-dict.out
0 → 100644
| 1 | +{ | ||
| 2 | + "/k1": "scalar1", | ||
| 3 | + "/k2": 16059, | ||
| 4 | + "/k3": { | ||
| 5 | + "/a": "a", | ||
| 6 | + "/b": "conflict: seen", | ||
| 7 | + "/c": [ | ||
| 8 | + 2, | ||
| 9 | + 3, | ||
| 10 | + 1 | ||
| 11 | + ], | ||
| 12 | + "/d": { | ||
| 13 | + "/x": 24, | ||
| 14 | + "/y": 25, | ||
| 15 | + "/z": 26 | ||
| 16 | + }, | ||
| 17 | + "/e": "e" | ||
| 18 | + }, | ||
| 19 | + "/k4": { | ||
| 20 | + "/A": 65, | ||
| 21 | + "/B": 66, | ||
| 22 | + "/C": 67, | ||
| 23 | + "/indirect2": "8 0 R", | ||
| 24 | + "/recursive": "9 0 R" | ||
| 25 | + }, | ||
| 26 | + "/k5": [ | ||
| 27 | + "/one", | ||
| 28 | + 2, | ||
| 29 | + "three", | ||
| 30 | + [ | ||
| 31 | + "/four" | ||
| 32 | + ], | ||
| 33 | + "two" | ||
| 34 | + ] | ||
| 35 | +} | ||
| 36 | +test 50 done |
qpdf/qtest/qpdf/merge-dict.pdf
0 → 100644
| 1 | +%PDF-1.3 | ||
| 2 | +%¿÷¢þ | ||
| 3 | +%QDF-1.0 | ||
| 4 | + | ||
| 5 | +1 0 obj | ||
| 6 | +<< | ||
| 7 | + /Pages 2 0 R | ||
| 8 | + /Type /Catalog | ||
| 9 | +>> | ||
| 10 | +endobj | ||
| 11 | + | ||
| 12 | +2 0 obj | ||
| 13 | +<< | ||
| 14 | + /Count 1 | ||
| 15 | + /Kids [ | ||
| 16 | + 3 0 R | ||
| 17 | + ] | ||
| 18 | + /Type /Pages | ||
| 19 | +>> | ||
| 20 | +endobj | ||
| 21 | + | ||
| 22 | +%% Page 1 | ||
| 23 | +3 0 obj | ||
| 24 | +<< | ||
| 25 | + /Contents 4 0 R | ||
| 26 | + /MediaBox [ | ||
| 27 | + 0 | ||
| 28 | + 0 | ||
| 29 | + 612 | ||
| 30 | + 792 | ||
| 31 | + ] | ||
| 32 | + /Parent 2 0 R | ||
| 33 | + /Resources << | ||
| 34 | + /Font << | ||
| 35 | + /F1 6 0 R | ||
| 36 | + >> | ||
| 37 | + /ProcSet 7 0 R | ||
| 38 | + >> | ||
| 39 | + /Type /Page | ||
| 40 | +>> | ||
| 41 | +endobj | ||
| 42 | + | ||
| 43 | +%% Contents for page 1 | ||
| 44 | +4 0 obj | ||
| 45 | +<< | ||
| 46 | + /Length 5 0 R | ||
| 47 | +>> | ||
| 48 | +stream | ||
| 49 | +BT | ||
| 50 | + /F1 24 Tf | ||
| 51 | + 72 720 Td | ||
| 52 | + (Potato) Tj | ||
| 53 | +ET | ||
| 54 | +endstream | ||
| 55 | +endobj | ||
| 56 | + | ||
| 57 | +5 0 obj | ||
| 58 | +44 | ||
| 59 | +endobj | ||
| 60 | + | ||
| 61 | +6 0 obj | ||
| 62 | +<< | ||
| 63 | + /BaseFont /Helvetica | ||
| 64 | + /Encoding /WinAnsiEncoding | ||
| 65 | + /Name /F1 | ||
| 66 | + /Subtype /Type1 | ||
| 67 | + /Type /Font | ||
| 68 | +>> | ||
| 69 | +endobj | ||
| 70 | + | ||
| 71 | +7 0 obj | ||
| 72 | +[ | ||
| 73 | + /PDF | ||
| 74 | + /Text | ||
| 75 | +] | ||
| 76 | +endobj | ||
| 77 | + | ||
| 78 | +8 0 obj | ||
| 79 | +<< | ||
| 80 | + /a (a) | ||
| 81 | + /b (b) | ||
| 82 | + /c [1 2] | ||
| 83 | + /d << /x 24 /y (not seen) >> | ||
| 84 | +>> | ||
| 85 | +endobj | ||
| 86 | + | ||
| 87 | +9 0 obj | ||
| 88 | +<< | ||
| 89 | + /A 65 | ||
| 90 | + /B 66 | ||
| 91 | + /indirect2 8 0 R | ||
| 92 | + /recursive 9 0 R | ||
| 93 | +>> | ||
| 94 | +endobj | ||
| 95 | + | ||
| 96 | +xref | ||
| 97 | +0 10 | ||
| 98 | +0000000000 65535 f | ||
| 99 | +0000000025 00000 n | ||
| 100 | +0000000079 00000 n | ||
| 101 | +0000000161 00000 n | ||
| 102 | +0000000376 00000 n | ||
| 103 | +0000000475 00000 n | ||
| 104 | +0000000494 00000 n | ||
| 105 | +0000000612 00000 n | ||
| 106 | +0000000647 00000 n | ||
| 107 | +0000000729 00000 n | ||
| 108 | +trailer << | ||
| 109 | + /Root 1 0 R | ||
| 110 | + /Size 10 | ||
| 111 | + /ID [<f8c8da17f88e0dccac9f73ad9d0ee411><f8c8da17f88e0dccac9f73ad9d0ee411>] | ||
| 112 | + /Dict1 << | ||
| 113 | + /k1 (scalar1) | ||
| 114 | + /k3 << | ||
| 115 | + /b (conflict: seen) | ||
| 116 | + /c [2 3] | ||
| 117 | + /d << /y 25 /z 26 >> | ||
| 118 | + /e (e) | ||
| 119 | + >> | ||
| 120 | + /k4 9 0 R | ||
| 121 | + /k5 [ | ||
| 122 | + /one | ||
| 123 | + 2 | ||
| 124 | + (three) | ||
| 125 | + [ /four ] | ||
| 126 | + ] | ||
| 127 | + >> | ||
| 128 | + /Dict2 << | ||
| 129 | + /k1 (other: conflict: not seen) | ||
| 130 | + /k2 16059 | ||
| 131 | + /k3 8 0 R | ||
| 132 | + /k4 << | ||
| 133 | + /B (not seen) | ||
| 134 | + /C 67 | ||
| 135 | + /indirect2 8 0 R | ||
| 136 | + /recursive 8 0 R | ||
| 137 | + >> | ||
| 138 | + /k5 [ | ||
| 139 | + /one | ||
| 140 | + (two) | ||
| 141 | + << /six 6 >> | ||
| 142 | + [ /five ] | ||
| 143 | + ] | ||
| 144 | + >> | ||
| 145 | +>> | ||
| 146 | +startxref | ||
| 147 | +805 | ||
| 148 | +%%EOF |
qpdf/test_driver.cc
| @@ -1754,6 +1754,17 @@ void runtest(int n, char const* filename1, char const* arg2) | @@ -1754,6 +1754,17 @@ void runtest(int n, char const* filename1, char const* arg2) | ||
| 1754 | } | 1754 | } |
| 1755 | } | 1755 | } |
| 1756 | } | 1756 | } |
| 1757 | + else if (n == 50) | ||
| 1758 | + { | ||
| 1759 | + // Test dictionary merge. This test is crafted to work with | ||
| 1760 | + // merge-dict.pdf | ||
| 1761 | + QPDFObjectHandle d1 = pdf.getTrailer().getKey("/Dict1"); | ||
| 1762 | + QPDFObjectHandle d2 = pdf.getTrailer().getKey("/Dict2"); | ||
| 1763 | + d1.mergeDictionary(d2); | ||
| 1764 | + std::cout << d1.getJSON().unparse() << std::endl; | ||
| 1765 | + // Top-level type mismatch | ||
| 1766 | + d1.mergeDictionary(d2.getKey("/k1")); | ||
| 1767 | + } | ||
| 1757 | else | 1768 | else |
| 1758 | { | 1769 | { |
| 1759 | throw std::runtime_error(std::string("invalid test ") + | 1770 | throw std::runtime_error(std::string("invalid test ") + |