Commit 95d6b17a8904b110c3625e66538368c433f48042

Authored by Jay Berkenbilt
1 parent 104fd6da

Add QPDFObjectHandle::mergeDictionary()

ChangeLog
1 1 2018-12-31 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * Add method QPDFObjectHandle::mergeDictionary(), which
  4 + recursively merges dictionaries with semantics designed for
  5 + merging resource dictionaries. See detailed description in
  6 + QPDFObjectHandle.hh.
  7 +
3 8 * Add QPDFObjectHandle::Matrix, similar to
4 9 QPDFObjectHandle::Rectangle, as a convenience class for
5 10 six-element arrays that are used as matrices.
... ...
include/qpdf/QPDFObjectHandle.hh
... ... @@ -559,6 +559,28 @@ class QPDFObjectHandle
559 559 QPDF_DLL
560 560 bool isOrHasName(std::string const&);
561 561  
  562 + // Merge dictionaries with the following behavior, where "object"
  563 + // refers to the object whose method is invoked, and "other"
  564 + // refers to the argument:
  565 + // * If either object or other is not a dictionary, do nothing
  566 + // * Otherwise
  567 + //   * For each key in other
  568 + //     * If key is absent in object, insert it
  569 + //     * If key is present in object
  570 + //       * If both values are dictionaries, merge the dictionary from
  571 + //         other into the one from object
  572 + //       * If both values are arrays, append scalar elements from
  573 + //         other's that are not present in object's onto object's,
  574 + //         and ignore non-scalar elements in other's
  575 + //       * Otherwise ignore (object's existing value is kept)
  576 + // The primary purpose of this method is to facilitate merging of
  577 + // resource dictionaries. Conflicts are ignored. If needed, a
  578 + // future version of qpdf may provide some mechanism for conflict
  579 + // resolution, such as providing a handler that is invoked with
  580 + // the path to the conflict.
        // Implementation notes: when a value in object is an indirect
        // dictionary that has to be merged into, it is first replaced in
        // object by a shallow copy and the merge is applied to that copy.
        // If both values are the same indirect object, nothing is done.
        // Nested merging stops silently beyond a fixed nesting depth.
  581 + QPDF_DLL
  582 + void mergeDictionary(QPDFObjectHandle other);
  583 +
562 584 // Return the QPDF object that owns an indirect object. Returns
563 585 // null for a direct object.
564 586 QPDF_DLL
... ... @@ -970,6 +992,10 @@ class QPDFObjectHandle
970 992 ParserCallbacks* callbacks);
971 993 std::vector<QPDFObjectHandle> arrayOrStreamToStreamArray(
972 994 std::string const& description, std::string& all_description);
  995 + void mergeDictionaryInternal(
        // Recursive worker behind mergeDictionary().
        // other: dictionary whose entries are merged into this object.
  996 + QPDFObjectHandle other,
        // visiting: object/generation ids of indirect dictionaries whose
        // merge is currently in progress; used to detect loops.
  997 + std::set<QPDFObjGen>& visiting,
        // depth: current nesting level; the public entry point passes 0.
  998 + int depth);
973 999 static void warn(QPDF*, QPDFExc const&);
974 1000  
975 1001 class Members
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -825,6 +825,109 @@ QPDFObjectHandle::isOrHasName(std::string const& value)
825 825 return false;
826 826 }
827 827  
  828 +void
  829 +QPDFObjectHandle::mergeDictionary(QPDFObjectHandle other)
  830 +{
        // Public entry point for the merge: delegates to
        // mergeDictionaryInternal() with an empty set of in-progress
        // indirect objects and a starting depth of 0.
  831 + std::set<QPDFObjGen> visiting;
  832 + mergeDictionaryInternal(other, visiting, 0);
  833 +}
  834 +
  835 +void
  836 +QPDFObjectHandle::mergeDictionaryInternal(
  837 + QPDFObjectHandle other,
  838 + std::set<QPDFObjGen>& visiting,
  839 + int depth)
  840 +{
        // Recursively merge the entries of "other" into this dictionary.
        //   other    - dictionary supplying the entries to merge in
        //   visiting - object/generation ids of indirect dictionaries
        //              whose merge is currently in progress (loop guard)
        //   depth    - current nesting level; 0 for the top-level call
        // Returns nothing; this object is modified in place.
  841 + if (depth > 100)
  842 + {
  843 + // Arbitrarily limit depth to avoid stack overflow
  844 + return;
  845 + }
        // Merging is only defined when both sides are dictionaries;
        // anything else is a silent no-op.
  846 + if (! (isDictionary() && other.isDictionary()))
  847 + {
  848 + QTC::TC("qpdf", "QPDFObjectHandle merge top type mismatch");
  849 + return;
  850 + }
        // Iterate over a local copy of other's key set.
  851 + std::set<std::string> other_keys = other.getKeys();
  852 + for (std::set<std::string>::iterator iter = other_keys.begin();
  853 + iter != other_keys.end(); ++iter)
  854 + {
  855 + std::string const& key = *iter;
  856 + QPDFObjectHandle other_val = other.getKey(key);
  857 + if (hasKey(key))
  858 + {
        // Key exists on both sides: only dictionary/dictionary and
        // array/array pairs are merged. Any other combination leaves
        // this object's value untouched.
  859 + QPDFObjectHandle this_val = getKey(key);
  860 + if (this_val.isDictionary() && other_val.isDictionary())
  861 + {
  862 + if (this_val.isIndirect() && other_val.isIndirect() &&
  863 + (this_val.getObjGen() == other_val.getObjGen()))
  864 + {
        // Both sides refer to the same indirect object; there is
        // nothing to merge.
  865 + QTC::TC("qpdf", "QPDFObjectHandle merge equal indirect");
  866 + }
  867 + else if (this_val.isIndirect() &&
  868 + (visiting.count(this_val.getObjGen())))
  869 + {
        // This indirect dictionary is already being merged further
        // up the call chain; skip it to avoid looping.
  870 + QTC::TC("qpdf", "QPDFObjectHandle merge loop");
  871 + }
  872 + else
  873 + {
  874 + QPDFObjGen loop;
  875 + if (this_val.isIndirect())
  876 + {
        // Remember the indirect object's id for loop detection,
        // then swap in a shallow copy under this key so that the
        // nested merge modifies the copy -- presumably so the
        // original indirect object, which may be referenced
        // elsewhere, is left unchanged (TODO confirm).
  877 + loop = this_val.getObjGen();
  878 + visiting.insert(loop);
  879 + QTC::TC("qpdf", "QPDFObjectHandle merge shallow copy");
  880 + this_val = this_val.shallowCopy();
  881 + replaceKey(key, this_val);
  882 + }
  883 + QTC::TC("qpdf", "QPDFObjectHandle nested merge");
  884 + this_val.mergeDictionaryInternal(
  885 + other_val, visiting, 1 + depth);
        // "loop" is only assigned when this_val was indirect; this
        // test assumes a default-constructed QPDFObjGen reports
        // object number 0 -- TODO confirm.
  886 + if (loop.getObj())
  887 + {
  888 + visiting.erase(loop);
  889 + }
  890 + }
  891 + }
  892 + else if (this_val.isArray() && other_val.isArray())
  893 + {
        // Collect the unparsed form of every scalar already in this
        // object's array, then append each scalar from other's array
        // whose unparsed form is not in that set. Non-scalar items
        // from other's array are skipped.
  894 + std::set<std::string> scalars;
  895 + int n = this_val.getArrayNItems();
  896 + for (int i = 0; i < n; ++i)
  897 + {
  898 + QPDFObjectHandle this_item = this_val.getArrayItem(i);
  899 + if (this_item.isScalar())
  900 + {
  901 + scalars.insert(this_item.unparse());
  902 + }
  903 + }
  904 + n = other_val.getArrayNItems();
  905 + for (int i = 0; i < n; ++i)
  906 + {
  907 + QPDFObjectHandle other_item = other_val.getArrayItem(i);
  908 + if (other_item.isScalar())
  909 + {
        // NOTE(review): "scalars" is not updated as items are
        // appended, so a scalar that occurs more than once in
        // other's array (and not in this one) is appended once
        // per occurrence.
  910 + if (scalars.count(other_item.unparse()) == 0)
  911 + {
  912 + QTC::TC("qpdf", "QPDFObjectHandle merge array");
  913 + this_val.appendItem(other_item);
  914 + }
  915 + else
  916 + {
  917 + QTC::TC("qpdf", "QPDFObjectHandle merge array dup");
  918 + }
  919 + }
  920 + }
  921 + }
  922 + }
  923 + else
  924 + {
        // Key is absent from this dictionary: store other's value
        // handle as-is (no copy is made here).
        // NOTE(review): whether later changes through this object can
        // therefore reach other's value depends on QPDFObjectHandle
        // copy semantics -- confirm.
  925 + QTC::TC("qpdf", "QPDFObjectHandle merge copy from other");
  926 + replaceKey(key, other_val);
  927 + }
  928 + }
  929 +}
  930 +
828 931 // Indirect object accessors
829 932 QPDF*
830 933 QPDFObjectHandle::getOwningQPDF()
... ...
qpdf/qpdf.testcov
... ... @@ -369,3 +369,11 @@ QPDFOutlineDocumentHelper string named dest 0
369 369 QPDFOutlineObjectHelper loop 0
370 370 qpdf required parameter 0
371 371 qpdf required choices 0
  372 +QPDFObjectHandle merge top type mismatch 0
  373 +QPDFObjectHandle merge shallow copy 0
  374 +QPDFObjectHandle nested merge 0
  375 +QPDFObjectHandle merge array 0
  376 +QPDFObjectHandle merge array dup 0
  377 +QPDFObjectHandle merge copy from other 0
  378 +QPDFObjectHandle merge loop 0
  379 +QPDFObjectHandle merge equal indirect 0
... ...
qpdf/qtest/qpdf.test
... ... @@ -895,6 +895,16 @@ $td->runtest("detect foreign object in write",
895 895  
896 896 show_ntests();
897 897 # ----------
  898 +$td->notify("--- Merge Dictionary ---");
  899 +$n_tests += 1;
  900 +
  901 +$td->runtest("merge dictionary",
  902 + {$td->COMMAND => "test_driver 50 merge-dict.pdf"},
  903 + {$td->FILE => "merge-dict.out", $td->EXIT_STATUS => 0},
  904 + $td->NORMALIZE_NEWLINES);
  905 +
  906 +show_ntests();
  907 +# ----------
898 908 $td->notify("--- Parsing ---");
899 909 $n_tests += 17;
900 910  
... ...
qpdf/qtest/qpdf/merge-dict.out 0 → 100644
  1 +{
  2 + "/k1": "scalar1",
  3 + "/k2": 16059,
  4 + "/k3": {
  5 + "/a": "a",
  6 + "/b": "conflict: seen",
  7 + "/c": [
  8 + 2,
  9 + 3,
  10 + 1
  11 + ],
  12 + "/d": {
  13 + "/x": 24,
  14 + "/y": 25,
  15 + "/z": 26
  16 + },
  17 + "/e": "e"
  18 + },
  19 + "/k4": {
  20 + "/A": 65,
  21 + "/B": 66,
  22 + "/C": 67,
  23 + "/indirect2": "8 0 R",
  24 + "/recursive": "9 0 R"
  25 + },
  26 + "/k5": [
  27 + "/one",
  28 + 2,
  29 + "three",
  30 + [
  31 + "/four"
  32 + ],
  33 + "two"
  34 + ]
  35 +}
  36 +test 50 done
... ...
qpdf/qtest/qpdf/merge-dict.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +1 0 obj
  6 +<<
  7 + /Pages 2 0 R
  8 + /Type /Catalog
  9 +>>
  10 +endobj
  11 +
  12 +2 0 obj
  13 +<<
  14 + /Count 1
  15 + /Kids [
  16 + 3 0 R
  17 + ]
  18 + /Type /Pages
  19 +>>
  20 +endobj
  21 +
  22 +%% Page 1
  23 +3 0 obj
  24 +<<
  25 + /Contents 4 0 R
  26 + /MediaBox [
  27 + 0
  28 + 0
  29 + 612
  30 + 792
  31 + ]
  32 + /Parent 2 0 R
  33 + /Resources <<
  34 + /Font <<
  35 + /F1 6 0 R
  36 + >>
  37 + /ProcSet 7 0 R
  38 + >>
  39 + /Type /Page
  40 +>>
  41 +endobj
  42 +
  43 +%% Contents for page 1
  44 +4 0 obj
  45 +<<
  46 + /Length 5 0 R
  47 +>>
  48 +stream
  49 +BT
  50 + /F1 24 Tf
  51 + 72 720 Td
  52 + (Potato) Tj
  53 +ET
  54 +endstream
  55 +endobj
  56 +
  57 +5 0 obj
  58 +44
  59 +endobj
  60 +
  61 +6 0 obj
  62 +<<
  63 + /BaseFont /Helvetica
  64 + /Encoding /WinAnsiEncoding
  65 + /Name /F1
  66 + /Subtype /Type1
  67 + /Type /Font
  68 +>>
  69 +endobj
  70 +
  71 +7 0 obj
  72 +[
  73 + /PDF
  74 + /Text
  75 +]
  76 +endobj
  77 +
  78 +8 0 obj
  79 +<<
  80 + /a (a)
  81 + /b (b)
  82 + /c [1 2]
  83 + /d << /x 24 /y (not seen) >>
  84 +>>
  85 +endobj
  86 +
  87 +9 0 obj
  88 +<<
  89 + /A 65
  90 + /B 66
  91 + /indirect2 8 0 R
  92 + /recursive 9 0 R
  93 +>>
  94 +endobj
  95 +
  96 +xref
  97 +0 10
  98 +0000000000 65535 f
  99 +0000000025 00000 n
  100 +0000000079 00000 n
  101 +0000000161 00000 n
  102 +0000000376 00000 n
  103 +0000000475 00000 n
  104 +0000000494 00000 n
  105 +0000000612 00000 n
  106 +0000000647 00000 n
  107 +0000000729 00000 n
  108 +trailer <<
  109 + /Root 1 0 R
  110 + /Size 10
  111 + /ID [<f8c8da17f88e0dccac9f73ad9d0ee411><f8c8da17f88e0dccac9f73ad9d0ee411>]
  112 + /Dict1 <<
  113 + /k1 (scalar1)
  114 + /k3 <<
  115 + /b (conflict: seen)
  116 + /c [2 3]
  117 + /d << /y 25 /z 26 >>
  118 + /e (e)
  119 + >>
  120 + /k4 9 0 R
  121 + /k5 [
  122 + /one
  123 + 2
  124 + (three)
  125 + [ /four ]
  126 + ]
  127 + >>
  128 + /Dict2 <<
  129 + /k1 (other: conflict: not seen)
  130 + /k2 16059
  131 + /k3 8 0 R
  132 + /k4 <<
  133 + /B (not seen)
  134 + /C 67
  135 + /indirect2 8 0 R
  136 + /recursive 8 0 R
  137 + >>
  138 + /k5 [
  139 + /one
  140 + (two)
  141 + << /six 6 >>
  142 + [ /five ]
  143 + ]
  144 + >>
  145 +>>
  146 +startxref
  147 +805
  148 +%%EOF
... ...
qpdf/test_driver.cc
... ... @@ -1754,6 +1754,17 @@ void runtest(int n, char const* filename1, char const* arg2)
1754 1754 }
1755 1755 }
1756 1756 }
  1757 + else if (n == 50)
  1758 + {
  1759 + // Test dictionary merge. This test is crafted to work with
  1760 + // merge-dict.pdf
  1761 + QPDFObjectHandle d1 = pdf.getTrailer().getKey("/Dict1");
  1762 + QPDFObjectHandle d2 = pdf.getTrailer().getKey("/Dict2");
  1763 + d1.mergeDictionary(d2);
  1764 + std::cout << d1.getJSON().unparse() << std::endl;
  1765 + // Top-level type mismatch
  1766 + d1.mergeDictionary(d2.getKey("/k1"));
  1767 + }
1757 1768 else
1758 1769 {
1759 1770 throw std::runtime_error(std::string("invalid test ") +
... ...