Commit 009767d97a0dfebbb9bb71efb4b894b25fb59dd8

Authored by Jay Berkenbilt
1 parent 2d32f4db

Handle inheritable page attributes

Add getAttribute for handling inheritable page attributes, and fix
getPageImages and annotation flattening code to use it.
ChangeLog
  1 +2019-01-25 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Add new method QPDFPageObjectHelper::getAttribute() that
  4 + properly handles inherited attributes and allows for creation of a
  5 + copy of shared attributes. This is very useful if you are getting
  6 + an attribute of a page dictionary with the intent to modify it
  7 + privately for that page.
  8 +
  9 + * Fix QPDFPageObjectHelper::getPageImages (and the legacy
  10 + QPDFObjectHandle::getPageImages()) to properly handle images in
  11 + inherited resources dictionaries.
  12 +
1 13 2019-01-20 Jay Berkenbilt <ejb@ql.org>
2 14  
3 15 * Tweak the content code generated for variable text fields to
... ...
include/qpdf/QPDFPageObjectHelper.hh
... ... @@ -39,13 +39,22 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
39 39 {
40 40 }
41 41  
  42 + // Return the effective value of this attribute for the page. If
  43 + // the requested attribute is not present on the page but is
  44 + // inheritable, look up through the page's ancestors in the page
  45 + // tree. If copy_if_shared is true, then this method will replace
  46 + // the attribute with a shallow copy if it is indirect or
  47 + // inherited and return the copy. You should do this if you are
  48 + // going to modify the returned object and want the modifications
  49 + // to apply to the current page only.
  50 + QPDF_DLL
  51 + QPDFObjectHandle
  52 + getAttribute(std::string const& name, bool copy_if_shared);
  53 +
42 54 // Returns an empty map if there are no images or no resources.
43   - // This function does not presently support inherited resources.
44   - // If this is a significant concern, call
45   - // pushInheritedAttributesToPage() on the QPDF object that owns
46   - // this page. See comment in the source for details. Return value
47   - // is a map from XObject name to the image object, which is always
48   - // a stream.
  55 + // Prior to qpdf 8.4.0, this function did not support inherited
  56 + // resources, but it does now. Return value is a map from XObject
  57 + // name to the image object, which is always a stream.
49 58 QPDF_DLL
50 59 std::map<std::string, QPDFObjectHandle> getPageImages();
51 60  
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -18,6 +18,7 @@
18 18 #include <qpdf/Pl_QPDFTokenizer.hh>
19 19 #include <qpdf/BufferInputSource.hh>
20 20 #include <qpdf/QPDFExc.hh>
  21 +#include <qpdf/QPDFPageObjectHelper.hh>
21 22  
22 23 #include <qpdf/QTC.hh>
23 24 #include <qpdf/QUtil.hh>
... ... @@ -1109,24 +1110,11 @@ QPDFObjectHandle::getGeneration() const
1109 1110 std::map<std::string, QPDFObjectHandle>
1110 1111 QPDFObjectHandle::getPageImages()
1111 1112 {
1112   - // Note: this code doesn't handle inherited resources. If this
1113   - // page dictionary doesn't have a /Resources key or has one whose
1114   - // value is null or an empty dictionary, you are supposed to walk
1115   - // up the page tree until you find a /Resources dictionary. As of
1116   - // this writing, I don't have any test files that use inherited
1117   - // resources, and hand-generating one won't be a good test because
1118   - // any mistakes in my understanding would be present in both the
1119   - // code and the test file.
1120   -
1121   - // NOTE: If support of inherited resources (see above comment) is
1122   - // implemented, edit comment in QPDFObjectHandle.hh for this
1123   - // function. Also remove call to pushInheritedAttributesToPage
1124   - // from qpdf.cc when show_page_images is true.
1125   -
1126 1113 std::map<std::string, QPDFObjectHandle> result;
1127   - if (this->hasKey("/Resources"))
  1114 + QPDFObjectHandle resources =
  1115 + QPDFPageObjectHelper(*this).getAttribute("/Resources", false);
  1116 + if (resources.isDictionary())
1128 1117 {
1129   - QPDFObjectHandle resources = this->getKey("/Resources");
1130 1118 if (resources.hasKey("/XObject"))
1131 1119 {
1132 1120 QPDFObjectHandle xobject = resources.getKey("/XObject");
... ...
libqpdf/QPDFPageDocumentHelper.cc
... ... @@ -79,20 +79,12 @@ QPDFPageDocumentHelper::flattenAnnotations(
79 79 "document does not have updated appearance streams,"
80 80 " so form fields will not be flattened");
81 81 }
82   - pushInheritedAttributesToPage();
83 82 std::vector<QPDFPageObjectHelper> pages = getAllPages();
84 83 for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
85 84 iter != pages.end(); ++iter)
86 85 {
87 86 QPDFPageObjectHelper ph(*iter);
88   - QPDFObjectHandle page_oh = ph.getObjectHandle();
89   - if (page_oh.getKey("/Resources").isIndirect())
90   - {
91   - QTC::TC("qpdf", "QPDFPageDocumentHelper indirect resources");
92   - page_oh.replaceKey("/Resources",
93   - page_oh.getKey("/Resources").shallowCopy());
94   - }
95   - QPDFObjectHandle resources = ph.getObjectHandle().getKey("/Resources");
  87 + QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
96 88 if (! resources.isDictionary())
97 89 {
98 90 // This should never happen and is not exercised in the
... ...
libqpdf/QPDFPageObjectHelper.cc
... ... @@ -15,6 +15,42 @@ QPDFPageObjectHelper::QPDFPageObjectHelper(QPDFObjectHandle oh) :
15 15 {
16 16 }
17 17  
  18 +QPDFObjectHandle
  19 +QPDFPageObjectHelper::getAttribute(std::string const& name,
  20 + bool copy_if_shared)
  21 +{
  22 + bool inheritable = ((name == "/MediaBox") || (name == "/CropBox") ||
  23 + (name == "/Resources") || (name == "/Rotate"));
  24 +
  25 + QPDFObjectHandle node = this->oh;
  26 + QPDFObjectHandle result(node.getKey(name));
  27 + std::set<QPDFObjGen> seen;
  28 + bool inherited = false;
  29 + while (inheritable && result.isNull() && node.hasKey("/Parent"))
  30 + {
  31 + seen.insert(node.getObjGen());
  32 + node = node.getKey("/Parent");
  33 + if (seen.count(node.getObjGen()))
  34 + {
  35 + break;
  36 + }
  37 + result = node.getKey(name);
  38 + if (! result.isNull())
  39 + {
  40 + QTC::TC("qpdf", "QPDFPageObjectHelper non-trivial inheritance");
  41 + inherited = true;
  42 + }
  43 + }
  44 + if (copy_if_shared && (inherited || result.isIndirect()))
  45 + {
  46 + QTC::TC("qpdf", "QPDFPageObjectHelper copy shared attribute");
  47 + result = result.shallowCopy();
  48 + this->oh.replaceKey(name, result);
  49 + }
  50 + return result;
  51 +}
  52 +
  53 +
18 54 std::map<std::string, QPDFObjectHandle>
19 55 QPDFPageObjectHelper::getPageImages()
20 56 {
... ... @@ -159,12 +195,7 @@ QPDFPageObjectHelper::removeUnreferencedResources()
159 195 std::vector<std::string> to_filter;
160 196 to_filter.push_back("/Font");
161 197 to_filter.push_back("/XObject");
162   - QPDFObjectHandle resources = this->oh.getKey("/Resources");
163   - if (resources.isDictionary())
164   - {
165   - resources = resources.shallowCopy();
166   - this->oh.replaceKey("/Resources", resources);
167   - }
  198 + QPDFObjectHandle resources = getAttribute("/Resources", true);
168 199 for (std::vector<std::string>::iterator d_iter = to_filter.begin();
169 200 d_iter != to_filter.end(); ++d_iter)
170 201 {
... ...
libqpdf/QPDF_optimization.cc
... ... @@ -208,10 +208,10 @@ QPDF::pushInheritedAttributesToPageInternal2(
208 208  
209 209 if (type == "/Pages")
210 210 {
211   - // Make a list of inheritable keys. Any key other than /Type,
212   - // /Parent, Kids, or /Count is an inheritable attribute. Push
213   - // this object onto the stack of pages nodes that have values
214   - // for this attribute.
  211 + // Make a list of inheritable keys. Only the keys /MediaBox,
  212 + // /CropBox, /Resources, and /Rotate are inheritable
  213 + // attributes. Push this object onto the stack of pages nodes
  214 + // that have values for this attribute.
215 215  
216 216 std::set<std::string> inheritable_keys;
217 217 std::set<std::string> keys = cur_pages.getKeys();
... ...
qpdf/qpdf.cc
... ... @@ -3070,10 +3070,6 @@ static void do_show_obj(QPDF& pdf, Options& o, int& exit_code)
3070 3070 static void do_show_pages(QPDF& pdf, Options& o)
3071 3071 {
3072 3072 QPDFPageDocumentHelper dh(pdf);
3073   - if (o.show_page_images)
3074   - {
3075   - dh.pushInheritedAttributesToPage();
3076   - }
3077 3073 std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
3078 3074 int pageno = 0;
3079 3075 for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
... ... @@ -3862,7 +3858,6 @@ static void handle_transformations(QPDF& pdf, Options& o)
3862 3858 QPDFPageDocumentHelper dh(pdf);
3863 3859 if (o.optimize_images)
3864 3860 {
3865   - dh.pushInheritedAttributesToPage();
3866 3861 int pageno = 0;
3867 3862 std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
3868 3863 for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
... ... @@ -3891,8 +3886,9 @@ static void handle_transformations(QPDF& pdf, Options& o)
3891 3886 sdp,
3892 3887 QPDFObjectHandle::newName("/DCTDecode"),
3893 3888 QPDFObjectHandle::newNull());
3894   - page.getKey("/Resources").getKey("/XObject").replaceKey(
3895   - name, new_image);
  3889 + ph.getAttribute("/Resources", true).
  3890 + getKey("/XObject").replaceKey(
  3891 + name, new_image);
3896 3892 }
3897 3893 }
3898 3894 }
... ... @@ -4054,7 +4050,6 @@ static void handle_page_specs(QPDF& pdf, Options& o)
4054 4050 cis->stayOpen(true);
4055 4051 }
4056 4052 QPDFPageDocumentHelper dh(*((*iter).second));
4057   - dh.pushInheritedAttributesToPage();
4058 4053 dh.removeUnreferencedResources();
4059 4054 if (cis)
4060 4055 {
... ... @@ -4532,7 +4527,6 @@ static void write_outfile(QPDF& pdf, Options& o)
4532 4527 if (! o.preserve_unreferenced_page_resources)
4533 4528 {
4534 4529 QPDFPageDocumentHelper dh(pdf);
4535   - dh.pushInheritedAttributesToPage();
4536 4530 dh.removeUnreferencedResources();
4537 4531 }
4538 4532 QPDFPageLabelDocumentHelper pldh(pdf);
... ...
qpdf/qpdf.testcov
... ... @@ -378,7 +378,6 @@ QPDFAnnotationObjectHelper default matrix 0
378 378 QPDFAnnotationObjectHelper rotate 90 0
379 379 QPDFAnnotationObjectHelper rotate 180 0
380 380 QPDFAnnotationObjectHelper rotate 270 0
381   -QPDFPageDocumentHelper indirect resources 0
382 381 QPDFPageDocumentHelper skip widget need appearances 0
383 382 QPDFPageDocumentHelper merge DR 0
384 383 QPDFPageDocumentHelper non-widget annotation 0
... ... @@ -426,3 +425,5 @@ QPDFFormFieldObjectHelper create AS from scratch 0
426 425 QPDFFormFieldObjectHelper create AP from scratch 0
427 426 QPDFFormFieldObjectHelper replaced BMC at EOF 0
428 427 QPDFFormFieldObjectHelper fallback Tf 0
  428 +QPDFPageObjectHelper non-trivial inheritance 0
  429 +QPDFPageObjectHelper copy shared attribute 0
... ...
qpdf/qtest/qpdf.test
... ... @@ -2409,7 +2409,7 @@ $td->runtest("stream detected",
2409 2409 show_ntests();
2410 2410 # ----------
2411 2411 $td->notify("--- Extraction Tests ---");
2412   -$n_tests += 12;
  2412 +$n_tests += 13;
2413 2413  
2414 2414 $td->runtest("show xref",
2415 2415 {$td->COMMAND => "qpdf encrypted-with-images.pdf" .
... ... @@ -2432,6 +2432,13 @@ $td->runtest("show-pages-images",
2432 2432 $td->EXIT_STATUS => 0},
2433 2433 $td->NORMALIZE_NEWLINES);
2434 2434  
  2435 +$td->runtest("show-pages-images",
  2436 + {$td->COMMAND => "qpdf shared-images.pdf" .
  2437 + " --show-pages --with-images"},
  2438 + {$td->FILE => "shared-images-show.out",
  2439 + $td->EXIT_STATUS => 0},
  2440 + $td->NORMALIZE_NEWLINES);
  2441 +
2435 2442 $td->runtest("show-page-1",
2436 2443 {$td->COMMAND => "qpdf encrypted-with-images.pdf" .
2437 2444 " --show-object=5,0"},
... ...
qpdf/qtest/qpdf/appearances-a-more.pdf
No preview for this file type
qpdf/qtest/qpdf/appearances-a-more2.pdf
No preview for this file type
qpdf/qtest/qpdf/appearances-a.pdf
No preview for this file type
qpdf/qtest/qpdf/form-filled-by-acrobat-out.pdf
No preview for this file type
qpdf/qtest/qpdf/need-appearances-more-out.pdf
No preview for this file type
qpdf/qtest/qpdf/need-appearances-out.pdf
No preview for this file type
qpdf/qtest/qpdf/sample-form-out.pdf
No preview for this file type
qpdf/qtest/qpdf/shared-images-show.out 0 → 100644
  1 +page 1: 3 0 R
  2 + images:
  3 + /Im1: 17 0 R, 50 x 50
  4 + /Im10: 53 0 R, 50 x 50
  5 + /Im2: 21 0 R, 50 x 50
  6 + /Im3: 25 0 R, 50 x 50
  7 + /Im4: 29 0 R, 50 x 50
  8 + /Im5: 33 0 R, 50 x 50
  9 + /Im6: 37 0 R, 50 x 50
  10 + /Im7: 41 0 R, 50 x 50
  11 + /Im8: 45 0 R, 50 x 50
  12 + /Im9: 49 0 R, 50 x 50
  13 + content:
  14 + 13 0 R
  15 +page 2: 4 0 R
  16 + images:
  17 + /Im1: 17 0 R, 50 x 50
  18 + /Im10: 53 0 R, 50 x 50
  19 + /Im2: 21 0 R, 50 x 50
  20 + /Im3: 25 0 R, 50 x 50
  21 + /Im4: 29 0 R, 50 x 50
  22 + /Im5: 33 0 R, 50 x 50
  23 + /Im6: 37 0 R, 50 x 50
  24 + /Im7: 41 0 R, 50 x 50
  25 + /Im8: 45 0 R, 50 x 50
  26 + /Im9: 49 0 R, 50 x 50
  27 + content:
  28 + 19 0 R
  29 +page 3: 5 0 R
  30 + images:
  31 + /Im1: 17 0 R, 50 x 50
  32 + /Im10: 53 0 R, 50 x 50
  33 + /Im2: 21 0 R, 50 x 50
  34 + /Im3: 25 0 R, 50 x 50
  35 + /Im4: 29 0 R, 50 x 50
  36 + /Im5: 33 0 R, 50 x 50
  37 + /Im6: 37 0 R, 50 x 50
  38 + /Im7: 41 0 R, 50 x 50
  39 + /Im8: 45 0 R, 50 x 50
  40 + /Im9: 49 0 R, 50 x 50
  41 + content:
  42 + 23 0 R
  43 +page 4: 6 0 R
  44 + images:
  45 + /Im1: 17 0 R, 50 x 50
  46 + /Im10: 53 0 R, 50 x 50
  47 + /Im2: 21 0 R, 50 x 50
  48 + /Im3: 25 0 R, 50 x 50
  49 + /Im4: 29 0 R, 50 x 50
  50 + /Im5: 33 0 R, 50 x 50
  51 + /Im6: 37 0 R, 50 x 50
  52 + /Im7: 41 0 R, 50 x 50
  53 + /Im8: 45 0 R, 50 x 50
  54 + /Im9: 49 0 R, 50 x 50
  55 + content:
  56 + 27 0 R
  57 +page 5: 7 0 R
  58 + images:
  59 + /Im1: 17 0 R, 50 x 50
  60 + /Im10: 53 0 R, 50 x 50
  61 + /Im2: 21 0 R, 50 x 50
  62 + /Im3: 25 0 R, 50 x 50
  63 + /Im4: 29 0 R, 50 x 50
  64 + /Im5: 33 0 R, 50 x 50
  65 + /Im6: 37 0 R, 50 x 50
  66 + /Im7: 41 0 R, 50 x 50
  67 + /Im8: 45 0 R, 50 x 50
  68 + /Im9: 49 0 R, 50 x 50
  69 + content:
  70 + 31 0 R
  71 +page 6: 8 0 R
  72 + images:
  73 + /Im1: 17 0 R, 50 x 50
  74 + /Im10: 53 0 R, 50 x 50
  75 + /Im2: 21 0 R, 50 x 50
  76 + /Im3: 25 0 R, 50 x 50
  77 + /Im4: 29 0 R, 50 x 50
  78 + /Im5: 33 0 R, 50 x 50
  79 + /Im6: 37 0 R, 50 x 50
  80 + /Im7: 41 0 R, 50 x 50
  81 + /Im8: 45 0 R, 50 x 50
  82 + /Im9: 49 0 R, 50 x 50
  83 + content:
  84 + 35 0 R
  85 +page 7: 9 0 R
  86 + images:
  87 + /Im1: 17 0 R, 50 x 50
  88 + /Im10: 53 0 R, 50 x 50
  89 + /Im2: 21 0 R, 50 x 50
  90 + /Im3: 25 0 R, 50 x 50
  91 + /Im4: 29 0 R, 50 x 50
  92 + /Im5: 33 0 R, 50 x 50
  93 + /Im6: 37 0 R, 50 x 50
  94 + /Im7: 41 0 R, 50 x 50
  95 + /Im8: 45 0 R, 50 x 50
  96 + /Im9: 49 0 R, 50 x 50
  97 + content:
  98 + 39 0 R
  99 +page 8: 10 0 R
  100 + images:
  101 + /Im1: 17 0 R, 50 x 50
  102 + /Im10: 53 0 R, 50 x 50
  103 + /Im2: 21 0 R, 50 x 50
  104 + /Im3: 25 0 R, 50 x 50
  105 + /Im4: 29 0 R, 50 x 50
  106 + /Im5: 33 0 R, 50 x 50
  107 + /Im6: 37 0 R, 50 x 50
  108 + /Im7: 41 0 R, 50 x 50
  109 + /Im8: 45 0 R, 50 x 50
  110 + /Im9: 49 0 R, 50 x 50
  111 + content:
  112 + 43 0 R
  113 +page 9: 11 0 R
  114 + images:
  115 + /Im1: 17 0 R, 50 x 50
  116 + /Im10: 53 0 R, 50 x 50
  117 + /Im2: 21 0 R, 50 x 50
  118 + /Im3: 25 0 R, 50 x 50
  119 + /Im4: 29 0 R, 50 x 50
  120 + /Im5: 33 0 R, 50 x 50
  121 + /Im6: 37 0 R, 50 x 50
  122 + /Im7: 41 0 R, 50 x 50
  123 + /Im8: 45 0 R, 50 x 50
  124 + /Im9: 49 0 R, 50 x 50
  125 + content:
  126 + 47 0 R
  127 +page 10: 12 0 R
  128 + images:
  129 + /Im1: 17 0 R, 50 x 50
  130 + /Im10: 53 0 R, 50 x 50
  131 + /Im2: 21 0 R, 50 x 50
  132 + /Im3: 25 0 R, 50 x 50
  133 + /Im4: 29 0 R, 50 x 50
  134 + /Im5: 33 0 R, 50 x 50
  135 + /Im6: 37 0 R, 50 x 50
  136 + /Im7: 41 0 R, 50 x 50
  137 + /Im8: 45 0 R, 50 x 50
  138 + /Im9: 49 0 R, 50 x 50
  139 + content:
  140 + 51 0 R
... ...