Commit 3d029fb17ef6b8ea9094394741f103608f698bad

Authored by Jay Berkenbilt
Committed by GitHub
2 parents f8fd7d60 903a8664

Merge pull request #730 from m-holger/allpages

Tidy QPDF::getAllPagesInternal and QPDF::pushInheritedAttributesToPageInternal
include/qpdf/QPDF.hh
... ... @@ -1240,7 +1240,6 @@ class QPDF
1240 1240  
1241 1241 void getAllPagesInternal(
1242 1242 QPDFObjectHandle cur_pages,
1243   - std::vector<QPDFObjectHandle>& result,
1244 1243 std::set<QPDFObjGen>& visited,
1245 1244 std::set<QPDFObjGen>& seen);
1246 1245 void insertPage(QPDFObjectHandle newpage, int pos);
... ... @@ -1627,10 +1626,8 @@ class QPDF
1627 1626 void pushInheritedAttributesToPageInternal(
1628 1627 QPDFObjectHandle,
1629 1628 std::map<std::string, std::vector<QPDFObjectHandle>>&,
1630   - std::vector<QPDFObjectHandle>& all_pages,
1631 1629 bool allow_changes,
1632   - bool warn_skipped_keys,
1633   - std::set<QPDFObjGen>& visited);
  1630 + bool warn_skipped_keys);
1634 1631 void updateObjectMaps(
1635 1632 ObjUser const& ou,
1636 1633 QPDFObjectHandle oh,
... ...
libqpdf/QPDF_optimization.cc
... ... @@ -148,15 +148,11 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
148 148 // key_ancestors is a mapping of page attribute keys to a stack of
149 149 // Pages nodes that contain values for them.
150 150 std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
151   - this->m->all_pages.clear();
152   - std::set<QPDFObjGen> visited;
153 151 pushInheritedAttributesToPageInternal(
154 152 this->m->trailer.getKey("/Root").getKey("/Pages"),
155 153 key_ancestors,
156   - this->m->all_pages,
157 154 allow_changes,
158   - warn_skipped_keys,
159   - visited);
  155 + warn_skipped_keys);
160 156 if (!key_ancestors.empty()) {
161 157 throw std::logic_error("key_ancestors not empty after"
162 158 " pushing inherited attributes to pages");
... ... @@ -169,154 +165,112 @@ void
169 165 QPDF::pushInheritedAttributesToPageInternal(
170 166 QPDFObjectHandle cur_pages,
171 167 std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors,
172   - std::vector<QPDFObjectHandle>& pages,
173 168 bool allow_changes,
174   - bool warn_skipped_keys,
175   - std::set<QPDFObjGen>& visited)
  169 + bool warn_skipped_keys)
176 170 {
177   - QPDFObjGen this_og = cur_pages.getObjGen();
178   - if (visited.count(this_og) > 0) {
179   - throw QPDFExc(
180   - qpdf_e_pages,
181   - this->m->file->getName(),
182   - this->m->last_object_description,
183   - 0,
184   - "Loop detected in /Pages structure (inherited attributes)");
185   - }
186   - visited.insert(this_og);
187   -
188   - if (!cur_pages.isDictionary()) {
189   - throw QPDFExc(
190   - qpdf_e_damaged_pdf,
191   - this->m->file->getName(),
192   - this->m->last_object_description,
193   - this->m->file->getLastOffset(),
194   - "invalid object in page tree");
195   - }
196   -
197   - // Extract the underlying dictionary object
198   - std::string type = cur_pages.getKey("/Type").getName();
199   -
200   - if (type == "/Pages") {
201   - // Make a list of inheritable keys. Only the keys /MediaBox,
202   - // /CropBox, /Resources, and /Rotate are inheritable
203   - // attributes. Push this object onto the stack of pages nodes
204   - // that have values for this attribute.
205   -
206   - std::set<std::string> inheritable_keys;
207   - for (auto const& key: cur_pages.getKeys()) {
208   - if ((key == "/MediaBox") || (key == "/CropBox") ||
209   - (key == "/Resources") || (key == "/Rotate")) {
210   - if (!allow_changes) {
211   - throw QPDFExc(
212   - qpdf_e_internal,
213   - this->m->file->getName(),
214   - this->m->last_object_description,
215   - this->m->file->getLastOffset(),
216   - "optimize detected an "
217   - "inheritable attribute when called "
218   - "in no-change mode");
219   - }
  171 + // Make a list of inheritable keys. Only the keys /MediaBox,
  172 + // /CropBox, /Resources, and /Rotate are inheritable
  173 + // attributes. Push this object onto the stack of pages nodes
  174 + // that have values for this attribute.
  175 +
  176 + std::set<std::string> inheritable_keys;
  177 + for (auto const& key: cur_pages.getKeys()) {
  178 + if ((key == "/MediaBox") || (key == "/CropBox") ||
  179 + (key == "/Resources") || (key == "/Rotate")) {
  180 + if (!allow_changes) {
  181 + throw QPDFExc(
  182 + qpdf_e_internal,
  183 + this->m->file->getName(),
  184 + this->m->last_object_description,
  185 + this->m->file->getLastOffset(),
  186 + "optimize detected an "
  187 + "inheritable attribute when called "
  188 + "in no-change mode");
  189 + }
220 190  
221   - // This is an inheritable resource
222   - inheritable_keys.insert(key);
223   - QPDFObjectHandle oh = cur_pages.getKey(key);
224   - QTC::TC(
225   - "qpdf",
226   - "QPDF opt direct pages resource",
227   - oh.isIndirect() ? 0 : 1);
228   - if (!oh.isIndirect()) {
229   - if (!oh.isScalar()) {
230   - // Replace shared direct object non-scalar
231   - // resources with indirect objects to avoid
232   - // copying large structures around.
233   - cur_pages.replaceKey(key, makeIndirectObject(oh));
234   - oh = cur_pages.getKey(key);
235   - } else {
236   - // It's okay to copy scalars.
237   - QTC::TC("qpdf", "QPDF opt inherited scalar");
238   - }
239   - }
240   - key_ancestors[key].push_back(oh);
241   - if (key_ancestors[key].size() > 1) {
242   - QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
243   - }
244   - // Remove this resource from this node. It will be
245   - // reattached at the page level.
246   - cur_pages.removeKey(key);
247   - } else if (!((key == "/Type") || (key == "/Parent") ||
248   - (key == "/Kids") || (key == "/Count"))) {
249   - // Warn when flattening, but not if the key is at the top
250   - // level (i.e. "/Parent" not set), as we don't change these;
251   - // but flattening removes intermediate /Pages nodes.
252   - if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) {
253   - QTC::TC("qpdf", "QPDF unknown key not inherited");
254   - setLastObjectDescription(
255   - "Pages object", cur_pages.getObjGen());
256   - warn(
257   - qpdf_e_pages,
258   - this->m->last_object_description,
259   - 0,
260   - ("Unknown key " + key +
261   - " in /Pages object"
262   - " is being discarded as a result of"
263   - " flattening the /Pages tree"));
  191 + // This is an inheritable resource
  192 + inheritable_keys.insert(key);
  193 + QPDFObjectHandle oh = cur_pages.getKey(key);
  194 + QTC::TC(
  195 + "qpdf",
  196 + "QPDF opt direct pages resource",
  197 + oh.isIndirect() ? 0 : 1);
  198 + if (!oh.isIndirect()) {
  199 + if (!oh.isScalar()) {
  200 + // Replace shared direct object non-scalar
  201 + // resources with indirect objects to avoid
  202 + // copying large structures around.
  203 + cur_pages.replaceKey(key, makeIndirectObject(oh));
  204 + oh = cur_pages.getKey(key);
  205 + } else {
  206 + // It's okay to copy scalars.
  207 + QTC::TC("qpdf", "QPDF opt inherited scalar");
264 208 }
265 209 }
  210 + key_ancestors[key].push_back(oh);
  211 + if (key_ancestors[key].size() > 1) {
  212 + QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
  213 + }
  214 + // Remove this resource from this node. It will be
  215 + // reattached at the page level.
  216 + cur_pages.removeKey(key);
  217 + } else if (!((key == "/Type") || (key == "/Parent") ||
  218 + (key == "/Kids") || (key == "/Count"))) {
  219 + // Warn when flattening, but not if the key is at the top
  220 + // level (i.e. "/Parent" not set), as we don't change these;
  221 + // but flattening removes intermediate /Pages nodes.
  222 + if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) {
  223 + QTC::TC("qpdf", "QPDF unknown key not inherited");
  224 + setLastObjectDescription("Pages object", cur_pages.getObjGen());
  225 + warn(
  226 + qpdf_e_pages,
  227 + this->m->last_object_description,
  228 + 0,
  229 + ("Unknown key " + key +
  230 + " in /Pages object"
  231 + " is being discarded as a result of"
  232 + " flattening the /Pages tree"));
  233 + }
266 234 }
  235 + }
267 236  
268   - // Visit descendant nodes.
269   - QPDFObjectHandle kids = cur_pages.getKey("/Kids");
270   - int n = kids.getArrayNItems();
271   - for (int i = 0; i < n; ++i) {
  237 + // Process descendant nodes.
  238 + for (auto& kid: cur_pages.getKey("/Kids").aitems()) {
  239 + if (kid.isDictionaryOfType("/Pages")) {
272 240 pushInheritedAttributesToPageInternal(
273   - kids.getArrayItem(i),
274   - key_ancestors,
275   - pages,
276   - allow_changes,
277   - warn_skipped_keys,
278   - visited);
279   - }
280   -
281   - // For each inheritable key, pop the stack. If the stack
282   - // becomes empty, remove it from the map. That way, the
283   - // invariant that the list of keys in key_ancestors is exactly
284   - // those keys for which inheritable attributes are available.
285   -
286   - if (!inheritable_keys.empty()) {
287   - QTC::TC("qpdf", "QPDF opt inheritable keys");
288   - for (auto const& key: inheritable_keys) {
289   - key_ancestors[key].pop_back();
290   - if (key_ancestors[key].empty()) {
291   - QTC::TC("qpdf", "QPDF opt erase empty key ancestor");
292   - key_ancestors.erase(key);
  241 + kid, key_ancestors, allow_changes, warn_skipped_keys);
  242 + } else {
  243 + // Add all available inheritable attributes not present in
  244 + // this object to this object.
  245 + for (auto const& iter: key_ancestors) {
  246 + std::string const& key = iter.first;
  247 + if (!kid.hasKey(key)) {
  248 + QTC::TC("qpdf", "QPDF opt resource inherited");
  249 + kid.replaceKey(key, iter.second.back());
  250 + } else {
  251 + QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
293 252 }
294 253 }
295   - } else {
296   - QTC::TC("qpdf", "QPDF opt no inheritable keys");
297 254 }
298   - } else if (type == "/Page") {
299   - // Add all available inheritable attributes not present in
300   - // this object to this object.
301   - for (auto const& iter: key_ancestors) {
302   - std::string const& key = iter.first;
303   - if (!cur_pages.hasKey(key)) {
304   - QTC::TC("qpdf", "QPDF opt resource inherited");
305   - cur_pages.replaceKey(key, iter.second.back());
306   - } else {
307   - QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
  255 + }
  256 +
  257 + // For each inheritable key, pop the stack. If the stack
  258 + // becomes empty, remove it from the map. That way, the
  259 + // invariant that the list of keys in key_ancestors is exactly
  260 + // those keys for which inheritable attributes are available.
  261 +
  262 + if (!inheritable_keys.empty()) {
  263 + QTC::TC("qpdf", "QPDF opt inheritable keys");
  264 + for (auto const& key: inheritable_keys) {
  265 + key_ancestors[key].pop_back();
  266 + if (key_ancestors[key].empty()) {
  267 + QTC::TC("qpdf", "QPDF opt erase empty key ancestor");
  268 + key_ancestors.erase(key);
308 269 }
309 270 }
310   - pages.push_back(cur_pages);
311 271 } else {
312   - throw QPDFExc(
313   - qpdf_e_damaged_pdf,
314   - this->m->file->getName(),
315   - this->m->last_object_description,
316   - this->m->file->getLastOffset(),
317   - "invalid Type " + type + " in page tree");
  272 + QTC::TC("qpdf", "QPDF opt no inheritable keys");
318 273 }
319   - visited.erase(this_og);
320 274 }
321 275  
322 276 void
... ...
libqpdf/QPDF_pages.cc
... ... @@ -82,7 +82,10 @@ QPDF::getAllPages()
82 82 getRoot().replaceKey("/Pages", pages);
83 83 }
84 84 seen.clear();
85   - getAllPagesInternal(pages, this->m->all_pages, visited, seen);
  85 + if (pages.hasKey("/Kids")) {
  86 + // Ensure we actually found a /Pages object.
  87 + getAllPagesInternal(pages, visited, seen);
  88 + }
86 89 }
87 90 return this->m->all_pages;
88 91 }
... ... @@ -90,12 +93,11 @@ QPDF::getAllPages()
90 93 void
91 94 QPDF::getAllPagesInternal(
92 95 QPDFObjectHandle cur_node,
93   - std::vector<QPDFObjectHandle>& result,
94 96 std::set<QPDFObjGen>& visited,
95 97 std::set<QPDFObjGen>& seen)
96 98 {
97   - QPDFObjGen this_og = cur_node.getObjGen();
98   - if (visited.count(this_og) > 0) {
  99 + QPDFObjGen cur_node_og = cur_node.getObjGen();
  100 + if (visited.count(cur_node_og) > 0) {
99 101 throw QPDFExc(
100 102 qpdf_e_pages,
101 103 this->m->file->getName(),
... ... @@ -103,14 +105,19 @@ QPDF::getAllPagesInternal(
103 105 0,
104 106 "Loop detected in /Pages structure (getAllPages)");
105 107 }
106   - visited.insert(this_og);
107   - std::string wanted_type;
108   - if (cur_node.hasKey("/Kids")) {
109   - wanted_type = "/Pages";
110   - QPDFObjectHandle kids = cur_node.getKey("/Kids");
111   - int n = kids.getArrayNItems();
112   - for (int i = 0; i < n; ++i) {
113   - QPDFObjectHandle kid = kids.getArrayItem(i);
  108 + visited.insert(cur_node_og);
  109 + if (!cur_node.isDictionaryOfType("/Pages")) {
  110 + cur_node.warnIfPossible(
  111 + "/Type key should be /Pages but is not; overriding");
  112 + cur_node.replaceKey("/Type", "/Pages"_qpdf);
  113 + }
  114 + auto kids = cur_node.getKey("/Kids");
  115 + int n = kids.getArrayNItems();
  116 + for (int i = 0; i < n; ++i) {
  117 + auto kid = kids.getArrayItem(i);
  118 + if (kid.hasKey("/Kids")) {
  119 + getAllPagesInternal(kid, visited, seen);
  120 + } else {
114 121 if (!kid.isIndirect()) {
115 122 QTC::TC("qpdf", "QPDF handle direct page object");
116 123 cur_node.warnIfPossible(
... ... @@ -129,23 +136,15 @@ QPDF::getAllPagesInternal(
129 136 kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
130 137 kids.setArrayItem(i, kid);
131 138 }
132   - getAllPagesInternal(kid, result, visited, seen);
  139 + if (!kid.isDictionaryOfType("/Page")) {
  140 + kid.warnIfPossible(
  141 + "/Type key should be /Page but is not; overriding");
  142 + kid.replaceKey("/Type", "/Page"_qpdf);
  143 + }
  144 + seen.insert(kid.getObjGen());
  145 + m->all_pages.push_back(kid);
133 146 }
134   - } else {
135   - wanted_type = "/Page";
136   - seen.insert(this_og);
137   - result.push_back(cur_node);
138   - }
139   -
140   - if (!cur_node.isDictionaryOfType(wanted_type)) {
141   - warn(
142   - qpdf_e_damaged_pdf,
143   - "page tree node",
144   - this->m->file->getLastOffset(),
145   - "/Type key should be " + wanted_type + " but is not; overriding");
146   - cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));
147 147 }
148   - visited.erase(this_og);
149 148 }
150 149  
151 150 void
... ...
qpdf/qtest/qpdf/no-pages-types-fix.out
1   -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
2   -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
  1 +WARNING: no-pages-types.pdf, object 2 0 at offset 73: /Type key should be /Pages but is not; overriding
  2 +WARNING: no-pages-types.pdf, object 3 0 at offset 145: /Type key should be /Page but is not; overriding
3 3 qpdf: operation succeeded with warnings; resulting file may have some problems
... ...
qpdf/qtest/qpdf/no-pages-types.out
... ... @@ -2,6 +2,6 @@ checking no-pages-types.pdf
2 2 PDF Version: 1.3
3 3 File is not encrypted
4 4 File is not linearized
5   -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
6   -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
  5 +WARNING: no-pages-types.pdf, object 2 0 at offset 73: /Type key should be /Pages but is not; overriding
  6 +WARNING: no-pages-types.pdf, object 3 0 at offset 145: /Type key should be /Page but is not; overriding
7 7 qpdf: operation succeeded with warnings
... ...