Commit 3d029fb17ef6b8ea9094394741f103608f698bad

Authored by Jay Berkenbilt
Committed by GitHub
2 parents f8fd7d60 903a8664

Merge pull request #730 from m-holger/allpages

Tidy QPDF::getAllPagesInternal and QPDF::pushInheritedAttributesToPageInternal
include/qpdf/QPDF.hh
@@ -1240,7 +1240,6 @@ class QPDF @@ -1240,7 +1240,6 @@ class QPDF
1240 1240
1241 void getAllPagesInternal( 1241 void getAllPagesInternal(
1242 QPDFObjectHandle cur_pages, 1242 QPDFObjectHandle cur_pages,
1243 - std::vector<QPDFObjectHandle>& result,  
1244 std::set<QPDFObjGen>& visited, 1243 std::set<QPDFObjGen>& visited,
1245 std::set<QPDFObjGen>& seen); 1244 std::set<QPDFObjGen>& seen);
1246 void insertPage(QPDFObjectHandle newpage, int pos); 1245 void insertPage(QPDFObjectHandle newpage, int pos);
@@ -1627,10 +1626,8 @@ class QPDF @@ -1627,10 +1626,8 @@ class QPDF
1627 void pushInheritedAttributesToPageInternal( 1626 void pushInheritedAttributesToPageInternal(
1628 QPDFObjectHandle, 1627 QPDFObjectHandle,
1629 std::map<std::string, std::vector<QPDFObjectHandle>>&, 1628 std::map<std::string, std::vector<QPDFObjectHandle>>&,
1630 - std::vector<QPDFObjectHandle>& all_pages,  
1631 bool allow_changes, 1629 bool allow_changes,
1632 - bool warn_skipped_keys,  
1633 - std::set<QPDFObjGen>& visited); 1630 + bool warn_skipped_keys);
1634 void updateObjectMaps( 1631 void updateObjectMaps(
1635 ObjUser const& ou, 1632 ObjUser const& ou,
1636 QPDFObjectHandle oh, 1633 QPDFObjectHandle oh,
libqpdf/QPDF_optimization.cc
@@ -148,15 +148,11 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) @@ -148,15 +148,11 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
148 // key_ancestors is a mapping of page attribute keys to a stack of 148 // key_ancestors is a mapping of page attribute keys to a stack of
149 // Pages nodes that contain values for them. 149 // Pages nodes that contain values for them.
150 std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; 150 std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
151 - this->m->all_pages.clear();  
152 - std::set<QPDFObjGen> visited;  
153 pushInheritedAttributesToPageInternal( 151 pushInheritedAttributesToPageInternal(
154 this->m->trailer.getKey("/Root").getKey("/Pages"), 152 this->m->trailer.getKey("/Root").getKey("/Pages"),
155 key_ancestors, 153 key_ancestors,
156 - this->m->all_pages,  
157 allow_changes, 154 allow_changes,
158 - warn_skipped_keys,  
159 - visited); 155 + warn_skipped_keys);
160 if (!key_ancestors.empty()) { 156 if (!key_ancestors.empty()) {
161 throw std::logic_error("key_ancestors not empty after" 157 throw std::logic_error("key_ancestors not empty after"
162 " pushing inherited attributes to pages"); 158 " pushing inherited attributes to pages");
@@ -169,154 +165,112 @@ void @@ -169,154 +165,112 @@ void
169 QPDF::pushInheritedAttributesToPageInternal( 165 QPDF::pushInheritedAttributesToPageInternal(
170 QPDFObjectHandle cur_pages, 166 QPDFObjectHandle cur_pages,
171 std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors, 167 std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors,
172 - std::vector<QPDFObjectHandle>& pages,  
173 bool allow_changes, 168 bool allow_changes,
174 - bool warn_skipped_keys,  
175 - std::set<QPDFObjGen>& visited) 169 + bool warn_skipped_keys)
176 { 170 {
177 - QPDFObjGen this_og = cur_pages.getObjGen();  
178 - if (visited.count(this_og) > 0) {  
179 - throw QPDFExc(  
180 - qpdf_e_pages,  
181 - this->m->file->getName(),  
182 - this->m->last_object_description,  
183 - 0,  
184 - "Loop detected in /Pages structure (inherited attributes)");  
185 - }  
186 - visited.insert(this_og);  
187 -  
188 - if (!cur_pages.isDictionary()) {  
189 - throw QPDFExc(  
190 - qpdf_e_damaged_pdf,  
191 - this->m->file->getName(),  
192 - this->m->last_object_description,  
193 - this->m->file->getLastOffset(),  
194 - "invalid object in page tree");  
195 - }  
196 -  
197 - // Extract the underlying dictionary object  
198 - std::string type = cur_pages.getKey("/Type").getName();  
199 -  
200 - if (type == "/Pages") {  
201 - // Make a list of inheritable keys. Only the keys /MediaBox,  
202 - // /CropBox, /Resources, and /Rotate are inheritable  
203 - // attributes. Push this object onto the stack of pages nodes  
204 - // that have values for this attribute.  
205 -  
206 - std::set<std::string> inheritable_keys;  
207 - for (auto const& key: cur_pages.getKeys()) {  
208 - if ((key == "/MediaBox") || (key == "/CropBox") ||  
209 - (key == "/Resources") || (key == "/Rotate")) {  
210 - if (!allow_changes) {  
211 - throw QPDFExc(  
212 - qpdf_e_internal,  
213 - this->m->file->getName(),  
214 - this->m->last_object_description,  
215 - this->m->file->getLastOffset(),  
216 - "optimize detected an "  
217 - "inheritable attribute when called "  
218 - "in no-change mode");  
219 - } 171 + // Make a list of inheritable keys. Only the keys /MediaBox,
  172 + // /CropBox, /Resources, and /Rotate are inheritable
  173 + // attributes. Push this object onto the stack of pages nodes
  174 + // that have values for this attribute.
  175 +
  176 + std::set<std::string> inheritable_keys;
  177 + for (auto const& key: cur_pages.getKeys()) {
  178 + if ((key == "/MediaBox") || (key == "/CropBox") ||
  179 + (key == "/Resources") || (key == "/Rotate")) {
  180 + if (!allow_changes) {
  181 + throw QPDFExc(
  182 + qpdf_e_internal,
  183 + this->m->file->getName(),
  184 + this->m->last_object_description,
  185 + this->m->file->getLastOffset(),
  186 + "optimize detected an "
  187 + "inheritable attribute when called "
  188 + "in no-change mode");
  189 + }
220 190
221 - // This is an inheritable resource  
222 - inheritable_keys.insert(key);  
223 - QPDFObjectHandle oh = cur_pages.getKey(key);  
224 - QTC::TC(  
225 - "qpdf",  
226 - "QPDF opt direct pages resource",  
227 - oh.isIndirect() ? 0 : 1);  
228 - if (!oh.isIndirect()) {  
229 - if (!oh.isScalar()) {  
230 - // Replace shared direct object non-scalar  
231 - // resources with indirect objects to avoid  
232 - // copying large structures around.  
233 - cur_pages.replaceKey(key, makeIndirectObject(oh));  
234 - oh = cur_pages.getKey(key);  
235 - } else {  
236 - // It's okay to copy scalars.  
237 - QTC::TC("qpdf", "QPDF opt inherited scalar");  
238 - }  
239 - }  
240 - key_ancestors[key].push_back(oh);  
241 - if (key_ancestors[key].size() > 1) {  
242 - QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");  
243 - }  
244 - // Remove this resource from this node. It will be  
245 - // reattached at the page level.  
246 - cur_pages.removeKey(key);  
247 - } else if (!((key == "/Type") || (key == "/Parent") ||  
248 - (key == "/Kids") || (key == "/Count"))) {  
249 - // Warn when flattening, but not if the key is at the top  
250 - // level (i.e. "/Parent" not set), as we don't change these;  
251 - // but flattening removes intermediate /Pages nodes.  
252 - if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) {  
253 - QTC::TC("qpdf", "QPDF unknown key not inherited");  
254 - setLastObjectDescription(  
255 - "Pages object", cur_pages.getObjGen());  
256 - warn(  
257 - qpdf_e_pages,  
258 - this->m->last_object_description,  
259 - 0,  
260 - ("Unknown key " + key +  
261 - " in /Pages object"  
262 - " is being discarded as a result of"  
263 - " flattening the /Pages tree")); 191 + // This is an inheritable resource
  192 + inheritable_keys.insert(key);
  193 + QPDFObjectHandle oh = cur_pages.getKey(key);
  194 + QTC::TC(
  195 + "qpdf",
  196 + "QPDF opt direct pages resource",
  197 + oh.isIndirect() ? 0 : 1);
  198 + if (!oh.isIndirect()) {
  199 + if (!oh.isScalar()) {
  200 + // Replace shared direct object non-scalar
  201 + // resources with indirect objects to avoid
  202 + // copying large structures around.
  203 + cur_pages.replaceKey(key, makeIndirectObject(oh));
  204 + oh = cur_pages.getKey(key);
  205 + } else {
  206 + // It's okay to copy scalars.
  207 + QTC::TC("qpdf", "QPDF opt inherited scalar");
264 } 208 }
265 } 209 }
  210 + key_ancestors[key].push_back(oh);
  211 + if (key_ancestors[key].size() > 1) {
  212 + QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
  213 + }
  214 + // Remove this resource from this node. It will be
  215 + // reattached at the page level.
  216 + cur_pages.removeKey(key);
  217 + } else if (!((key == "/Type") || (key == "/Parent") ||
  218 + (key == "/Kids") || (key == "/Count"))) {
  219 + // Warn when flattening, but not if the key is at the top
  220 + // level (i.e. "/Parent" not set), as we don't change these;
  221 + // but flattening removes intermediate /Pages nodes.
  222 + if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) {
  223 + QTC::TC("qpdf", "QPDF unknown key not inherited");
  224 + setLastObjectDescription("Pages object", cur_pages.getObjGen());
  225 + warn(
  226 + qpdf_e_pages,
  227 + this->m->last_object_description,
  228 + 0,
  229 + ("Unknown key " + key +
  230 + " in /Pages object"
  231 + " is being discarded as a result of"
  232 + " flattening the /Pages tree"));
  233 + }
266 } 234 }
  235 + }
267 236
268 - // Visit descendant nodes.  
269 - QPDFObjectHandle kids = cur_pages.getKey("/Kids");  
270 - int n = kids.getArrayNItems();  
271 - for (int i = 0; i < n; ++i) { 237 + // Process descendant nodes.
  238 + for (auto& kid: cur_pages.getKey("/Kids").aitems()) {
  239 + if (kid.isDictionaryOfType("/Pages")) {
272 pushInheritedAttributesToPageInternal( 240 pushInheritedAttributesToPageInternal(
273 - kids.getArrayItem(i),  
274 - key_ancestors,  
275 - pages,  
276 - allow_changes,  
277 - warn_skipped_keys,  
278 - visited);  
279 - }  
280 -  
281 - // For each inheritable key, pop the stack. If the stack  
282 - // becomes empty, remove it from the map. That way, the  
283 - // invariant that the list of keys in key_ancestors is exactly  
284 - // those keys for which inheritable attributes are available.  
285 -  
286 - if (!inheritable_keys.empty()) {  
287 - QTC::TC("qpdf", "QPDF opt inheritable keys");  
288 - for (auto const& key: inheritable_keys) {  
289 - key_ancestors[key].pop_back();  
290 - if (key_ancestors[key].empty()) {  
291 - QTC::TC("qpdf", "QPDF opt erase empty key ancestor");  
292 - key_ancestors.erase(key); 241 + kid, key_ancestors, allow_changes, warn_skipped_keys);
  242 + } else {
  243 + // Add all available inheritable attributes not present in
  244 + // this object to this object.
  245 + for (auto const& iter: key_ancestors) {
  246 + std::string const& key = iter.first;
  247 + if (!kid.hasKey(key)) {
  248 + QTC::TC("qpdf", "QPDF opt resource inherited");
  249 + kid.replaceKey(key, iter.second.back());
  250 + } else {
  251 + QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
293 } 252 }
294 } 253 }
295 - } else {  
296 - QTC::TC("qpdf", "QPDF opt no inheritable keys");  
297 } 254 }
298 - } else if (type == "/Page") {  
299 - // Add all available inheritable attributes not present in  
300 - // this object to this object.  
301 - for (auto const& iter: key_ancestors) {  
302 - std::string const& key = iter.first;  
303 - if (!cur_pages.hasKey(key)) {  
304 - QTC::TC("qpdf", "QPDF opt resource inherited");  
305 - cur_pages.replaceKey(key, iter.second.back());  
306 - } else {  
307 - QTC::TC("qpdf", "QPDF opt page resource hides ancestor"); 255 + }
  256 +
  257 + // For each inheritable key, pop the stack. If the stack
  258 + // becomes empty, remove it from the map. That way, the
  259 + // invariant that the list of keys in key_ancestors is exactly
  260 + // those keys for which inheritable attributes are available.
  261 +
  262 + if (!inheritable_keys.empty()) {
  263 + QTC::TC("qpdf", "QPDF opt inheritable keys");
  264 + for (auto const& key: inheritable_keys) {
  265 + key_ancestors[key].pop_back();
  266 + if (key_ancestors[key].empty()) {
  267 + QTC::TC("qpdf", "QPDF opt erase empty key ancestor");
  268 + key_ancestors.erase(key);
308 } 269 }
309 } 270 }
310 - pages.push_back(cur_pages);  
311 } else { 271 } else {
312 - throw QPDFExc(  
313 - qpdf_e_damaged_pdf,  
314 - this->m->file->getName(),  
315 - this->m->last_object_description,  
316 - this->m->file->getLastOffset(),  
317 - "invalid Type " + type + " in page tree"); 272 + QTC::TC("qpdf", "QPDF opt no inheritable keys");
318 } 273 }
319 - visited.erase(this_og);  
320 } 274 }
321 275
322 void 276 void
libqpdf/QPDF_pages.cc
@@ -82,7 +82,10 @@ QPDF::getAllPages() @@ -82,7 +82,10 @@ QPDF::getAllPages()
82 getRoot().replaceKey("/Pages", pages); 82 getRoot().replaceKey("/Pages", pages);
83 } 83 }
84 seen.clear(); 84 seen.clear();
85 - getAllPagesInternal(pages, this->m->all_pages, visited, seen); 85 + if (pages.hasKey("/Kids")) {
  86 + // Ensure we actually found a /Pages object.
  87 + getAllPagesInternal(pages, visited, seen);
  88 + }
86 } 89 }
87 return this->m->all_pages; 90 return this->m->all_pages;
88 } 91 }
@@ -90,12 +93,11 @@ QPDF::getAllPages() @@ -90,12 +93,11 @@ QPDF::getAllPages()
90 void 93 void
91 QPDF::getAllPagesInternal( 94 QPDF::getAllPagesInternal(
92 QPDFObjectHandle cur_node, 95 QPDFObjectHandle cur_node,
93 - std::vector<QPDFObjectHandle>& result,  
94 std::set<QPDFObjGen>& visited, 96 std::set<QPDFObjGen>& visited,
95 std::set<QPDFObjGen>& seen) 97 std::set<QPDFObjGen>& seen)
96 { 98 {
97 - QPDFObjGen this_og = cur_node.getObjGen();  
98 - if (visited.count(this_og) > 0) { 99 + QPDFObjGen cur_node_og = cur_node.getObjGen();
  100 + if (visited.count(cur_node_og) > 0) {
99 throw QPDFExc( 101 throw QPDFExc(
100 qpdf_e_pages, 102 qpdf_e_pages,
101 this->m->file->getName(), 103 this->m->file->getName(),
@@ -103,14 +105,19 @@ QPDF::getAllPagesInternal( @@ -103,14 +105,19 @@ QPDF::getAllPagesInternal(
103 0, 105 0,
104 "Loop detected in /Pages structure (getAllPages)"); 106 "Loop detected in /Pages structure (getAllPages)");
105 } 107 }
106 - visited.insert(this_og);  
107 - std::string wanted_type;  
108 - if (cur_node.hasKey("/Kids")) {  
109 - wanted_type = "/Pages";  
110 - QPDFObjectHandle kids = cur_node.getKey("/Kids");  
111 - int n = kids.getArrayNItems();  
112 - for (int i = 0; i < n; ++i) {  
113 - QPDFObjectHandle kid = kids.getArrayItem(i); 108 + visited.insert(cur_node_og);
  109 + if (!cur_node.isDictionaryOfType("/Pages")) {
  110 + cur_node.warnIfPossible(
  111 + "/Type key should be /Pages but is not; overriding");
  112 + cur_node.replaceKey("/Type", "/Pages"_qpdf);
  113 + }
  114 + auto kids = cur_node.getKey("/Kids");
  115 + int n = kids.getArrayNItems();
  116 + for (int i = 0; i < n; ++i) {
  117 + auto kid = kids.getArrayItem(i);
  118 + if (kid.hasKey("/Kids")) {
  119 + getAllPagesInternal(kid, visited, seen);
  120 + } else {
114 if (!kid.isIndirect()) { 121 if (!kid.isIndirect()) {
115 QTC::TC("qpdf", "QPDF handle direct page object"); 122 QTC::TC("qpdf", "QPDF handle direct page object");
116 cur_node.warnIfPossible( 123 cur_node.warnIfPossible(
@@ -129,23 +136,15 @@ QPDF::getAllPagesInternal( @@ -129,23 +136,15 @@ QPDF::getAllPagesInternal(
129 kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); 136 kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
130 kids.setArrayItem(i, kid); 137 kids.setArrayItem(i, kid);
131 } 138 }
132 - getAllPagesInternal(kid, result, visited, seen); 139 + if (!kid.isDictionaryOfType("/Page")) {
  140 + kid.warnIfPossible(
  141 + "/Type key should be /Page but is not; overriding");
  142 + kid.replaceKey("/Type", "/Page"_qpdf);
  143 + }
  144 + seen.insert(kid.getObjGen());
  145 + m->all_pages.push_back(kid);
133 } 146 }
134 - } else {  
135 - wanted_type = "/Page";  
136 - seen.insert(this_og);  
137 - result.push_back(cur_node);  
138 - }  
139 -  
140 - if (!cur_node.isDictionaryOfType(wanted_type)) {  
141 - warn(  
142 - qpdf_e_damaged_pdf,  
143 - "page tree node",  
144 - this->m->file->getLastOffset(),  
145 - "/Type key should be " + wanted_type + " but is not; overriding");  
146 - cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));  
147 } 147 }
148 - visited.erase(this_og);  
149 } 148 }
150 149
151 void 150 void
qpdf/qtest/qpdf/no-pages-types-fix.out
1 -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding  
2 -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding 1 +WARNING: no-pages-types.pdf, object 2 0 at offset 73: /Type key should be /Pages but is not; overriding
  2 +WARNING: no-pages-types.pdf, object 3 0 at offset 145: /Type key should be /Page but is not; overriding
3 qpdf: operation succeeded with warnings; resulting file may have some problems 3 qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/no-pages-types.out
@@ -2,6 +2,6 @@ checking no-pages-types.pdf @@ -2,6 +2,6 @@ checking no-pages-types.pdf
2 PDF Version: 1.3 2 PDF Version: 1.3
3 File is not encrypted 3 File is not encrypted
4 File is not linearized 4 File is not linearized
5 -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding  
6 -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding 5 +WARNING: no-pages-types.pdf, object 2 0 at offset 73: /Type key should be /Pages but is not; overriding
  6 +WARNING: no-pages-types.pdf, object 3 0 at offset 145: /Type key should be /Page but is not; overriding
7 qpdf: operation succeeded with warnings 7 qpdf: operation succeeded with warnings