Commit 3d029fb17ef6b8ea9094394741f103608f698bad
Committed by
GitHub
Merge pull request #730 from m-holger/allpages
Tidy QPDF::getAllPagesInternal and QPDF::pushInheritedAttributesToPageInternal
Showing
5 changed files
with
123 additions
and
173 deletions
include/qpdf/QPDF.hh
| ... | ... | @@ -1240,7 +1240,6 @@ class QPDF |
| 1240 | 1240 | |
| 1241 | 1241 | void getAllPagesInternal( |
| 1242 | 1242 | QPDFObjectHandle cur_pages, |
| 1243 | - std::vector<QPDFObjectHandle>& result, | |
| 1244 | 1243 | std::set<QPDFObjGen>& visited, |
| 1245 | 1244 | std::set<QPDFObjGen>& seen); |
| 1246 | 1245 | void insertPage(QPDFObjectHandle newpage, int pos); |
| ... | ... | @@ -1627,10 +1626,8 @@ class QPDF |
| 1627 | 1626 | void pushInheritedAttributesToPageInternal( |
| 1628 | 1627 | QPDFObjectHandle, |
| 1629 | 1628 | std::map<std::string, std::vector<QPDFObjectHandle>>&, |
| 1630 | - std::vector<QPDFObjectHandle>& all_pages, | |
| 1631 | 1629 | bool allow_changes, |
| 1632 | - bool warn_skipped_keys, | |
| 1633 | - std::set<QPDFObjGen>& visited); | |
| 1630 | + bool warn_skipped_keys); | |
| 1634 | 1631 | void updateObjectMaps( |
| 1635 | 1632 | ObjUser const& ou, |
| 1636 | 1633 | QPDFObjectHandle oh, | ... | ... |
libqpdf/QPDF_optimization.cc
| ... | ... | @@ -148,15 +148,11 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) |
| 148 | 148 | // key_ancestors is a mapping of page attribute keys to a stack of |
| 149 | 149 | // Pages nodes that contain values for them. |
| 150 | 150 | std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; |
| 151 | - this->m->all_pages.clear(); | |
| 152 | - std::set<QPDFObjGen> visited; | |
| 153 | 151 | pushInheritedAttributesToPageInternal( |
| 154 | 152 | this->m->trailer.getKey("/Root").getKey("/Pages"), |
| 155 | 153 | key_ancestors, |
| 156 | - this->m->all_pages, | |
| 157 | 154 | allow_changes, |
| 158 | - warn_skipped_keys, | |
| 159 | - visited); | |
| 155 | + warn_skipped_keys); | |
| 160 | 156 | if (!key_ancestors.empty()) { |
| 161 | 157 | throw std::logic_error("key_ancestors not empty after" |
| 162 | 158 | " pushing inherited attributes to pages"); |
| ... | ... | @@ -169,154 +165,112 @@ void |
| 169 | 165 | QPDF::pushInheritedAttributesToPageInternal( |
| 170 | 166 | QPDFObjectHandle cur_pages, |
| 171 | 167 | std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors, |
| 172 | - std::vector<QPDFObjectHandle>& pages, | |
| 173 | 168 | bool allow_changes, |
| 174 | - bool warn_skipped_keys, | |
| 175 | - std::set<QPDFObjGen>& visited) | |
| 169 | + bool warn_skipped_keys) | |
| 176 | 170 | { |
| 177 | - QPDFObjGen this_og = cur_pages.getObjGen(); | |
| 178 | - if (visited.count(this_og) > 0) { | |
| 179 | - throw QPDFExc( | |
| 180 | - qpdf_e_pages, | |
| 181 | - this->m->file->getName(), | |
| 182 | - this->m->last_object_description, | |
| 183 | - 0, | |
| 184 | - "Loop detected in /Pages structure (inherited attributes)"); | |
| 185 | - } | |
| 186 | - visited.insert(this_og); | |
| 187 | - | |
| 188 | - if (!cur_pages.isDictionary()) { | |
| 189 | - throw QPDFExc( | |
| 190 | - qpdf_e_damaged_pdf, | |
| 191 | - this->m->file->getName(), | |
| 192 | - this->m->last_object_description, | |
| 193 | - this->m->file->getLastOffset(), | |
| 194 | - "invalid object in page tree"); | |
| 195 | - } | |
| 196 | - | |
| 197 | - // Extract the underlying dictionary object | |
| 198 | - std::string type = cur_pages.getKey("/Type").getName(); | |
| 199 | - | |
| 200 | - if (type == "/Pages") { | |
| 201 | - // Make a list of inheritable keys. Only the keys /MediaBox, | |
| 202 | - // /CropBox, /Resources, and /Rotate are inheritable | |
| 203 | - // attributes. Push this object onto the stack of pages nodes | |
| 204 | - // that have values for this attribute. | |
| 205 | - | |
| 206 | - std::set<std::string> inheritable_keys; | |
| 207 | - for (auto const& key: cur_pages.getKeys()) { | |
| 208 | - if ((key == "/MediaBox") || (key == "/CropBox") || | |
| 209 | - (key == "/Resources") || (key == "/Rotate")) { | |
| 210 | - if (!allow_changes) { | |
| 211 | - throw QPDFExc( | |
| 212 | - qpdf_e_internal, | |
| 213 | - this->m->file->getName(), | |
| 214 | - this->m->last_object_description, | |
| 215 | - this->m->file->getLastOffset(), | |
| 216 | - "optimize detected an " | |
| 217 | - "inheritable attribute when called " | |
| 218 | - "in no-change mode"); | |
| 219 | - } | |
| 171 | + // Make a list of inheritable keys. Only the keys /MediaBox, | |
| 172 | + // /CropBox, /Resources, and /Rotate are inheritable | |
| 173 | + // attributes. Push this object onto the stack of pages nodes | |
| 174 | + // that have values for this attribute. | |
| 175 | + | |
| 176 | + std::set<std::string> inheritable_keys; | |
| 177 | + for (auto const& key: cur_pages.getKeys()) { | |
| 178 | + if ((key == "/MediaBox") || (key == "/CropBox") || | |
| 179 | + (key == "/Resources") || (key == "/Rotate")) { | |
| 180 | + if (!allow_changes) { | |
| 181 | + throw QPDFExc( | |
| 182 | + qpdf_e_internal, | |
| 183 | + this->m->file->getName(), | |
| 184 | + this->m->last_object_description, | |
| 185 | + this->m->file->getLastOffset(), | |
| 186 | + "optimize detected an " | |
| 187 | + "inheritable attribute when called " | |
| 188 | + "in no-change mode"); | |
| 189 | + } | |
| 220 | 190 | |
| 221 | - // This is an inheritable resource | |
| 222 | - inheritable_keys.insert(key); | |
| 223 | - QPDFObjectHandle oh = cur_pages.getKey(key); | |
| 224 | - QTC::TC( | |
| 225 | - "qpdf", | |
| 226 | - "QPDF opt direct pages resource", | |
| 227 | - oh.isIndirect() ? 0 : 1); | |
| 228 | - if (!oh.isIndirect()) { | |
| 229 | - if (!oh.isScalar()) { | |
| 230 | - // Replace shared direct object non-scalar | |
| 231 | - // resources with indirect objects to avoid | |
| 232 | - // copying large structures around. | |
| 233 | - cur_pages.replaceKey(key, makeIndirectObject(oh)); | |
| 234 | - oh = cur_pages.getKey(key); | |
| 235 | - } else { | |
| 236 | - // It's okay to copy scalars. | |
| 237 | - QTC::TC("qpdf", "QPDF opt inherited scalar"); | |
| 238 | - } | |
| 239 | - } | |
| 240 | - key_ancestors[key].push_back(oh); | |
| 241 | - if (key_ancestors[key].size() > 1) { | |
| 242 | - QTC::TC("qpdf", "QPDF opt key ancestors depth > 1"); | |
| 243 | - } | |
| 244 | - // Remove this resource from this node. It will be | |
| 245 | - // reattached at the page level. | |
| 246 | - cur_pages.removeKey(key); | |
| 247 | - } else if (!((key == "/Type") || (key == "/Parent") || | |
| 248 | - (key == "/Kids") || (key == "/Count"))) { | |
| 249 | - // Warn when flattening, but not if the key is at the top | |
| 250 | - // level (i.e. "/Parent" not set), as we don't change these; | |
| 251 | - // but flattening removes intermediate /Pages nodes. | |
| 252 | - if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) { | |
| 253 | - QTC::TC("qpdf", "QPDF unknown key not inherited"); | |
| 254 | - setLastObjectDescription( | |
| 255 | - "Pages object", cur_pages.getObjGen()); | |
| 256 | - warn( | |
| 257 | - qpdf_e_pages, | |
| 258 | - this->m->last_object_description, | |
| 259 | - 0, | |
| 260 | - ("Unknown key " + key + | |
| 261 | - " in /Pages object" | |
| 262 | - " is being discarded as a result of" | |
| 263 | - " flattening the /Pages tree")); | |
| 191 | + // This is an inheritable resource | |
| 192 | + inheritable_keys.insert(key); | |
| 193 | + QPDFObjectHandle oh = cur_pages.getKey(key); | |
| 194 | + QTC::TC( | |
| 195 | + "qpdf", | |
| 196 | + "QPDF opt direct pages resource", | |
| 197 | + oh.isIndirect() ? 0 : 1); | |
| 198 | + if (!oh.isIndirect()) { | |
| 199 | + if (!oh.isScalar()) { | |
| 200 | + // Replace shared direct object non-scalar | |
| 201 | + // resources with indirect objects to avoid | |
| 202 | + // copying large structures around. | |
| 203 | + cur_pages.replaceKey(key, makeIndirectObject(oh)); | |
| 204 | + oh = cur_pages.getKey(key); | |
| 205 | + } else { | |
| 206 | + // It's okay to copy scalars. | |
| 207 | + QTC::TC("qpdf", "QPDF opt inherited scalar"); | |
| 264 | 208 | } |
| 265 | 209 | } |
| 210 | + key_ancestors[key].push_back(oh); | |
| 211 | + if (key_ancestors[key].size() > 1) { | |
| 212 | + QTC::TC("qpdf", "QPDF opt key ancestors depth > 1"); | |
| 213 | + } | |
| 214 | + // Remove this resource from this node. It will be | |
| 215 | + // reattached at the page level. | |
| 216 | + cur_pages.removeKey(key); | |
| 217 | + } else if (!((key == "/Type") || (key == "/Parent") || | |
| 218 | + (key == "/Kids") || (key == "/Count"))) { | |
| 219 | + // Warn when flattening, but not if the key is at the top | |
| 220 | + // level (i.e. "/Parent" not set), as we don't change these; | |
| 221 | + // but flattening removes intermediate /Pages nodes. | |
| 222 | + if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) { | |
| 223 | + QTC::TC("qpdf", "QPDF unknown key not inherited"); | |
| 224 | + setLastObjectDescription("Pages object", cur_pages.getObjGen()); | |
| 225 | + warn( | |
| 226 | + qpdf_e_pages, | |
| 227 | + this->m->last_object_description, | |
| 228 | + 0, | |
| 229 | + ("Unknown key " + key + | |
| 230 | + " in /Pages object" | |
| 231 | + " is being discarded as a result of" | |
| 232 | + " flattening the /Pages tree")); | |
| 233 | + } | |
| 266 | 234 | } |
| 235 | + } | |
| 267 | 236 | |
| 268 | - // Visit descendant nodes. | |
| 269 | - QPDFObjectHandle kids = cur_pages.getKey("/Kids"); | |
| 270 | - int n = kids.getArrayNItems(); | |
| 271 | - for (int i = 0; i < n; ++i) { | |
| 237 | + // Process descendant nodes. | |
| 238 | + for (auto& kid: cur_pages.getKey("/Kids").aitems()) { | |
| 239 | + if (kid.isDictionaryOfType("/Pages")) { | |
| 272 | 240 | pushInheritedAttributesToPageInternal( |
| 273 | - kids.getArrayItem(i), | |
| 274 | - key_ancestors, | |
| 275 | - pages, | |
| 276 | - allow_changes, | |
| 277 | - warn_skipped_keys, | |
| 278 | - visited); | |
| 279 | - } | |
| 280 | - | |
| 281 | - // For each inheritable key, pop the stack. If the stack | |
| 282 | - // becomes empty, remove it from the map. That way, the | |
| 283 | - // invariant that the list of keys in key_ancestors is exactly | |
| 284 | - // those keys for which inheritable attributes are available. | |
| 285 | - | |
| 286 | - if (!inheritable_keys.empty()) { | |
| 287 | - QTC::TC("qpdf", "QPDF opt inheritable keys"); | |
| 288 | - for (auto const& key: inheritable_keys) { | |
| 289 | - key_ancestors[key].pop_back(); | |
| 290 | - if (key_ancestors[key].empty()) { | |
| 291 | - QTC::TC("qpdf", "QPDF opt erase empty key ancestor"); | |
| 292 | - key_ancestors.erase(key); | |
| 241 | + kid, key_ancestors, allow_changes, warn_skipped_keys); | |
| 242 | + } else { | |
| 243 | + // Add all available inheritable attributes not present in | |
| 244 | + // this object to this object. | |
| 245 | + for (auto const& iter: key_ancestors) { | |
| 246 | + std::string const& key = iter.first; | |
| 247 | + if (!kid.hasKey(key)) { | |
| 248 | + QTC::TC("qpdf", "QPDF opt resource inherited"); | |
| 249 | + kid.replaceKey(key, iter.second.back()); | |
| 250 | + } else { | |
| 251 | + QTC::TC("qpdf", "QPDF opt page resource hides ancestor"); | |
| 293 | 252 | } |
| 294 | 253 | } |
| 295 | - } else { | |
| 296 | - QTC::TC("qpdf", "QPDF opt no inheritable keys"); | |
| 297 | 254 | } |
| 298 | - } else if (type == "/Page") { | |
| 299 | - // Add all available inheritable attributes not present in | |
| 300 | - // this object to this object. | |
| 301 | - for (auto const& iter: key_ancestors) { | |
| 302 | - std::string const& key = iter.first; | |
| 303 | - if (!cur_pages.hasKey(key)) { | |
| 304 | - QTC::TC("qpdf", "QPDF opt resource inherited"); | |
| 305 | - cur_pages.replaceKey(key, iter.second.back()); | |
| 306 | - } else { | |
| 307 | - QTC::TC("qpdf", "QPDF opt page resource hides ancestor"); | |
| 255 | + } | |
| 256 | + | |
| 257 | + // For each inheritable key, pop the stack. If the stack | |
| 258 | + // becomes empty, remove it from the map. That way, the | |
| 259 | + // invariant that the list of keys in key_ancestors is exactly | |
| 260 | + // those keys for which inheritable attributes are available. | |
| 261 | + | |
| 262 | + if (!inheritable_keys.empty()) { | |
| 263 | + QTC::TC("qpdf", "QPDF opt inheritable keys"); | |
| 264 | + for (auto const& key: inheritable_keys) { | |
| 265 | + key_ancestors[key].pop_back(); | |
| 266 | + if (key_ancestors[key].empty()) { | |
| 267 | + QTC::TC("qpdf", "QPDF opt erase empty key ancestor"); | |
| 268 | + key_ancestors.erase(key); | |
| 308 | 269 | } |
| 309 | 270 | } |
| 310 | - pages.push_back(cur_pages); | |
| 311 | 271 | } else { |
| 312 | - throw QPDFExc( | |
| 313 | - qpdf_e_damaged_pdf, | |
| 314 | - this->m->file->getName(), | |
| 315 | - this->m->last_object_description, | |
| 316 | - this->m->file->getLastOffset(), | |
| 317 | - "invalid Type " + type + " in page tree"); | |
| 272 | + QTC::TC("qpdf", "QPDF opt no inheritable keys"); | |
| 318 | 273 | } |
| 319 | - visited.erase(this_og); | |
| 320 | 274 | } |
| 321 | 275 | |
| 322 | 276 | void | ... | ... |
libqpdf/QPDF_pages.cc
| ... | ... | @@ -82,7 +82,10 @@ QPDF::getAllPages() |
| 82 | 82 | getRoot().replaceKey("/Pages", pages); |
| 83 | 83 | } |
| 84 | 84 | seen.clear(); |
| 85 | - getAllPagesInternal(pages, this->m->all_pages, visited, seen); | |
| 85 | + if (pages.hasKey("/Kids")) { | |
| 86 | + // Ensure we actually found a /Pages object. | |
| 87 | + getAllPagesInternal(pages, visited, seen); | |
| 88 | + } | |
| 86 | 89 | } |
| 87 | 90 | return this->m->all_pages; |
| 88 | 91 | } |
| ... | ... | @@ -90,12 +93,11 @@ QPDF::getAllPages() |
| 90 | 93 | void |
| 91 | 94 | QPDF::getAllPagesInternal( |
| 92 | 95 | QPDFObjectHandle cur_node, |
| 93 | - std::vector<QPDFObjectHandle>& result, | |
| 94 | 96 | std::set<QPDFObjGen>& visited, |
| 95 | 97 | std::set<QPDFObjGen>& seen) |
| 96 | 98 | { |
| 97 | - QPDFObjGen this_og = cur_node.getObjGen(); | |
| 98 | - if (visited.count(this_og) > 0) { | |
| 99 | + QPDFObjGen cur_node_og = cur_node.getObjGen(); | |
| 100 | + if (visited.count(cur_node_og) > 0) { | |
| 99 | 101 | throw QPDFExc( |
| 100 | 102 | qpdf_e_pages, |
| 101 | 103 | this->m->file->getName(), |
| ... | ... | @@ -103,14 +105,19 @@ QPDF::getAllPagesInternal( |
| 103 | 105 | 0, |
| 104 | 106 | "Loop detected in /Pages structure (getAllPages)"); |
| 105 | 107 | } |
| 106 | - visited.insert(this_og); | |
| 107 | - std::string wanted_type; | |
| 108 | - if (cur_node.hasKey("/Kids")) { | |
| 109 | - wanted_type = "/Pages"; | |
| 110 | - QPDFObjectHandle kids = cur_node.getKey("/Kids"); | |
| 111 | - int n = kids.getArrayNItems(); | |
| 112 | - for (int i = 0; i < n; ++i) { | |
| 113 | - QPDFObjectHandle kid = kids.getArrayItem(i); | |
| 108 | + visited.insert(cur_node_og); | |
| 109 | + if (!cur_node.isDictionaryOfType("/Pages")) { | |
| 110 | + cur_node.warnIfPossible( | |
| 111 | + "/Type key should be /Pages but is not; overriding"); | |
| 112 | + cur_node.replaceKey("/Type", "/Pages"_qpdf); | |
| 113 | + } | |
| 114 | + auto kids = cur_node.getKey("/Kids"); | |
| 115 | + int n = kids.getArrayNItems(); | |
| 116 | + for (int i = 0; i < n; ++i) { | |
| 117 | + auto kid = kids.getArrayItem(i); | |
| 118 | + if (kid.hasKey("/Kids")) { | |
| 119 | + getAllPagesInternal(kid, visited, seen); | |
| 120 | + } else { | |
| 114 | 121 | if (!kid.isIndirect()) { |
| 115 | 122 | QTC::TC("qpdf", "QPDF handle direct page object"); |
| 116 | 123 | cur_node.warnIfPossible( |
| ... | ... | @@ -129,23 +136,15 @@ QPDF::getAllPagesInternal( |
| 129 | 136 | kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); |
| 130 | 137 | kids.setArrayItem(i, kid); |
| 131 | 138 | } |
| 132 | - getAllPagesInternal(kid, result, visited, seen); | |
| 139 | + if (!kid.isDictionaryOfType("/Page")) { | |
| 140 | + kid.warnIfPossible( | |
| 141 | + "/Type key should be /Page but is not; overriding"); | |
| 142 | + kid.replaceKey("/Type", "/Page"_qpdf); | |
| 143 | + } | |
| 144 | + seen.insert(kid.getObjGen()); | |
| 145 | + m->all_pages.push_back(kid); | |
| 133 | 146 | } |
| 134 | - } else { | |
| 135 | - wanted_type = "/Page"; | |
| 136 | - seen.insert(this_og); | |
| 137 | - result.push_back(cur_node); | |
| 138 | - } | |
| 139 | - | |
| 140 | - if (!cur_node.isDictionaryOfType(wanted_type)) { | |
| 141 | - warn( | |
| 142 | - qpdf_e_damaged_pdf, | |
| 143 | - "page tree node", | |
| 144 | - this->m->file->getLastOffset(), | |
| 145 | - "/Type key should be " + wanted_type + " but is not; overriding"); | |
| 146 | - cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type)); | |
| 147 | 147 | } |
| 148 | - visited.erase(this_og); | |
| 149 | 148 | } |
| 150 | 149 | |
| 151 | 150 | void | ... | ... |
qpdf/qtest/qpdf/no-pages-types-fix.out
| 1 | -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding | |
| 2 | -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding | |
| 1 | +WARNING: no-pages-types.pdf, object 2 0 at offset 73: /Type key should be /Pages but is not; overriding | |
| 2 | +WARNING: no-pages-types.pdf, object 3 0 at offset 145: /Type key should be /Page but is not; overriding | |
| 3 | 3 | qpdf: operation succeeded with warnings; resulting file may have some problems | ... | ... |
qpdf/qtest/qpdf/no-pages-types.out
| ... | ... | @@ -2,6 +2,6 @@ checking no-pages-types.pdf |
| 2 | 2 | PDF Version: 1.3 |
| 3 | 3 | File is not encrypted |
| 4 | 4 | File is not linearized |
| 5 | -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding | |
| 6 | -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding | |
| 5 | +WARNING: no-pages-types.pdf, object 2 0 at offset 73: /Type key should be /Pages but is not; overriding | |
| 6 | +WARNING: no-pages-types.pdf, object 3 0 at offset 145: /Type key should be /Page but is not; overriding | |
| 7 | 7 | qpdf: operation succeeded with warnings | ... | ... |