Commit 9dea7d308069eeb5af3edd44198efa3d80ac83bf
1 parent
4ccca20d
Tune QPDF::getAllPagesInternal
Avoid calling getAllPagesInternal for each /Page object.
Showing
3 changed files
with
34 additions
and
28 deletions
libqpdf/QPDF_pages.cc
| @@ -82,7 +82,10 @@ QPDF::getAllPages() | @@ -82,7 +82,10 @@ QPDF::getAllPages() | ||
| 82 | getRoot().replaceKey("/Pages", pages); | 82 | getRoot().replaceKey("/Pages", pages); |
| 83 | } | 83 | } |
| 84 | seen.clear(); | 84 | seen.clear(); |
| 85 | - getAllPagesInternal(pages, visited, seen); | 85 | + if (pages.hasKey("/Kids")) { |
| 86 | + // Ensure we actually found a /Pages object. | ||
| 87 | + getAllPagesInternal(pages, visited, seen); | ||
| 88 | + } | ||
| 86 | } | 89 | } |
| 87 | return this->m->all_pages; | 90 | return this->m->all_pages; |
| 88 | } | 91 | } |
| @@ -93,8 +96,8 @@ QPDF::getAllPagesInternal( | @@ -93,8 +96,8 @@ QPDF::getAllPagesInternal( | ||
| 93 | std::set<QPDFObjGen>& visited, | 96 | std::set<QPDFObjGen>& visited, |
| 94 | std::set<QPDFObjGen>& seen) | 97 | std::set<QPDFObjGen>& seen) |
| 95 | { | 98 | { |
| 96 | - QPDFObjGen this_og = cur_node.getObjGen(); | ||
| 97 | - if (visited.count(this_og) > 0) { | 99 | + QPDFObjGen cur_node_og = cur_node.getObjGen(); |
| 100 | + if (visited.count(cur_node_og) > 0) { | ||
| 98 | throw QPDFExc( | 101 | throw QPDFExc( |
| 99 | qpdf_e_pages, | 102 | qpdf_e_pages, |
| 100 | this->m->file->getName(), | 103 | this->m->file->getName(), |
| @@ -102,14 +105,22 @@ QPDF::getAllPagesInternal( | @@ -102,14 +105,22 @@ QPDF::getAllPagesInternal( | ||
| 102 | 0, | 105 | 0, |
| 103 | "Loop detected in /Pages structure (getAllPages)"); | 106 | "Loop detected in /Pages structure (getAllPages)"); |
| 104 | } | 107 | } |
| 105 | - visited.insert(this_og); | ||
| 106 | - std::string wanted_type; | ||
| 107 | - if (cur_node.hasKey("/Kids")) { | ||
| 108 | - wanted_type = "/Pages"; | ||
| 109 | - QPDFObjectHandle kids = cur_node.getKey("/Kids"); | ||
| 110 | - int n = kids.getArrayNItems(); | ||
| 111 | - for (int i = 0; i < n; ++i) { | ||
| 112 | - QPDFObjectHandle kid = kids.getArrayItem(i); | 108 | + visited.insert(cur_node_og); |
| 109 | + if (!cur_node.isDictionaryOfType("/Pages")) { | ||
| 110 | + warn( | ||
| 111 | + qpdf_e_damaged_pdf, | ||
| 112 | + "page tree node", | ||
| 113 | + m->file->getLastOffset(), | ||
| 114 | + "/Type key should be /Pages but is not; overriding"); | ||
| 115 | + cur_node.replaceKey("/Type", "/Pages"_qpdf); | ||
| 116 | + } | ||
| 117 | + auto kids = cur_node.getKey("/Kids"); | ||
| 118 | + int n = kids.getArrayNItems(); | ||
| 119 | + for (int i = 0; i < n; ++i) { | ||
| 120 | + auto kid = kids.getArrayItem(i); | ||
| 121 | + if (kid.hasKey("/Kids")) { | ||
| 122 | + getAllPagesInternal(kid, visited, seen); | ||
| 123 | + } else { | ||
| 113 | if (!kid.isIndirect()) { | 124 | if (!kid.isIndirect()) { |
| 114 | QTC::TC("qpdf", "QPDF handle direct page object"); | 125 | QTC::TC("qpdf", "QPDF handle direct page object"); |
| 115 | cur_node.warnIfPossible( | 126 | cur_node.warnIfPossible( |
| @@ -128,23 +139,18 @@ QPDF::getAllPagesInternal( | @@ -128,23 +139,18 @@ QPDF::getAllPagesInternal( | ||
| 128 | kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); | 139 | kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); |
| 129 | kids.setArrayItem(i, kid); | 140 | kids.setArrayItem(i, kid); |
| 130 | } | 141 | } |
| 131 | - getAllPagesInternal(kid, visited, seen); | 142 | + if (!kid.isDictionaryOfType("/Page")) { |
| 143 | + warn( | ||
| 144 | + qpdf_e_damaged_pdf, | ||
| 145 | + "page tree node", | ||
| 146 | + this->m->file->getLastOffset(), | ||
| 147 | + "/Type key should be /Page but is not; overriding"); | ||
| 148 | + kid.replaceKey("/Type", "/Page"_qpdf); | ||
| 149 | + } | ||
| 150 | + seen.insert(kid.getObjGen()); | ||
| 151 | + m->all_pages.push_back(kid); | ||
| 132 | } | 152 | } |
| 133 | - } else { | ||
| 134 | - wanted_type = "/Page"; | ||
| 135 | - seen.insert(this_og); | ||
| 136 | - m->all_pages.push_back(cur_node); | ||
| 137 | - } | ||
| 138 | - | ||
| 139 | - if (!cur_node.isDictionaryOfType(wanted_type)) { | ||
| 140 | - warn( | ||
| 141 | - qpdf_e_damaged_pdf, | ||
| 142 | - "page tree node", | ||
| 143 | - this->m->file->getLastOffset(), | ||
| 144 | - "/Type key should be " + wanted_type + " but is not; overriding"); | ||
| 145 | - cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type)); | ||
| 146 | } | 153 | } |
| 147 | - visited.erase(this_og); | ||
| 148 | } | 154 | } |
| 149 | 155 | ||
| 150 | void | 156 | void |
qpdf/qtest/qpdf/no-pages-types-fix.out
| 1 | -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding | ||
| 2 | WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding | 1 | WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding |
| 2 | +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding | ||
| 3 | qpdf: operation succeeded with warnings; resulting file may have some problems | 3 | qpdf: operation succeeded with warnings; resulting file may have some problems |
qpdf/qtest/qpdf/no-pages-types.out
| @@ -2,6 +2,6 @@ checking no-pages-types.pdf | @@ -2,6 +2,6 @@ checking no-pages-types.pdf | ||
| 2 | PDF Version: 1.3 | 2 | PDF Version: 1.3 |
| 3 | File is not encrypted | 3 | File is not encrypted |
| 4 | File is not linearized | 4 | File is not linearized |
| 5 | +WARNING: no-pages-types.pdf (page tree node, offset 135): /Type key should be /Pages but is not; overriding | ||
| 5 | WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding | 6 | WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding |
| 6 | -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding | ||
| 7 | qpdf: operation succeeded with warnings | 7 | qpdf: operation succeeded with warnings |