Commit 9dea7d308069eeb5af3edd44198efa3d80ac83bf

Authored by m-holger
1 parent 4ccca20d

Tune QPDF::getAllPagesInternal

Avoid calling getAllPagesInternal for each /Page object.
libqpdf/QPDF_pages.cc
@@ -82,7 +82,10 @@ QPDF::getAllPages() @@ -82,7 +82,10 @@ QPDF::getAllPages()
82 getRoot().replaceKey("/Pages", pages); 82 getRoot().replaceKey("/Pages", pages);
83 } 83 }
84 seen.clear(); 84 seen.clear();
85 - getAllPagesInternal(pages, visited, seen); 85 + if (pages.hasKey("/Kids")) {
  86 + // Ensure we actually found a /Pages object.
  87 + getAllPagesInternal(pages, visited, seen);
  88 + }
86 } 89 }
87 return this->m->all_pages; 90 return this->m->all_pages;
88 } 91 }
@@ -93,8 +96,8 @@ QPDF::getAllPagesInternal( @@ -93,8 +96,8 @@ QPDF::getAllPagesInternal(
93 std::set<QPDFObjGen>& visited, 96 std::set<QPDFObjGen>& visited,
94 std::set<QPDFObjGen>& seen) 97 std::set<QPDFObjGen>& seen)
95 { 98 {
96 - QPDFObjGen this_og = cur_node.getObjGen();  
97 - if (visited.count(this_og) > 0) { 99 + QPDFObjGen cur_node_og = cur_node.getObjGen();
  100 + if (visited.count(cur_node_og) > 0) {
98 throw QPDFExc( 101 throw QPDFExc(
99 qpdf_e_pages, 102 qpdf_e_pages,
100 this->m->file->getName(), 103 this->m->file->getName(),
@@ -102,14 +105,22 @@ QPDF::getAllPagesInternal( @@ -102,14 +105,22 @@ QPDF::getAllPagesInternal(
102 0, 105 0,
103 "Loop detected in /Pages structure (getAllPages)"); 106 "Loop detected in /Pages structure (getAllPages)");
104 } 107 }
105 - visited.insert(this_og);  
106 - std::string wanted_type;  
107 - if (cur_node.hasKey("/Kids")) {  
108 - wanted_type = "/Pages";  
109 - QPDFObjectHandle kids = cur_node.getKey("/Kids");  
110 - int n = kids.getArrayNItems();  
111 - for (int i = 0; i < n; ++i) {  
112 - QPDFObjectHandle kid = kids.getArrayItem(i); 108 + visited.insert(cur_node_og);
  109 + if (!cur_node.isDictionaryOfType("/Pages")) {
  110 + warn(
  111 + qpdf_e_damaged_pdf,
  112 + "page tree node",
  113 + m->file->getLastOffset(),
  114 + "/Type key should be /Pages but is not; overriding");
  115 + cur_node.replaceKey("/Type", "/Pages"_qpdf);
  116 + }
  117 + auto kids = cur_node.getKey("/Kids");
  118 + int n = kids.getArrayNItems();
  119 + for (int i = 0; i < n; ++i) {
  120 + auto kid = kids.getArrayItem(i);
  121 + if (kid.hasKey("/Kids")) {
  122 + getAllPagesInternal(kid, visited, seen);
  123 + } else {
113 if (!kid.isIndirect()) { 124 if (!kid.isIndirect()) {
114 QTC::TC("qpdf", "QPDF handle direct page object"); 125 QTC::TC("qpdf", "QPDF handle direct page object");
115 cur_node.warnIfPossible( 126 cur_node.warnIfPossible(
@@ -128,23 +139,18 @@ QPDF::getAllPagesInternal( @@ -128,23 +139,18 @@ QPDF::getAllPagesInternal(
128 kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); 139 kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
129 kids.setArrayItem(i, kid); 140 kids.setArrayItem(i, kid);
130 } 141 }
131 - getAllPagesInternal(kid, visited, seen); 142 + if (!kid.isDictionaryOfType("/Page")) {
  143 + warn(
  144 + qpdf_e_damaged_pdf,
  145 + "page tree node",
  146 + this->m->file->getLastOffset(),
  147 + "/Type key should be /Page but is not; overriding");
  148 + kid.replaceKey("/Type", "/Page"_qpdf);
  149 + }
  150 + seen.insert(kid.getObjGen());
  151 + m->all_pages.push_back(kid);
132 } 152 }
133 - } else {  
134 - wanted_type = "/Page";  
135 - seen.insert(this_og);  
136 - m->all_pages.push_back(cur_node);  
137 - }  
138 -  
139 - if (!cur_node.isDictionaryOfType(wanted_type)) {  
140 - warn(  
141 - qpdf_e_damaged_pdf,  
142 - "page tree node",  
143 - this->m->file->getLastOffset(),  
144 - "/Type key should be " + wanted_type + " but is not; overriding");  
145 - cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));  
146 } 153 }
147 - visited.erase(this_og);  
148 } 154 }
149 155
150 void 156 void
qpdf/qtest/qpdf/no-pages-types-fix.out
1 -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding  
2 WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding 1 WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
  2 +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
3 qpdf: operation succeeded with warnings; resulting file may have some problems 3 qpdf: operation succeeded with warnings; resulting file may have some problems
qpdf/qtest/qpdf/no-pages-types.out
@@ -2,6 +2,6 @@ checking no-pages-types.pdf @@ -2,6 +2,6 @@ checking no-pages-types.pdf
2 PDF Version: 1.3 2 PDF Version: 1.3
3 File is not encrypted 3 File is not encrypted
4 File is not linearized 4 File is not linearized
  5 +WARNING: no-pages-types.pdf (page tree node, offset 135): /Type key should be /Pages but is not; overriding
5 WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding 6 WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
6 -WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding  
7 qpdf: operation succeeded with warnings 7 qpdf: operation succeeded with warnings