Commit 0f07ecdd6cdd921b22daf64137ecfc33b780d5ab

Authored by m-holger
1 parent 50f21ae7

Create `Pages` class in `QPDF::Doc` and update references

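Relocate the page-handling methods from `QPDF` into the new `QPDF::Doc::Pages` class for improved encapsulation of page-related logic. Adjust all references and call sites to use the new placement. Also remove several `QTC::TC` coverage markers from the affected code, together with their entries in `qpdf/qpdf.testcov`.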
include/qpdf/QPDF.hh
... ... @@ -827,6 +827,12 @@ class QPDF
827 827 int& O,
828 828 bool compressed);
829 829  
  830 + // Get a list of objects that would be permitted in an object stream.
  831 + template <typename T>
  832 + std::vector<T> getCompressibleObjGens();
  833 + std::vector<QPDFObjGen> getCompressibleObjVector();
  834 + std::vector<bool> getCompressibleObjSet();
  835 +
830 836 // methods to support page handling
831 837  
832 838 void getAllPagesInternal(
... ... @@ -927,12 +933,6 @@ class QPDF
927 933  
928 934 // Methods to support optimization
929 935  
930   - void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys);
931   - void pushInheritedAttributesToPageInternal(
932   - QPDFObjectHandle,
933   - std::map<std::string, std::vector<QPDFObjectHandle>>&,
934   - bool allow_changes,
935   - bool warn_skipped_keys);
936 936 void updateObjectMaps(
937 937 ObjUser const& ou,
938 938 QPDFObjectHandle oh,
... ...
libqpdf/QPDF.cc
... ... @@ -181,6 +181,7 @@ QPDF::QPDFVersion()
181 181 QPDF::Members::Members(QPDF& qpdf) :
182 182 doc(qpdf, *this),
183 183 objects(doc.objects()),
  184 + pages(doc.pages()),
184 185 log(QPDFLogger::defaultLogger()),
185 186 file(new InvalidInputSource()),
186 187 encp(new EncryptionParameters)
... ...
libqpdf/QPDF_optimization.cc
... ... @@ -7,6 +7,8 @@
7 7 #include <qpdf/QPDFWriter_private.hh>
8 8 #include <qpdf/QTC.hh>
9 9  
  10 +using Pages = QPDF::Doc::Pages;
  11 +
10 12 QPDF::ObjUser::ObjUser(user_e type) :
11 13 ou_type(type)
12 14 {
... ... @@ -86,14 +88,13 @@ QPDF::optimize_internal(
86 88 if (root.getKey("/Outlines").isDictionary()) {
87 89 QPDFObjectHandle outlines = root.getKey("/Outlines");
88 90 if (!outlines.isIndirect()) {
89   - QTC::TC("qpdf", "QPDF_optimization indirect outlines");
90 91 root.replaceKey("/Outlines", makeIndirectObject(outlines));
91 92 }
92 93 }
93 94  
94 95 // Traverse pages tree pushing all inherited resources down to the page level. This also
95 96 // initializes m->all_pages.
96   - pushInheritedAttributesToPage(allow_changes, false);
  97 + m->pages.pushInheritedAttributesToPage(allow_changes, false);
97 98  
98 99 // Traverse pages
99 100 size_t n = m->all_pages.size();
... ... @@ -136,11 +137,11 @@ void
136 137 QPDF::pushInheritedAttributesToPage()
137 138 {
138 139 // Public API should not have access to allow_changes.
139   - pushInheritedAttributesToPage(true, false);
  140 + m->pages.pushInheritedAttributesToPage(true, false);
140 141 }
141 142  
142 143 void
143   -QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
  144 +Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
144 145 {
145 146 // Traverse pages tree pushing all inherited resources down to the page level.
146 147  
... ... @@ -152,7 +153,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
152 153  
153 154 // Calling getAllPages() resolves any duplicated page objects, repairs broken nodes, and detects
154 155 // loops, so we don't have to do those activities here.
155   - getAllPages();
  156 + qpdf.getAllPages();
156 157  
157 158 // key_ancestors is a mapping of page attribute keys to a stack of Pages nodes that contain
158 159 // values for them.
... ... @@ -171,7 +172,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
171 172 }
172 173  
173 174 void
174   -QPDF::pushInheritedAttributesToPageInternal(
  175 +Pages ::pushInheritedAttributesToPageInternal(
175 176 QPDFObjectHandle cur_pages,
176 177 std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors,
177 178 bool allow_changes,
... ... @@ -183,8 +184,7 @@ QPDF::pushInheritedAttributesToPageInternal(
183 184  
184 185 std::set<std::string> inheritable_keys;
185 186 for (auto const& key: cur_pages.getKeys()) {
186   - if ((key == "/MediaBox") || (key == "/CropBox") || (key == "/Resources") ||
187   - (key == "/Rotate")) {
  187 + if (key == "/MediaBox" || key == "/CropBox" || key == "/Resources" || key == "/Rotate") {
188 188 if (!allow_changes) {
189 189 throw QPDFExc(
190 190 qpdf_e_internal,
... ... @@ -197,21 +197,19 @@ QPDF::pushInheritedAttributesToPageInternal(
197 197 // This is an inheritable resource
198 198 inheritable_keys.insert(key);
199 199 QPDFObjectHandle oh = cur_pages.getKey(key);
200   - QTC::TC("qpdf", "QPDF opt direct pages resource", oh.isIndirect() ? 0 : 1);
201   - if (!oh.isIndirect()) {
  200 + QTC::TC("qpdf", "QPDF opt direct pages resource", oh.indirect() ? 0 : 1);
  201 + if (!oh.indirect()) {
202 202 if (!oh.isScalar()) {
203 203 // Replace shared direct object non-scalar resources with indirect objects to
204 204 // avoid copying large structures around.
205   - cur_pages.replaceKey(key, makeIndirectObject(oh));
  205 + cur_pages.replaceKey(key, qpdf.makeIndirectObject(oh));
206 206 oh = cur_pages.getKey(key);
207 207 } else {
208 208 // It's okay to copy scalars.
209   - QTC::TC("qpdf", "QPDF opt inherited scalar");
210 209 }
211 210 }
212 211 key_ancestors[key].push_back(oh);
213 212 if (key_ancestors[key].size() > 1) {
214   - QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
215 213 }
216 214 // Remove this resource from this node. It will be reattached at the page level.
217 215 cur_pages.removeKey(key);
... ... @@ -219,7 +217,7 @@ QPDF::pushInheritedAttributesToPageInternal(
219 217 // Warn when flattening, but not if the key is at the top level (i.e. "/Parent" not
220 218 // set), as we don't change these; but flattening removes intermediate /Pages nodes.
221 219 if (warn_skipped_keys && cur_pages.hasKey("/Parent")) {
222   - warn(
  220 + qpdf.warn(
223 221 qpdf_e_pages,
224 222 "Pages object: object " + cur_pages.id_gen().unparse(' '),
225 223 0,
... ... @@ -242,7 +240,6 @@ QPDF::pushInheritedAttributesToPageInternal(
242 240 for (auto const& iter: key_ancestors) {
243 241 std::string const& key = iter.first;
244 242 if (!kid.hasKey(key)) {
245   - QTC::TC("qpdf", "QPDF opt resource inherited");
246 243 kid.replaceKey(key, iter.second.back());
247 244 } else {
248 245 QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
... ... @@ -256,11 +253,9 @@ QPDF::pushInheritedAttributesToPageInternal(
256 253 // which inheritable attributes are available.
257 254  
258 255 if (!inheritable_keys.empty()) {
259   - QTC::TC("qpdf", "QPDF opt inheritable keys");
260 256 for (auto const& key: inheritable_keys) {
261 257 key_ancestors[key].pop_back();
262 258 if (key_ancestors[key].empty()) {
263   - QTC::TC("qpdf", "QPDF opt erase empty key ancestor");
264 259 key_ancestors.erase(key);
265 260 }
266 261 }
... ...
libqpdf/QPDF_pages.cc
... ... @@ -37,6 +37,8 @@
37 37 // insertPage, and removePage, along with methods they call, are concerned with it. Everything else
38 38 // goes through one of those methods.
39 39  
  40 +using Pages = QPDF::Doc::Pages;
  41 +
40 42 std::vector<QPDFObjectHandle> const&
41 43 QPDF::getAllPages()
42 44 {
... ... @@ -75,14 +77,14 @@ QPDF::getAllPages()
75 77 qpdf_e_pages, m->file->getName(), "", 0, "root of pages tree has no /Kids array");
76 78 }
77 79 try {
78   - getAllPagesInternal(pages, visited, seen, false, false);
  80 + m->pages.getAllPagesInternal(pages, visited, seen, false, false);
79 81 } catch (...) {
80 82 m->all_pages.clear();
81 83 m->invalid_page_found = false;
82 84 throw;
83 85 }
84 86 if (m->invalid_page_found) {
85   - flattenPagesTree();
  87 + m->pages.flattenPagesTree();
86 88 m->invalid_page_found = false;
87 89 }
88 90 }
... ... @@ -90,7 +92,7 @@ QPDF::getAllPages()
90 92 }
91 93  
92 94 void
93   -QPDF::getAllPagesInternal(
  95 +Pages::getAllPagesInternal(
94 96 QPDFObjectHandle cur_node,
95 97 QPDFObjGen::set& visited,
96 98 QPDFObjGen::set& seen,
... ... @@ -139,17 +141,15 @@ QPDF::getAllPagesInternal(
139 141 continue;
140 142 }
141 143 if (!kid.isIndirect()) {
142   - QTC::TC("qpdf", "QPDF handle direct page object");
143 144 cur_node.warn(
144 145 "kid " + std::to_string(i) + " (from 0) is direct; converting to indirect");
145   - kid = makeIndirectObject(kid);
  146 + kid = qpdf.makeIndirectObject(kid);
146 147 ++errors;
147 148 }
148 149 if (kid.hasKey("/Kids")) {
149 150 getAllPagesInternal(kid, visited, seen, media_box, resources);
150 151 } else {
151 152 if (!media_box && !kid.getKey("/MediaBox").isRectangle()) {
152   - QTC::TC("qpdf", "QPDF missing mediabox");
153 153 kid.warn(
154 154 "kid " + std::to_string(i) +
155 155 " (from 0) MediaBox is undefined; setting to letter / ANSI A");
... ... @@ -193,7 +193,6 @@ QPDF::getAllPagesInternal(
193 193 if (!seen.add(kid)) {
194 194 // Make a copy of the page. This does the same as shallowCopyPage in
195 195 // QPDFPageObjectHelper.
196   - QTC::TC("qpdf", "QPDF resolve duplicated page object");
197 196 if (!m->reconstructed_xref) {
198 197 cur_node.warn(
199 198 "kid " + std::to_string(i) +
... ... @@ -201,7 +200,7 @@ QPDF::getAllPagesInternal(
201 200 " creating a new page object as a copy");
202 201 // This needs to be fixed. shallowCopy does not necessarily produce a valid
203 202 // page.
204   - kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
  203 + kid = qpdf.makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
205 204 seen.add(kid);
206 205 } else {
207 206 cur_node.warn(
... ... @@ -239,7 +238,6 @@ QPDF::updateAllPagesCache()
239 238 // Force regeneration of the pages cache. We force immediate recalculation of all_pages since
240 239 // users may have references to it that they got from calls to getAllPages(). We can defer
241 240 // recalculation of pageobj_to_pages_pos until needed.
242   - QTC::TC("qpdf", "QPDF updateAllPagesCache");
243 241 m->all_pages.clear();
244 242 m->pageobj_to_pages_pos.clear();
245 243 m->pushed_inherited_attributes_to_pages = false;
... ... @@ -247,7 +245,7 @@ QPDF::updateAllPagesCache()
247 245 }
248 246  
249 247 void
250   -QPDF::flattenPagesTree()
  248 +Pages::flattenPagesTree()
251 249 {
252 250 // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos.
253 251  
... ... @@ -259,7 +257,7 @@ QPDF::flattenPagesTree()
259 257 // generated.
260 258 pushInheritedAttributesToPage(true, true);
261 259  
262   - QPDFObjectHandle pages = getRoot().getKey("/Pages");
  260 + QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages");
263 261  
264 262 size_t const len = m->all_pages.size();
265 263 for (size_t pos = 0; pos < len; ++pos) {
... ... @@ -282,7 +280,7 @@ QPDF::flattenPagesTree()
282 280 }
283 281  
284 282 void
285   -QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate)
  283 +Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate)
286 284 {
287 285 QPDFObjGen og(obj.getObjGen());
288 286 if (check_duplicate) {
... ... @@ -301,24 +299,22 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli
301 299 }
302 300  
303 301 void
304   -QPDF::insertPage(QPDFObjectHandle newpage, int pos)
  302 +Pages::insertPage(QPDFObjectHandle newpage, int pos)
305 303 {
306 304 // pos is numbered from 0, so pos = 0 inserts at the beginning and pos = npages adds to the end.
307 305  
308 306 flattenPagesTree();
309 307  
310 308 if (!newpage.isIndirect()) {
311   - QTC::TC("qpdf", "QPDF insert non-indirect page");
312   - newpage = makeIndirectObject(newpage);
313   - } else if (newpage.getOwningQPDF() != this) {
314   - QTC::TC("qpdf", "QPDF insert foreign page");
  309 + newpage = qpdf.makeIndirectObject(newpage);
  310 + } else if (newpage.getOwningQPDF() != &qpdf) {
315 311 newpage.getQPDF().pushInheritedAttributesToPage();
316   - newpage = copyForeignObject(newpage);
  312 + newpage = qpdf.copyForeignObject(newpage);
317 313 } else {
318 314 QTC::TC("qpdf", "QPDF insert indirect page");
319 315 }
320 316  
321   - if ((pos < 0) || (toS(pos) > m->all_pages.size())) {
  317 + if (pos < 0 || toS(pos) > m->all_pages.size()) {
322 318 throw std::runtime_error("QPDF::insertPage called with pos out of range");
323 319 }
324 320  
... ... @@ -331,11 +327,10 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos)
331 327  
332 328 auto og = newpage.getObjGen();
333 329 if (m->pageobj_to_pages_pos.contains(og)) {
334   - QTC::TC("qpdf", "QPDF resolve duplicated page in insert");
335   - newpage = makeIndirectObject(QPDFObjectHandle(newpage).shallowCopy());
  330 + newpage = qpdf.makeIndirectObject(QPDFObjectHandle(newpage).shallowCopy());
336 331 }
337 332  
338   - QPDFObjectHandle pages = getRoot().getKey("/Pages");
  333 + QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages");
339 334 QPDFObjectHandle kids = pages.getKey("/Kids");
340 335  
341 336 newpage.replaceKey("/Parent", pages);
... ... @@ -369,7 +364,7 @@ QPDF::removePage(QPDFObjectHandle page)
369 364 m->all_pages.erase(m->all_pages.begin() + pos);
370 365 m->pageobj_to_pages_pos.erase(page.getObjGen());
371 366 for (int i = pos; i < npages; ++i) {
372   - insertPageobjToPage(m->all_pages.at(toS(i)), i, false);
  367 + m->pages.insertPageobjToPage(m->all_pages.at(toS(i)), i, false);
373 368 }
374 369 }
375 370  
... ... @@ -380,16 +375,17 @@ QPDF::addPageAt(QPDFObjectHandle newpage, bool before, QPDFObjectHandle refpage)
380 375 if (!before) {
381 376 ++refpos;
382 377 }
383   - insertPage(newpage, refpos);
  378 + m->pages.insertPage(newpage, refpos);
384 379 }
385 380  
386 381 void
387 382 QPDF::addPage(QPDFObjectHandle newpage, bool first)
388 383 {
389 384 if (first) {
390   - insertPage(newpage, 0);
  385 + m->pages.insertPage(newpage, 0);
391 386 } else {
392   - insertPage(newpage, getRoot().getKey("/Pages").getKey("/Count").getIntValueAsInt());
  387 + m->pages.insertPage(
  388 + newpage, getRoot().getKey("/Pages").getKey("/Count").getIntValueAsInt());
393 389 }
394 390 }
395 391  
... ... @@ -402,7 +398,7 @@ QPDF::findPage(QPDFObjectHandle& page)
402 398 int
403 399 QPDF::findPage(QPDFObjGen og)
404 400 {
405   - flattenPagesTree();
  401 + m->pages.flattenPagesTree();
406 402 auto it = m->pageobj_to_pages_pos.find(og);
407 403 if (it == m->pageobj_to_pages_pos.end()) {
408 404 throw QPDFExc(
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -550,7 +550,45 @@ class QPDF::Doc
550 550 private:
551 551 QPDF& qpdf;
552 552 QPDF::Members* m;
553   - }; // class Objects
  553 + }; // class QPDF::Doc::Objects
  554 +
  555 + // This class is used to represent a PDF Pages tree.
  556 + class Pages
  557 + {
  558 + public:
  559 + Pages() = delete;
  560 + Pages(Pages const&) = delete;
  561 + Pages(Pages&&) = delete;
  562 + Pages& operator=(Pages const&) = delete;
  563 + Pages& operator=(Pages&&) = delete;
  564 + ~Pages() = default;
  565 +
  566 + Pages(QPDF& qpdf, QPDF::Members* m) :
  567 + qpdf(qpdf),
  568 + m(m)
  569 + {
  570 + }
  571 +
  572 + void getAllPagesInternal(
  573 + QPDFObjectHandle cur_pages,
  574 + QPDFObjGen::set& visited,
  575 + QPDFObjGen::set& seen,
  576 + bool media_box,
  577 + bool resources);
  578 + void insertPage(QPDFObjectHandle newpage, int pos);
  579 + void flattenPagesTree();
  580 + void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate);
  581 + void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys);
  582 + void pushInheritedAttributesToPageInternal(
  583 + QPDFObjectHandle,
  584 + std::map<std::string, std::vector<QPDFObjectHandle>>&,
  585 + bool allow_changes,
  586 + bool warn_skipped_keys);
  587 +
  588 + private:
  589 + QPDF& qpdf;
  590 + QPDF::Members* m;
  591 + }; // class QPDF::Doc::Pages
554 592  
555 593 // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.
556 594 class StreamCopier
... ... @@ -575,7 +613,8 @@ class QPDF::Doc
575 613 Doc(QPDF& qpdf, QPDF::Members& m) :
576 614 qpdf(qpdf),
577 615 m(m),
578   - objects_(qpdf, &m)
  616 + objects_(qpdf, &m),
  617 + pages_(qpdf, &m)
579 618 {
580 619 }
581 620  
... ... @@ -585,6 +624,12 @@ class QPDF::Doc
585 624 return objects_;
586 625 };
587 626  
  627 + Pages&
  628 + pages()
  629 + {
  630 + return pages_;
  631 + }
  632 +
588 633 bool reconstructed_xref() const;
589 634  
590 635 QPDFAcroFormDocumentHelper&
... ... @@ -637,6 +682,7 @@ class QPDF::Doc
637 682 QPDF::Members& m;
638 683  
639 684 Objects objects_;
  685 + Pages pages_;
640 686  
641 687 // Document Helpers;
642 688 std::unique_ptr<QPDFAcroFormDocumentHelper> acroform_;
... ... @@ -659,6 +705,7 @@ class QPDF::Members
659 705 private:
660 706 Doc doc;
661 707 Doc::Objects& objects;
  708 + Doc::Pages& pages;
662 709 std::shared_ptr<QPDFLogger> log;
663 710 unsigned long long unique_id{0};
664 711 qpdf::Tokenizer tokenizer;
... ...
qpdf/qpdf.testcov
... ... @@ -2,12 +2,8 @@ ignored-scope: libtests
2 2 QPDF hint table length direct 0
3 3 QPDF P absent in lindict 1
4 4 QPDF opt direct pages resource 1
5   -QPDF opt inheritable keys 0
6 5 QPDF opt no inheritable keys 0
7   -QPDF opt erase empty key ancestor 0
8   -QPDF opt resource inherited 0
9 6 QPDF opt page resource hides ancestor 0
10   -QPDF opt key ancestors depth > 1 0
11 7 QPDF opt loop detected 0
12 8 QPDF categorize pagemode present 1
13 9 QPDF categorize pagemode outlines 1
... ... @@ -30,7 +26,6 @@ main QTest dictionary indirect 1
30 26 main QTest stream 0
31 27 QPDF lin write nshared_total > nshared_first_page 1
32 28 QPDFWriter encrypted hint stream 0
33   -QPDF opt inherited scalar 0
34 29 QPDF xref gen > 0 1
35 30 QPDF startxref more than 1024 before end 0
36 31 QPDFParser bad brace 0
... ... @@ -127,8 +122,6 @@ exercise processFile(FILE*) 0
127 122 exercise processMemoryFile 0
128 123 QPDF remove page 2
129 124 QPDF insert page 2
130   -QPDF updateAllPagesCache 0
131   -QPDF insert non-indirect page 0
132 125 QPDF insert indirect page 0
133 126 QPDF_Stream ERR shallow copy stream 0
134 127 QPDFObjectHandle newStream with string 0
... ... @@ -142,7 +135,6 @@ QPDF replace array 0
142 135 QPDF replace dictionary 0
143 136 QPDF replace stream 0
144 137 QPDF replace foreign indirect with null 0
145   -QPDF insert foreign page 0
146 138 QPDFWriter copy use_aes 1
147 139 QPDFParser indirect without context 0
148 140 QPDFObjectHandle trailing data in parse 0
... ... @@ -162,7 +154,6 @@ qpdf-c called qpdf_set_r6_encryption_parameters 0
162 154 QPDFObjectHandle EOF in inline image 0
163 155 QPDFObjectHandle inline image token 0
164 156 QPDF not caching overridden objstm object 0
165   -QPDF_optimization indirect outlines 0
166 157 QPDF xref space 2
167 158 QPDFJob pages range omitted in middle 0
168 159 QPDFWriter standard deterministic ID 1
... ... @@ -282,9 +273,6 @@ QPDFPageDocumentHelper ignore annotation with no appearance 0
282 273 QPDFFormFieldObjectHelper replaced BMC at EOF 0
283 274 QPDFFormFieldObjectHelper fallback Tf 0
284 275 QPDFPageObjectHelper copy shared attribute 1
285   -QPDF resolve duplicated page object 0
286   -QPDF handle direct page object 0
287   -QPDF missing mediabox 0
288 276 QPDF inherit mediabox 1
289 277 QPDFTokenizer finder found wrong word 0
290 278 QPDFTokenizer found EI by byte count 0
... ... @@ -402,7 +390,6 @@ QPDFAcroFormDocumentHelper /DA parse error 0
402 390 QPDFAcroFormDocumentHelper AP parse error 1
403 391 QPDFJob copy fields not this file 0
404 392 QPDFJob copy fields non-first from orig 0
405   -QPDF resolve duplicated page in insert 0
406 393 QPDFWriter exclude from object stream 0
407 394 QPDFJob weak crypto error 0
408 395 qpdf-c called qpdf_oh_is_initialized 0
... ...