Commit e9e2eef5c31396e3bbe3a0e6c54e596fb1629b6f

Authored by m-holger
1 parent eb0fec00

Refactor `Pages` cache management: move `all_pages` and related fields from `Doc` to `Pages`, update methods to use relocated fields, and streamline cache handling.
libqpdf/QPDF.cc
@@ -889,13 +889,13 @@ Common::damagedPDF(std::string const& message) const @@ -889,13 +889,13 @@ Common::damagedPDF(std::string const& message) const
889 bool 889 bool
890 QPDF::everCalledGetAllPages() const 890 QPDF::everCalledGetAllPages() const
891 { 891 {
892 - return m->ever_called_get_all_pages; 892 + return m->pages.ever_called_get_all_pages();
893 } 893 }
894 894
895 bool 895 bool
896 QPDF::everPushedInheritedAttributesToPages() const 896 QPDF::everPushedInheritedAttributesToPages() const
897 { 897 {
898 - return m->ever_pushed_inherited_attributes_to_pages; 898 + return m->pages.ever_pushed_inherited_attributes_to_pages();
899 } 899 }
900 900
901 void 901 void
libqpdf/QPDF_optimization.cc
@@ -96,12 +96,12 @@ Lin::optimize_internal( @@ -96,12 +96,12 @@ Lin::optimize_internal(
96 // Traverse pages tree pushing all inherited resources down to the page level. This also 96 // Traverse pages tree pushing all inherited resources down to the page level. This also
97 // initializes m->all_pages. 97 // initializes m->all_pages.
98 m->pages.pushInheritedAttributesToPage(allow_changes, false); 98 m->pages.pushInheritedAttributesToPage(allow_changes, false);
99 -  
100 // Traverse pages 99 // Traverse pages
101 - size_t n = m->all_pages.size();  
102 - for (size_t pageno = 0; pageno < n; ++pageno) {  
103 - updateObjectMaps(  
104 - ObjUser(ObjUser::ou_page, pageno), m->all_pages.at(pageno), skip_stream_parameters); 100 +
  101 + size_t n = 0;
  102 + for (auto const& page: m->pages.all()) {
  103 + updateObjectMaps(ObjUser(ObjUser::ou_page, n), page, skip_stream_parameters);
  104 + ++n;
105 } 105 }
106 106
107 // Traverse document-level items 107 // Traverse document-level items
libqpdf/QPDF_pages.cc
@@ -49,8 +49,8 @@ std::vector<QPDFObjectHandle> const& @@ -49,8 +49,8 @@ std::vector<QPDFObjectHandle> const&
49 Pages::all() 49 Pages::all()
50 { 50 {
51 // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages. 51 // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages.
52 - if (m->all_pages.empty() && !m->invalid_page_found) {  
53 - m->ever_called_get_all_pages = true; 52 + if (all_pages.empty() && !invalid_page_found) {
  53 + ever_called_get_all_pages_ = true;
54 auto root = qpdf.getRoot(); 54 auto root = qpdf.getRoot();
55 QPDFObjGen::set visited; 55 QPDFObjGen::set visited;
56 QPDFObjGen::set seen; 56 QPDFObjGen::set seen;
@@ -85,16 +85,16 @@ Pages::all() @@ -85,16 +85,16 @@ Pages::all()
85 try { 85 try {
86 getAllPagesInternal(pages, visited, seen, false, false); 86 getAllPagesInternal(pages, visited, seen, false, false);
87 } catch (...) { 87 } catch (...) {
88 - m->all_pages.clear();  
89 - m->invalid_page_found = false; 88 + all_pages.clear();
  89 + invalid_page_found = false;
90 throw; 90 throw;
91 } 91 }
92 - if (m->invalid_page_found) { 92 + if (invalid_page_found) {
93 flattenPagesTree(); 93 flattenPagesTree();
94 - m->invalid_page_found = false; 94 + invalid_page_found = false;
95 } 95 }
96 } 96 }
97 - return m->all_pages; 97 + return all_pages;
98 } 98 }
99 99
100 void 100 void
@@ -143,7 +143,7 @@ Pages::getAllPagesInternal( @@ -143,7 +143,7 @@ Pages::getAllPagesInternal(
143 143
144 if (!kid.isDictionary()) { 144 if (!kid.isDictionary()) {
145 kid.warn("Pages tree includes non-dictionary object; ignoring"); 145 kid.warn("Pages tree includes non-dictionary object; ignoring");
146 - m->invalid_page_found = true; 146 + invalid_page_found = true;
147 continue; 147 continue;
148 } 148 }
149 if (!kid.isIndirect()) { 149 if (!kid.isIndirect()) {
@@ -212,7 +212,7 @@ Pages::getAllPagesInternal( @@ -212,7 +212,7 @@ Pages::getAllPagesInternal(
212 cur_node.warn( 212 cur_node.warn(
213 "kid " + std::to_string(i) + 213 "kid " + std::to_string(i) +
214 " (from 0) appears more than once in the pages tree; ignoring duplicate"); 214 " (from 0) appears more than once in the pages tree; ignoring duplicate");
215 - m->invalid_page_found = true; 215 + invalid_page_found = true;
216 kid = QPDFObjectHandle::newNull(); 216 kid = QPDFObjectHandle::newNull();
217 continue; 217 continue;
218 } 218 }
@@ -229,11 +229,11 @@ Pages::getAllPagesInternal( @@ -229,11 +229,11 @@ Pages::getAllPagesInternal(
229 if (m->reconstructed_xref && errors > 2) { 229 if (m->reconstructed_xref && errors > 2) {
230 cur_node.warn( 230 cur_node.warn(
231 "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page"); 231 "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page");
232 - m->invalid_page_found = true; 232 + invalid_page_found = true;
233 kid = QPDFObjectHandle::newNull(); 233 kid = QPDFObjectHandle::newNull();
234 continue; 234 continue;
235 } 235 }
236 - m->all_pages.emplace_back(kid); 236 + all_pages.emplace_back(kid);
237 } 237 }
238 } 238 }
239 } 239 }
@@ -250,9 +250,9 @@ Pages::update_cache() @@ -250,9 +250,9 @@ Pages::update_cache()
250 // Force regeneration of the pages cache. We force immediate recalculation of all_pages since 250 // Force regeneration of the pages cache. We force immediate recalculation of all_pages since
251 // users may have references to it that they got from calls to getAllPages(). We can defer 251 // users may have references to it that they got from calls to getAllPages(). We can defer
252 // recalculation of pageobj_to_pages_pos until needed. 252 // recalculation of pageobj_to_pages_pos until needed.
253 - m->all_pages.clear();  
254 - m->pageobj_to_pages_pos.clear();  
255 - m->pushed_inherited_attributes_to_pages = false; 253 + all_pages.clear();
  254 + pageobj_to_pages_pos.clear();
  255 + pushed_inherited_attributes_to_pages = false;
256 all(); 256 all();
257 } 257 }
258 258
@@ -261,29 +261,29 @@ Pages::flattenPagesTree() @@ -261,29 +261,29 @@ Pages::flattenPagesTree()
261 { 261 {
262 // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos. 262 // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos.
263 263
264 - if (!m->pageobj_to_pages_pos.empty()) { 264 + if (!pageobj_to_pages_pos.empty()) {
265 return; 265 return;
266 } 266 }
267 267
268 - // Push inherited objects down to the /Page level. As a side effect m->all_pages will also be 268 + // Push inherited objects down to the /Page level. As a side effect all_pages will also be
269 // generated. 269 // generated.
270 pushInheritedAttributesToPage(true, true); 270 pushInheritedAttributesToPage(true, true);
271 271
272 QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages"); 272 QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages");
273 273
274 - size_t const len = m->all_pages.size(); 274 + size_t const len = all_pages.size();
275 for (size_t pos = 0; pos < len; ++pos) { 275 for (size_t pos = 0; pos < len; ++pos) {
276 // Populate pageobj_to_pages_pos and fix parent pointer. There should be no duplicates at 276 // Populate pageobj_to_pages_pos and fix parent pointer. There should be no duplicates at
277 // this point because pushInheritedAttributesToPage calls getAllPages which resolves 277 // this point because pushInheritedAttributesToPage calls getAllPages which resolves
278 // duplicates. 278 // duplicates.
279 - insertPageobjToPage(m->all_pages.at(pos), toI(pos), true);  
280 - m->all_pages.at(pos).replaceKey("/Parent", pages); 279 + insertPageobjToPage(all_pages.at(pos), toI(pos), true);
  280 + all_pages.at(pos).replaceKey("/Parent", pages);
281 } 281 }
282 282
283 - pages.replaceKey("/Kids", Array(m->all_pages)); 283 + pages.replaceKey("/Kids", Array(all_pages));
284 // /Count has not changed 284 // /Count has not changed
285 if (pages.getKey("/Count").getUIntValue() != len) { 285 if (pages.getKey("/Count").getUIntValue() != len) {
286 - if (m->invalid_page_found && pages.getKey("/Count").getUIntValue() > len) { 286 + if (invalid_page_found && pages.getKey("/Count").getUIntValue() > len) {
287 pages.replaceKey("/Count", Integer(len)); 287 pages.replaceKey("/Count", Integer(len));
288 } else { 288 } else {
289 throw std::runtime_error("/Count is wrong after flattening pages tree"); 289 throw std::runtime_error("/Count is wrong after flattening pages tree");
@@ -305,7 +305,7 @@ Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) @@ -305,7 +305,7 @@ Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
305 305
306 // The record of whether we've done this is cleared by updateAllPagesCache(). If we're warning 306 // The record of whether we've done this is cleared by updateAllPagesCache(). If we're warning
307 // for skipped keys, re-traverse unconditionally. 307 // for skipped keys, re-traverse unconditionally.
308 - if (m->pushed_inherited_attributes_to_pages && (!warn_skipped_keys)) { 308 + if (pushed_inherited_attributes_to_pages && !warn_skipped_keys) {
309 return; 309 return;
310 } 310 }
311 311
@@ -325,8 +325,8 @@ Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) @@ -325,8 +325,8 @@ Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
325 throw std::logic_error( 325 throw std::logic_error(
326 "key_ancestors not empty after pushing inherited attributes to pages"); 326 "key_ancestors not empty after pushing inherited attributes to pages");
327 } 327 }
328 - m->pushed_inherited_attributes_to_pages = true;  
329 - m->ever_pushed_inherited_attributes_to_pages = true; 328 + pushed_inherited_attributes_to_pages = true;
  329 + ever_pushed_inherited_attributes_to_pages_ = true;
330 } 330 }
331 331
332 void 332 void
@@ -427,7 +427,7 @@ Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupl @@ -427,7 +427,7 @@ Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupl
427 { 427 {
428 QPDFObjGen og(obj.getObjGen()); 428 QPDFObjGen og(obj.getObjGen());
429 if (check_duplicate) { 429 if (check_duplicate) {
430 - if (!m->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) { 430 + if (!pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) {
431 // The library never calls insertPageobjToPage in a way that causes this to happen. 431 // The library never calls insertPageobjToPage in a way that causes this to happen.
432 throw QPDFExc( 432 throw QPDFExc(
433 qpdf_e_pages, 433 qpdf_e_pages,
@@ -437,7 +437,7 @@ Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupl @@ -437,7 +437,7 @@ Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupl
437 "duplicate page reference found; this would cause loss of data"); 437 "duplicate page reference found; this would cause loss of data");
438 } 438 }
439 } else { 439 } else {
440 - m->pageobj_to_pages_pos[og] = pos; 440 + pageobj_to_pages_pos[og] = pos;
441 } 441 }
442 } 442 }
443 443
@@ -457,19 +457,19 @@ Pages::insertPage(QPDFObjectHandle newpage, int pos) @@ -457,19 +457,19 @@ Pages::insertPage(QPDFObjectHandle newpage, int pos)
457 QTC::TC("qpdf", "QPDF insert indirect page"); 457 QTC::TC("qpdf", "QPDF insert indirect page");
458 } 458 }
459 459
460 - if (pos < 0 || toS(pos) > m->all_pages.size()) { 460 + if (pos < 0 || toS(pos) > all_pages.size()) {
461 throw std::runtime_error("QPDF::insertPage called with pos out of range"); 461 throw std::runtime_error("QPDF::insertPage called with pos out of range");
462 } 462 }
463 463
464 QTC::TC( 464 QTC::TC(
465 "qpdf", 465 "qpdf",
466 "QPDF insert page", 466 "QPDF insert page",
467 - (pos == 0) ? 0 : // insert at beginning  
468 - (pos == toI(m->all_pages.size())) ? 1 // at end  
469 - : 2); // insert in middle 467 + (pos == 0) ? 0 : // insert at beginning
  468 + (pos == toI(all_pages.size())) ? 1 // at end
  469 + : 2); // insert in middle
470 470
471 auto og = newpage.getObjGen(); 471 auto og = newpage.getObjGen();
472 - if (m->pageobj_to_pages_pos.contains(og)) { 472 + if (pageobj_to_pages_pos.contains(og)) {
473 newpage = qpdf.makeIndirectObject(QPDFObjectHandle(newpage).shallowCopy()); 473 newpage = qpdf.makeIndirectObject(QPDFObjectHandle(newpage).shallowCopy());
474 } 474 }
475 475
@@ -480,9 +480,9 @@ Pages::insertPage(QPDFObjectHandle newpage, int pos) @@ -480,9 +480,9 @@ Pages::insertPage(QPDFObjectHandle newpage, int pos)
480 kids.insertItem(pos, newpage); 480 kids.insertItem(pos, newpage);
481 int npages = static_cast<int>(kids.size()); 481 int npages = static_cast<int>(kids.size());
482 pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages)); 482 pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages));
483 - m->all_pages.insert(m->all_pages.begin() + pos, newpage); 483 + all_pages.insert(all_pages.begin() + pos, newpage);
484 for (int i = pos + 1; i < npages; ++i) { 484 for (int i = pos + 1; i < npages; ++i) {
485 - insertPageobjToPage(m->all_pages.at(toS(i)), i, false); 485 + insertPageobjToPage(all_pages.at(toS(i)), i, false);
486 } 486 }
487 insertPageobjToPage(newpage, pos, true); 487 insertPageobjToPage(newpage, pos, true);
488 } 488 }
@@ -500,9 +500,9 @@ Pages ::erase(QPDFObjectHandle& page) @@ -500,9 +500,9 @@ Pages ::erase(QPDFObjectHandle& page)
500 QTC::TC( 500 QTC::TC(
501 "qpdf", 501 "qpdf",
502 "QPDF remove page", 502 "QPDF remove page",
503 - (pos == 0) ? 0 : // remove at beginning  
504 - (pos == toI(m->all_pages.size() - 1)) ? 1 // end  
505 - : 2); // remove in middle 503 + (pos == 0) ? 0 : // remove at beginning
  504 + (pos == toI(all_pages.size() - 1)) ? 1 // end
  505 + : 2); // remove in middle
506 506
507 QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages"); 507 QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages");
508 QPDFObjectHandle kids = pages.getKey("/Kids"); 508 QPDFObjectHandle kids = pages.getKey("/Kids");
@@ -510,10 +510,10 @@ Pages ::erase(QPDFObjectHandle& page) @@ -510,10 +510,10 @@ Pages ::erase(QPDFObjectHandle& page)
510 kids.eraseItem(pos); 510 kids.eraseItem(pos);
511 int npages = static_cast<int>(kids.size()); 511 int npages = static_cast<int>(kids.size());
512 pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages)); 512 pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages));
513 - m->all_pages.erase(m->all_pages.begin() + pos);  
514 - m->pageobj_to_pages_pos.erase(page.getObjGen()); 513 + all_pages.erase(all_pages.begin() + pos);
  514 + pageobj_to_pages_pos.erase(page.getObjGen());
515 for (int i = pos; i < npages; ++i) { 515 for (int i = pos; i < npages; ++i) {
516 - m->pages.insertPageobjToPage(m->all_pages.at(toS(i)), i, false); 516 + m->pages.insertPageobjToPage(all_pages.at(toS(i)), i, false);
517 } 517 }
518 } 518 }
519 519
@@ -554,8 +554,8 @@ int @@ -554,8 +554,8 @@ int
554 Pages ::find(QPDFObjGen og) 554 Pages ::find(QPDFObjGen og)
555 { 555 {
556 flattenPagesTree(); 556 flattenPagesTree();
557 - auto it = m->pageobj_to_pages_pos.find(og);  
558 - if (it == m->pageobj_to_pages_pos.end()) { 557 + auto it = pageobj_to_pages_pos.find(og);
  558 + if (it == pageobj_to_pages_pos.end()) {
559 throw QPDFExc( 559 throw QPDFExc(
560 qpdf_e_pages, 560 qpdf_e_pages,
561 m->file->getName(), 561 m->file->getName(),
libqpdf/qpdf/QPDF_private.hh
@@ -883,6 +883,18 @@ class QPDF::Doc::Pages: Common @@ -883,6 +883,18 @@ class QPDF::Doc::Pages: Common
883 void erase(QPDFObjectHandle& page); 883 void erase(QPDFObjectHandle& page);
884 void update_cache(); 884 void update_cache();
885 885
  886 + bool
  887 + ever_pushed_inherited_attributes_to_pages() const
  888 + {
  889 + return ever_pushed_inherited_attributes_to_pages_;
  890 + }
  891 +
  892 + bool
  893 + ever_called_get_all_pages() const
  894 + {
  895 + return ever_called_get_all_pages_;
  896 + }
  897 +
886 void insertPage(QPDFObjectHandle newpage, int pos); 898 void insertPage(QPDFObjectHandle newpage, int pos);
887 void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys); 899 void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys);
888 900
@@ -902,6 +914,14 @@ class QPDF::Doc::Pages: Common @@ -902,6 +914,14 @@ class QPDF::Doc::Pages: Common
902 bool media_box, 914 bool media_box,
903 bool resources); 915 bool resources);
904 916
  917 + std::vector<QPDFObjectHandle> all_pages;
  918 + std::map<QPDFObjGen, int> pageobj_to_pages_pos;
  919 +
  920 + bool pushed_inherited_attributes_to_pages{false};
  921 + bool invalid_page_found{false};
  922 + bool ever_pushed_inherited_attributes_to_pages_{false};
  923 + bool ever_called_get_all_pages_{false};
  924 +
905 }; // class QPDF::Doc::Pages 925 }; // class QPDF::Doc::Pages
906 926
907 class QPDF::Members: Doc 927 class QPDF::Members: Doc
@@ -941,12 +961,6 @@ class QPDF::Members: Doc @@ -941,12 +961,6 @@ class QPDF::Members: Doc
941 std::map<QPDFObjGen, ObjCache> obj_cache; 961 std::map<QPDFObjGen, ObjCache> obj_cache;
942 std::set<QPDFObjGen> resolving; 962 std::set<QPDFObjGen> resolving;
943 QPDFObjectHandle trailer; 963 QPDFObjectHandle trailer;
944 - std::vector<QPDFObjectHandle> all_pages;  
945 - bool invalid_page_found{false};  
946 - std::map<QPDFObjGen, int> pageobj_to_pages_pos;  
947 - bool pushed_inherited_attributes_to_pages{false};  
948 - bool ever_pushed_inherited_attributes_to_pages{false};  
949 - bool ever_called_get_all_pages{false};  
950 std::vector<QPDFExc> warnings; 964 std::vector<QPDFExc> warnings;
951 bool reconstructed_xref{false}; 965 bool reconstructed_xref{false};
952 bool in_read_xref_stream{false}; 966 bool in_read_xref_stream{false};