Commit e9e2eef5c31396e3bbe3a0e6c54e596fb1629b6f

Authored by m-holger
1 parent eb0fec00

Refactor `Pages` cache management: move `all_pages` and related fields from `Doc` to `Pages`, update methods to use relocated fields, and streamline cache handling.
libqpdf/QPDF.cc
... ... @@ -889,13 +889,13 @@ Common::damagedPDF(std::string const& message) const
889 889 bool
890 890 QPDF::everCalledGetAllPages() const
891 891 {
892   - return m->ever_called_get_all_pages;
  892 + return m->pages.ever_called_get_all_pages();
893 893 }
894 894  
895 895 bool
896 896 QPDF::everPushedInheritedAttributesToPages() const
897 897 {
898   - return m->ever_pushed_inherited_attributes_to_pages;
  898 + return m->pages.ever_pushed_inherited_attributes_to_pages();
899 899 }
900 900  
901 901 void
... ...
libqpdf/QPDF_optimization.cc
... ... @@ -96,12 +96,12 @@ Lin::optimize_internal(
96 96 // Traverse pages tree pushing all inherited resources down to the page level. This also
97 97 // initializes m->all_pages.
98 98 m->pages.pushInheritedAttributesToPage(allow_changes, false);
99   -
100 99 // Traverse pages
101   - size_t n = m->all_pages.size();
102   - for (size_t pageno = 0; pageno < n; ++pageno) {
103   - updateObjectMaps(
104   - ObjUser(ObjUser::ou_page, pageno), m->all_pages.at(pageno), skip_stream_parameters);
  100 +
  101 + size_t n = 0;
  102 + for (auto const& page: m->pages.all()) {
  103 + updateObjectMaps(ObjUser(ObjUser::ou_page, n), page, skip_stream_parameters);
  104 + ++n;
105 105 }
106 106  
107 107 // Traverse document-level items
... ...
libqpdf/QPDF_pages.cc
... ... @@ -49,8 +49,8 @@ std::vector<QPDFObjectHandle> const&
49 49 Pages::all()
50 50 {
51 51 // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages.
52   - if (m->all_pages.empty() && !m->invalid_page_found) {
53   - m->ever_called_get_all_pages = true;
  52 + if (all_pages.empty() && !invalid_page_found) {
  53 + ever_called_get_all_pages_ = true;
54 54 auto root = qpdf.getRoot();
55 55 QPDFObjGen::set visited;
56 56 QPDFObjGen::set seen;
... ... @@ -85,16 +85,16 @@ Pages::all()
85 85 try {
86 86 getAllPagesInternal(pages, visited, seen, false, false);
87 87 } catch (...) {
88   - m->all_pages.clear();
89   - m->invalid_page_found = false;
  88 + all_pages.clear();
  89 + invalid_page_found = false;
90 90 throw;
91 91 }
92   - if (m->invalid_page_found) {
  92 + if (invalid_page_found) {
93 93 flattenPagesTree();
94   - m->invalid_page_found = false;
  94 + invalid_page_found = false;
95 95 }
96 96 }
97   - return m->all_pages;
  97 + return all_pages;
98 98 }
99 99  
100 100 void
... ... @@ -143,7 +143,7 @@ Pages::getAllPagesInternal(
143 143  
144 144 if (!kid.isDictionary()) {
145 145 kid.warn("Pages tree includes non-dictionary object; ignoring");
146   - m->invalid_page_found = true;
  146 + invalid_page_found = true;
147 147 continue;
148 148 }
149 149 if (!kid.isIndirect()) {
... ... @@ -212,7 +212,7 @@ Pages::getAllPagesInternal(
212 212 cur_node.warn(
213 213 "kid " + std::to_string(i) +
214 214 " (from 0) appears more than once in the pages tree; ignoring duplicate");
215   - m->invalid_page_found = true;
  215 + invalid_page_found = true;
216 216 kid = QPDFObjectHandle::newNull();
217 217 continue;
218 218 }
... ... @@ -229,11 +229,11 @@ Pages::getAllPagesInternal(
229 229 if (m->reconstructed_xref && errors > 2) {
230 230 cur_node.warn(
231 231 "kid " + std::to_string(i) + " (from 0) has too many errors; ignoring page");
232   - m->invalid_page_found = true;
  232 + invalid_page_found = true;
233 233 kid = QPDFObjectHandle::newNull();
234 234 continue;
235 235 }
236   - m->all_pages.emplace_back(kid);
  236 + all_pages.emplace_back(kid);
237 237 }
238 238 }
239 239 }
... ... @@ -250,9 +250,9 @@ Pages::update_cache()
250 250 // Force regeneration of the pages cache. We force immediate recalculation of all_pages since
251 251 // users may have references to it that they got from calls to getAllPages(). We can defer
252 252 // recalculation of pageobj_to_pages_pos until needed.
253   - m->all_pages.clear();
254   - m->pageobj_to_pages_pos.clear();
255   - m->pushed_inherited_attributes_to_pages = false;
  253 + all_pages.clear();
  254 + pageobj_to_pages_pos.clear();
  255 + pushed_inherited_attributes_to_pages = false;
256 256 all();
257 257 }
258 258  
... ... @@ -261,29 +261,29 @@ Pages::flattenPagesTree()
261 261 {
262 262 // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos.
263 263  
264   - if (!m->pageobj_to_pages_pos.empty()) {
  264 + if (!pageobj_to_pages_pos.empty()) {
265 265 return;
266 266 }
267 267  
268   - // Push inherited objects down to the /Page level. As a side effect m->all_pages will also be
  268 + // Push inherited objects down to the /Page level. As a side effect all_pages will also be
269 269 // generated.
270 270 pushInheritedAttributesToPage(true, true);
271 271  
272 272 QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages");
273 273  
274   - size_t const len = m->all_pages.size();
  274 + size_t const len = all_pages.size();
275 275 for (size_t pos = 0; pos < len; ++pos) {
276 276 // Populate pageobj_to_pages_pos and fix parent pointer. There should be no duplicates at
277 277 // this point because pushInheritedAttributesToPage calls getAllPages which resolves
278 278 // duplicates.
279   - insertPageobjToPage(m->all_pages.at(pos), toI(pos), true);
280   - m->all_pages.at(pos).replaceKey("/Parent", pages);
  279 + insertPageobjToPage(all_pages.at(pos), toI(pos), true);
  280 + all_pages.at(pos).replaceKey("/Parent", pages);
281 281 }
282 282  
283   - pages.replaceKey("/Kids", Array(m->all_pages));
  283 + pages.replaceKey("/Kids", Array(all_pages));
284 284 // /Count has not changed
285 285 if (pages.getKey("/Count").getUIntValue() != len) {
286   - if (m->invalid_page_found && pages.getKey("/Count").getUIntValue() > len) {
  286 + if (invalid_page_found && pages.getKey("/Count").getUIntValue() > len) {
287 287 pages.replaceKey("/Count", Integer(len));
288 288 } else {
289 289 throw std::runtime_error("/Count is wrong after flattening pages tree");
... ... @@ -305,7 +305,7 @@ Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
305 305  
306 306 // The record of whether we've done this is cleared by updateAllPagesCache(). If we're warning
307 307 // for skipped keys, re-traverse unconditionally.
308   - if (m->pushed_inherited_attributes_to_pages && (!warn_skipped_keys)) {
  308 + if (pushed_inherited_attributes_to_pages && !warn_skipped_keys) {
309 309 return;
310 310 }
311 311  
... ... @@ -325,8 +325,8 @@ Pages::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
325 325 throw std::logic_error(
326 326 "key_ancestors not empty after pushing inherited attributes to pages");
327 327 }
328   - m->pushed_inherited_attributes_to_pages = true;
329   - m->ever_pushed_inherited_attributes_to_pages = true;
  328 + pushed_inherited_attributes_to_pages = true;
  329 + ever_pushed_inherited_attributes_to_pages_ = true;
330 330 }
331 331  
332 332 void
... ... @@ -427,7 +427,7 @@ Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupl
427 427 {
428 428 QPDFObjGen og(obj.getObjGen());
429 429 if (check_duplicate) {
430   - if (!m->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) {
  430 + if (!pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) {
431 431 // The library never calls insertPageobjToPage in a way that causes this to happen.
432 432 throw QPDFExc(
433 433 qpdf_e_pages,
... ... @@ -437,7 +437,7 @@ Pages::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupl
437 437 "duplicate page reference found; this would cause loss of data");
438 438 }
439 439 } else {
440   - m->pageobj_to_pages_pos[og] = pos;
  440 + pageobj_to_pages_pos[og] = pos;
441 441 }
442 442 }
443 443  
... ... @@ -457,19 +457,19 @@ Pages::insertPage(QPDFObjectHandle newpage, int pos)
457 457 QTC::TC("qpdf", "QPDF insert indirect page");
458 458 }
459 459  
460   - if (pos < 0 || toS(pos) > m->all_pages.size()) {
  460 + if (pos < 0 || toS(pos) > all_pages.size()) {
461 461 throw std::runtime_error("QPDF::insertPage called with pos out of range");
462 462 }
463 463  
464 464 QTC::TC(
465 465 "qpdf",
466 466 "QPDF insert page",
467   - (pos == 0) ? 0 : // insert at beginning
468   - (pos == toI(m->all_pages.size())) ? 1 // at end
469   - : 2); // insert in middle
  467 + (pos == 0) ? 0 : // insert at beginning
  468 + (pos == toI(all_pages.size())) ? 1 // at end
  469 + : 2); // insert in middle
470 470  
471 471 auto og = newpage.getObjGen();
472   - if (m->pageobj_to_pages_pos.contains(og)) {
  472 + if (pageobj_to_pages_pos.contains(og)) {
473 473 newpage = qpdf.makeIndirectObject(QPDFObjectHandle(newpage).shallowCopy());
474 474 }
475 475  
... ... @@ -480,9 +480,9 @@ Pages::insertPage(QPDFObjectHandle newpage, int pos)
480 480 kids.insertItem(pos, newpage);
481 481 int npages = static_cast<int>(kids.size());
482 482 pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages));
483   - m->all_pages.insert(m->all_pages.begin() + pos, newpage);
  483 + all_pages.insert(all_pages.begin() + pos, newpage);
484 484 for (int i = pos + 1; i < npages; ++i) {
485   - insertPageobjToPage(m->all_pages.at(toS(i)), i, false);
  485 + insertPageobjToPage(all_pages.at(toS(i)), i, false);
486 486 }
487 487 insertPageobjToPage(newpage, pos, true);
488 488 }
... ... @@ -500,9 +500,9 @@ Pages ::erase(QPDFObjectHandle& page)
500 500 QTC::TC(
501 501 "qpdf",
502 502 "QPDF remove page",
503   - (pos == 0) ? 0 : // remove at beginning
504   - (pos == toI(m->all_pages.size() - 1)) ? 1 // end
505   - : 2); // remove in middle
  503 + (pos == 0) ? 0 : // remove at beginning
  504 + (pos == toI(all_pages.size() - 1)) ? 1 // end
  505 + : 2); // remove in middle
506 506  
507 507 QPDFObjectHandle pages = qpdf.getRoot().getKey("/Pages");
508 508 QPDFObjectHandle kids = pages.getKey("/Kids");
... ... @@ -510,10 +510,10 @@ Pages ::erase(QPDFObjectHandle& page)
510 510 kids.eraseItem(pos);
511 511 int npages = static_cast<int>(kids.size());
512 512 pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages));
513   - m->all_pages.erase(m->all_pages.begin() + pos);
514   - m->pageobj_to_pages_pos.erase(page.getObjGen());
  513 + all_pages.erase(all_pages.begin() + pos);
  514 + pageobj_to_pages_pos.erase(page.getObjGen());
515 515 for (int i = pos; i < npages; ++i) {
516   - m->pages.insertPageobjToPage(m->all_pages.at(toS(i)), i, false);
  516 + m->pages.insertPageobjToPage(all_pages.at(toS(i)), i, false);
517 517 }
518 518 }
519 519  
... ... @@ -554,8 +554,8 @@ int
554 554 Pages ::find(QPDFObjGen og)
555 555 {
556 556 flattenPagesTree();
557   - auto it = m->pageobj_to_pages_pos.find(og);
558   - if (it == m->pageobj_to_pages_pos.end()) {
  557 + auto it = pageobj_to_pages_pos.find(og);
  558 + if (it == pageobj_to_pages_pos.end()) {
559 559 throw QPDFExc(
560 560 qpdf_e_pages,
561 561 m->file->getName(),
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -883,6 +883,18 @@ class QPDF::Doc::Pages: Common
883 883 void erase(QPDFObjectHandle& page);
884 884 void update_cache();
885 885  
  886 + bool
  887 + ever_pushed_inherited_attributes_to_pages() const
  888 + {
  889 + return ever_pushed_inherited_attributes_to_pages_;
  890 + }
  891 +
  892 + bool
  893 + ever_called_get_all_pages() const
  894 + {
  895 + return ever_called_get_all_pages_;
  896 + }
  897 +
886 898 void insertPage(QPDFObjectHandle newpage, int pos);
887 899 void pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys);
888 900  
... ... @@ -902,6 +914,14 @@ class QPDF::Doc::Pages: Common
902 914 bool media_box,
903 915 bool resources);
904 916  
  917 + std::vector<QPDFObjectHandle> all_pages;
  918 + std::map<QPDFObjGen, int> pageobj_to_pages_pos;
  919 +
  920 + bool pushed_inherited_attributes_to_pages{false};
  921 + bool invalid_page_found{false};
  922 + bool ever_pushed_inherited_attributes_to_pages_{false};
  923 + bool ever_called_get_all_pages_{false};
  924 +
905 925 }; // class QPDF::Doc::Pages
906 926  
907 927 class QPDF::Members: Doc
... ... @@ -941,12 +961,6 @@ class QPDF::Members: Doc
941 961 std::map<QPDFObjGen, ObjCache> obj_cache;
942 962 std::set<QPDFObjGen> resolving;
943 963 QPDFObjectHandle trailer;
944   - std::vector<QPDFObjectHandle> all_pages;
945   - bool invalid_page_found{false};
946   - std::map<QPDFObjGen, int> pageobj_to_pages_pos;
947   - bool pushed_inherited_attributes_to_pages{false};
948   - bool ever_pushed_inherited_attributes_to_pages{false};
949   - bool ever_called_get_all_pages{false};
950 964 std::vector<QPDFExc> warnings;
951 965 bool reconstructed_xref{false};
952 966 bool in_read_xref_stream{false};
... ...