Commit 256f063300ff29dd1fdd9cb16825401090c895d6
1 parent
bd4bc944
Refactor optimization structures: move `ObjUser` and `UpdateObjectMapsFrame` to `Lin`, encapsulate optimization data within `Lin`, update usage across the codebase, and streamline related logic in `QPDF_linearization`.
Showing 3 changed files with 78 additions and 92 deletions
include/qpdf/QPDF.hh
libqpdf/QPDF_linearization.cc
| ... | ... | @@ -69,20 +69,20 @@ load_vector_vector( |
| 69 | 69 | bit_stream.skipToNextByte(); |
| 70 | 70 | } |
| 71 | 71 | |
| 72 | -QPDF::ObjUser::ObjUser(user_e type) : | |
| 72 | +Lin::ObjUser::ObjUser(user_e type) : | |
| 73 | 73 | ou_type(type) |
| 74 | 74 | { |
| 75 | 75 | qpdf_expect(type == ou_root); |
| 76 | 76 | } |
| 77 | 77 | |
| 78 | -QPDF::ObjUser::ObjUser(user_e type, size_t pageno) : | |
| 78 | +Lin::ObjUser::ObjUser(user_e type, size_t pageno) : | |
| 79 | 79 | ou_type(type), |
| 80 | 80 | pageno(pageno) |
| 81 | 81 | { |
| 82 | 82 | qpdf_expect(type == ou_page || type == ou_thumb); |
| 83 | 83 | } |
| 84 | 84 | |
| 85 | -QPDF::ObjUser::ObjUser(user_e type, std::string const& key) : | |
| 85 | +Lin::ObjUser::ObjUser(user_e type, std::string const& key) : | |
| 86 | 86 | ou_type(type), |
| 87 | 87 | key(key) |
| 88 | 88 | { |
| ... | ... | @@ -90,7 +90,7 @@ QPDF::ObjUser::ObjUser(user_e type, std::string const& key) : |
| 90 | 90 | } |
| 91 | 91 | |
| 92 | 92 | bool |
| 93 | -QPDF::ObjUser::operator<(ObjUser const& rhs) const | |
| 93 | +Lin::ObjUser::operator<(ObjUser const& rhs) const | |
| 94 | 94 | { |
| 95 | 95 | if (ou_type < rhs.ou_type) { |
| 96 | 96 | return true; |
| ... | ... | @@ -106,8 +106,8 @@ QPDF::ObjUser::operator<(ObjUser const& rhs) const |
| 106 | 106 | return false; |
| 107 | 107 | } |
| 108 | 108 | |
| 109 | -QPDF::UpdateObjectMapsFrame::UpdateObjectMapsFrame( | |
| 110 | - QPDF::ObjUser const& ou, QPDFObjectHandle oh, bool top) : | |
| 109 | +Lin::UpdateObjectMapsFrame::UpdateObjectMapsFrame( | |
| 110 | + ObjUser const& ou, QPDFObjectHandle oh, bool top) : | |
| 111 | 111 | ou(ou), |
| 112 | 112 | oh(oh), |
| 113 | 113 | top(top) |
| ... | ... | @@ -137,7 +137,7 @@ Lin::optimize_internal( |
| 137 | 137 | bool allow_changes, |
| 138 | 138 | std::function<int(QPDFObjectHandle&)> skip_stream_parameters) |
| 139 | 139 | { |
| 140 | - if (!m->obj_user_to_objects.empty()) { | |
| 140 | + if (!obj_user_to_objects_.empty()) { | |
| 141 | 141 | // already optimized |
| 142 | 142 | return; |
| 143 | 143 | } |
| ... | ... | @@ -186,9 +186,9 @@ Lin::optimize_internal( |
| 186 | 186 | } |
| 187 | 187 | |
| 188 | 188 | ObjUser root_ou = ObjUser(ObjUser::ou_root); |
| 189 | - auto root_og = QPDFObjGen(root.getObjGen()); | |
| 190 | - m->obj_user_to_objects[root_ou].insert(root_og); | |
| 191 | - m->object_to_obj_users[root_og].insert(root_ou); | |
| 189 | + auto root_og =root.id_gen(); | |
| 190 | + obj_user_to_objects_[root_ou].insert(root_og); | |
| 191 | + object_to_obj_users_[root_og].insert(root_ou); | |
| 192 | 192 | |
| 193 | 193 | filterCompressedObjects(object_stream_data); |
| 194 | 194 | } |
| ... | ... | @@ -217,14 +217,14 @@ Lin::updateObjectMaps( |
| 217 | 217 | } |
| 218 | 218 | } |
| 219 | 219 | |
| 220 | - if (cur.oh.isIndirect()) { | |
| 220 | + if (cur.oh.indirect()) { | |
| 221 | 221 | QPDFObjGen og(cur.oh.getObjGen()); |
| 222 | 222 | if (!visited.add(og)) { |
| 223 | 223 | QTC::TC("qpdf", "QPDF opt loop detected"); |
| 224 | 224 | continue; |
| 225 | 225 | } |
| 226 | - m->obj_user_to_objects[cur.ou].insert(og); | |
| 227 | - m->object_to_obj_users[og].insert(cur.ou); | |
| 226 | + obj_user_to_objects_[cur.ou].insert(og); | |
| 227 | + object_to_obj_users_[og].insert(cur.ou); | |
| 228 | 228 | } |
| 229 | 229 | |
| 230 | 230 | if (cur.oh.isArray()) { |
| ... | ... | @@ -280,34 +280,30 @@ Lin::filterCompressedObjects(std::map<int, int> const& object_stream_data) |
| 280 | 280 | std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects; |
| 281 | 281 | std::map<QPDFObjGen, std::set<ObjUser>> t_object_to_obj_users; |
| 282 | 282 | |
| 283 | - for (auto const& i1: m->obj_user_to_objects) { | |
| 284 | - ObjUser const& ou = i1.first; | |
| 285 | - // Loop over objects. | |
| 286 | - for (auto const& og: i1.second) { | |
| 283 | + for (auto const& [ou, ogs]: obj_user_to_objects_) { | |
| 284 | + for (auto const& og: ogs) { | |
| 287 | 285 | auto i2 = object_stream_data.find(og.getObj()); |
| 288 | 286 | if (i2 == object_stream_data.end()) { |
| 289 | 287 | t_obj_user_to_objects[ou].insert(og); |
| 290 | 288 | } else { |
| 291 | - t_obj_user_to_objects[ou].insert(QPDFObjGen(i2->second, 0)); | |
| 289 | + t_obj_user_to_objects[ou].insert({i2->second, 0}); | |
| 292 | 290 | } |
| 293 | 291 | } |
| 294 | 292 | } |
| 295 | 293 | |
| 296 | - for (auto const& i1: m->object_to_obj_users) { | |
| 297 | - QPDFObjGen const& og = i1.first; | |
| 298 | - // Loop over obj_users. | |
| 299 | - for (auto const& ou: i1.second) { | |
| 294 | + for (auto const& [og, ous]: object_to_obj_users_) { | |
| 295 | + for (auto const& ou: ous) { | |
| 300 | 296 | auto i2 = object_stream_data.find(og.getObj()); |
| 301 | 297 | if (i2 == object_stream_data.end()) { |
| 302 | 298 | t_object_to_obj_users[og].insert(ou); |
| 303 | 299 | } else { |
| 304 | - t_object_to_obj_users[QPDFObjGen(i2->second, 0)].insert(ou); | |
| 300 | + t_object_to_obj_users[{i2->second, 0}].insert(ou); | |
| 305 | 301 | } |
| 306 | 302 | } |
| 307 | 303 | } |
| 308 | 304 | |
| 309 | - m->obj_user_to_objects = t_obj_user_to_objects; | |
| 310 | - m->object_to_obj_users = t_object_to_obj_users; | |
| 305 | + obj_user_to_objects_ = std::move(t_obj_user_to_objects); | |
| 306 | + object_to_obj_users_ = std::move(t_object_to_obj_users); | |
| 311 | 307 | } |
| 312 | 308 | |
| 313 | 309 | void |
| ... | ... | @@ -324,10 +320,8 @@ Lin::filterCompressedObjects(QPDFWriter::ObjTable const& obj) |
| 324 | 320 | std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects; |
| 325 | 321 | std::map<QPDFObjGen, std::set<ObjUser>> t_object_to_obj_users; |
| 326 | 322 | |
| 327 | - for (auto const& i1: m->obj_user_to_objects) { | |
| 328 | - ObjUser const& ou = i1.first; | |
| 329 | - // Loop over objects. | |
| 330 | - for (auto const& og: i1.second) { | |
| 323 | + for (auto const& [ou, ogs]: obj_user_to_objects_) { | |
| 324 | + for (auto const& og: ogs) { | |
| 331 | 325 | if (obj.contains(og)) { |
| 332 | 326 | if (auto const& i2 = obj[og].object_stream; i2 <= 0) { |
| 333 | 327 | t_obj_user_to_objects[ou].insert(og); |
| ... | ... | @@ -338,22 +332,21 @@ Lin::filterCompressedObjects(QPDFWriter::ObjTable const& obj) |
| 338 | 332 | } |
| 339 | 333 | } |
| 340 | 334 | |
| 341 | - for (auto const& i1: m->object_to_obj_users) { | |
| 342 | - QPDFObjGen const& og = i1.first; | |
| 335 | + for (auto const& [og, ous]: object_to_obj_users_) { | |
| 343 | 336 | if (obj.contains(og)) { |
| 344 | 337 | // Loop over obj_users. |
| 345 | - for (auto const& ou: i1.second) { | |
| 338 | + for (auto const& ou: ous) { | |
| 346 | 339 | if (auto i2 = obj[og].object_stream; i2 <= 0) { |
| 347 | 340 | t_object_to_obj_users[og].insert(ou); |
| 348 | 341 | } else { |
| 349 | - t_object_to_obj_users[QPDFObjGen(i2, 0)].insert(ou); | |
| 342 | + t_object_to_obj_users[{i2, 0}].insert(ou); | |
| 350 | 343 | } |
| 351 | 344 | } |
| 352 | 345 | } |
| 353 | 346 | } |
| 354 | 347 | |
| 355 | - m->obj_user_to_objects = t_obj_user_to_objects; | |
| 356 | - m->object_to_obj_users = t_object_to_obj_users; | |
| 348 | + obj_user_to_objects_ = std::move(t_obj_user_to_objects); | |
| 349 | + object_to_obj_users_ = std::move(t_object_to_obj_users); | |
| 357 | 350 | } |
| 358 | 351 | |
| 359 | 352 | void |
| ... | ... | @@ -793,12 +786,12 @@ qpdf_offset_t |
| 793 | 786 | Lin::maxEnd(ObjUser const& ou) |
| 794 | 787 | { |
| 795 | 788 | no_ci_stop_if( |
| 796 | - !m->obj_user_to_objects.contains(ou), | |
| 789 | + !obj_user_to_objects_.contains(ou), | |
| 797 | 790 | "no entry in object user table for requested object user" // |
| 798 | 791 | ); |
| 799 | 792 | |
| 800 | 793 | qpdf_offset_t end = 0; |
| 801 | - for (auto const& og: m->obj_user_to_objects[ou]) { | |
| 794 | + for (auto const& og: obj_user_to_objects_[ou]) { | |
| 802 | 795 | no_ci_stop_if( |
| 803 | 796 | !m->obj_cache.contains(og), "unknown object referenced in object user table" // |
| 804 | 797 | ); |
| ... | ... | @@ -1233,7 +1226,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) |
| 1233 | 1226 | // actual offsets and lengths are not computed here, but anything related to object ordering is. |
| 1234 | 1227 | |
| 1235 | 1228 | util::assertion( |
| 1236 | - !m->object_to_obj_users.empty(), | |
| 1229 | + !object_to_obj_users_.empty(), | |
| 1237 | 1230 | "INTERNAL ERROR: QPDF::calculateLinearizationData called before optimize()" // |
| 1238 | 1231 | ); |
| 1239 | 1232 | // Note that we can't call optimize here because we don't know whether it should be called |
| ... | ... | @@ -1325,10 +1318,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) |
| 1325 | 1318 | std::set<QPDFObjGen> lc_outlines; |
| 1326 | 1319 | std::set<QPDFObjGen> lc_root; |
| 1327 | 1320 | |
| 1328 | - for (auto& oiter: m->object_to_obj_users) { | |
| 1329 | - QPDFObjGen const& og = oiter.first; | |
| 1330 | - std::set<ObjUser>& ous = oiter.second; | |
| 1331 | - | |
| 1321 | + for (auto& [og, ous]: object_to_obj_users_) { | |
| 1332 | 1322 | bool in_open_document = false; |
| 1333 | 1323 | bool in_first_page = false; |
| 1334 | 1324 | int other_pages = 0; |
| ... | ... | @@ -1500,11 +1490,11 @@ Lin::calculateLinearizationData(T const& object_stream_data) |
| 1500 | 1490 | |
| 1501 | 1491 | ObjUser ou(ObjUser::ou_page, i); |
| 1502 | 1492 | no_ci_stop_if( |
| 1503 | - !m->obj_user_to_objects.contains(ou), | |
| 1493 | + !obj_user_to_objects_.contains(ou), | |
| 1504 | 1494 | "found unreferenced page while calculating linearization data" // |
| 1505 | 1495 | ); |
| 1506 | 1496 | |
| 1507 | - for (auto const& og: m->obj_user_to_objects[ou]) { | |
| 1497 | + for (auto const& og: obj_user_to_objects_[ou]) { | |
| 1508 | 1498 | if (lc_other_page_private.erase(og)) { |
| 1509 | 1499 | m->part7.emplace_back(qpdf.getObject(og)); |
| 1510 | 1500 | ++m->c_page_offset_data.entries.at(i).nobjects; |
| ... | ... | @@ -1533,8 +1523,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) |
| 1533 | 1523 | // we throw all remaining objects in arbitrary order. |
| 1534 | 1524 | |
| 1535 | 1525 | // Place the pages tree. |
| 1536 | - std::set<QPDFObjGen> pages_ogs = | |
| 1537 | - m->obj_user_to_objects[ObjUser(ObjUser::ou_root_key, "/Pages")]; | |
| 1526 | + auto& pages_ogs = obj_user_to_objects_[{ObjUser::ou_root_key, "/Pages"}]; | |
| 1538 | 1527 | no_ci_stop_if( |
| 1539 | 1528 | pages_ogs.empty(), "found empty pages tree while calculating linearization data" // |
| 1540 | 1529 | ); |
| ... | ... | @@ -1559,8 +1548,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) |
| 1559 | 1548 | // there's nothing to prevent it from having been in some set other than |
| 1560 | 1549 | // lc_thumbnail_private. |
| 1561 | 1550 | } |
| 1562 | - std::set<QPDFObjGen>& ogs = m->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, i)]; | |
| 1563 | - for (auto const& og: ogs) { | |
| 1551 | + for (auto const& og: obj_user_to_objects_[{ObjUser::ou_thumb, i}]) { | |
| 1564 | 1552 | if (lc_thumbnail_private.erase(og)) { |
| 1565 | 1553 | m->part9.emplace_back(qpdf.getObject(og)); |
| 1566 | 1554 | } |
| ... | ... | @@ -1591,7 +1579,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) |
| 1591 | 1579 | |
| 1592 | 1580 | size_t num_placed = |
| 1593 | 1581 | m->part4.size() + m->part6.size() + m->part7.size() + m->part8.size() + m->part9.size(); |
| 1594 | - size_t num_wanted = m->object_to_obj_users.size(); | |
| 1582 | + size_t num_wanted = object_to_obj_users_.size(); | |
| 1595 | 1583 | no_ci_stop_if( |
| 1596 | 1584 | // This can happen with damaged files, e.g. if the root is part of the pages tree. |
| 1597 | 1585 | num_placed != num_wanted, |
| ... | ... | @@ -1642,12 +1630,12 @@ Lin::calculateLinearizationData(T const& object_stream_data) |
| 1642 | 1630 | CHPageOffsetEntry& pe = m->c_page_offset_data.entries.at(i); |
| 1643 | 1631 | ObjUser ou(ObjUser::ou_page, i); |
| 1644 | 1632 | no_ci_stop_if( |
| 1645 | - !m->obj_user_to_objects.contains(ou), | |
| 1633 | + !obj_user_to_objects_.contains(ou), | |
| 1646 | 1634 | "found unreferenced page while calculating linearization data" // |
| 1647 | 1635 | ); |
| 1648 | 1636 | |
| 1649 | - for (auto const& og: m->obj_user_to_objects[ou]) { | |
| 1650 | - if ((m->object_to_obj_users[og].size() > 1) && (obj_to_index.contains(og.getObj()))) { | |
| 1637 | + for (auto const& og: obj_user_to_objects_[ou]) { | |
| 1638 | + if (object_to_obj_users_[og].size() > 1 && obj_to_index.contains(og.getObj())) { | |
| 1651 | 1639 | int idx = obj_to_index[og.getObj()]; |
| 1652 | 1640 | ++pe.nshared_objects; |
| 1653 | 1641 | pe.shared_identifiers.push_back(idx); | ... | ... |
libqpdf/qpdf/QPDF_private.hh
| ... | ... | @@ -364,40 +364,6 @@ struct QPDF::CHSharedObject |
| 364 | 364 | |
| 365 | 365 | // No need for CHGeneric -- HGeneric is fine as is. |
| 366 | 366 | |
| 367 | -// Data structures to support optimization -- implemented in QPDF_optimization.cc | |
| 368 | - | |
| 369 | -class QPDF::ObjUser | |
| 370 | -{ | |
| 371 | - public: | |
| 372 | - enum user_e { ou_page = 1, ou_thumb, ou_trailer_key, ou_root_key, ou_root }; | |
| 373 | - | |
| 374 | - ObjUser() = delete; | |
| 375 | - | |
| 376 | - // type must be ou_root | |
| 377 | - ObjUser(user_e type); | |
| 378 | - | |
| 379 | - // type must be one of ou_page or ou_thumb | |
| 380 | - ObjUser(user_e type, size_t pageno); | |
| 381 | - | |
| 382 | - // type must be one of ou_trailer_key or ou_root_key | |
| 383 | - ObjUser(user_e type, std::string const& key); | |
| 384 | - | |
| 385 | - bool operator<(ObjUser const&) const; | |
| 386 | - | |
| 387 | - user_e ou_type; | |
| 388 | - size_t pageno{0}; // if ou_page; | |
| 389 | - std::string key; // if ou_trailer_key or ou_root_key | |
| 390 | -}; | |
| 391 | - | |
| 392 | -struct QPDF::UpdateObjectMapsFrame | |
| 393 | -{ | |
| 394 | - UpdateObjectMapsFrame(ObjUser const& ou, QPDFObjectHandle oh, bool top); | |
| 395 | - | |
| 396 | - ObjUser const& ou; | |
| 397 | - QPDFObjectHandle oh; | |
| 398 | - bool top; | |
| 399 | -}; | |
| 400 | - | |
| 401 | 367 | class QPDF::PatternFinder final: public InputSource::Finder |
| 402 | 368 | { |
| 403 | 369 | public: |
| ... | ... | @@ -743,6 +709,40 @@ class QPDF::Doc::Linearization: Common |
| 743 | 709 | bool compressed); |
| 744 | 710 | |
| 745 | 711 | private: |
| 712 | + // Data structures to support optimization -- implemented in QPDF_optimization.cc | |
| 713 | + | |
| 714 | + class ObjUser | |
| 715 | + { | |
| 716 | + public: | |
| 717 | + enum user_e { ou_page = 1, ou_thumb, ou_trailer_key, ou_root_key, ou_root }; | |
| 718 | + | |
| 719 | + ObjUser() = delete; | |
| 720 | + | |
| 721 | + // type must be ou_root | |
| 722 | + ObjUser(user_e type); | |
| 723 | + | |
| 724 | + // type must be one of ou_page or ou_thumb | |
| 725 | + ObjUser(user_e type, size_t pageno); | |
| 726 | + | |
| 727 | + // type must be one of ou_trailer_key or ou_root_key | |
| 728 | + ObjUser(user_e type, std::string const& key); | |
| 729 | + | |
| 730 | + bool operator<(ObjUser const&) const; | |
| 731 | + | |
| 732 | + user_e ou_type; | |
| | 733 | + size_t pageno{0}; // if ou_page or ou_thumb | |
| 734 | + std::string key; // if ou_trailer_key or ou_root_key | |
| 735 | + }; | |
| 736 | + | |
| 737 | + struct UpdateObjectMapsFrame | |
| 738 | + { | |
| 739 | + UpdateObjectMapsFrame(ObjUser const& ou, QPDFObjectHandle oh, bool top) ; | |
| 740 | + | |
| 741 | + ObjUser const& ou; | |
| 742 | + QPDFObjectHandle oh; | |
| 743 | + bool top; | |
| 744 | + }; | |
| 745 | + | |
| 746 | 746 | // methods to support linearization checking -- implemented in QPDF_linearization.cc |
| 747 | 747 | |
| 748 | 748 | void readLinearizationData(); |
| ... | ... | @@ -797,6 +797,10 @@ class QPDF::Doc::Linearization: Common |
| 797 | 797 | std::function<int(QPDFObjectHandle&)> skip_stream_parameters); |
| 798 | 798 | void filterCompressedObjects(std::map<int, int> const& object_stream_data); |
| 799 | 799 | void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data); |
| 800 | + | |
| 801 | + // Optimization data | |
| 802 | + std::map<ObjUser, std::set<QPDFObjGen>> obj_user_to_objects_; | |
| 803 | + std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users_; | |
| 800 | 804 | }; |
| 801 | 805 | |
| 802 | 806 | class QPDF::Doc::Objects: Common |
| ... | ... | @@ -1169,10 +1173,6 @@ class QPDF::Members: Doc |
| 1169 | 1173 | std::vector<QPDFObjectHandle> part7; |
| 1170 | 1174 | std::vector<QPDFObjectHandle> part8; |
| 1171 | 1175 | std::vector<QPDFObjectHandle> part9; |
| 1172 | - | |
| 1173 | - // Optimization data | |
| 1174 | - std::map<ObjUser, std::set<QPDFObjGen>> obj_user_to_objects; | |
| 1175 | - std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users; | |
| 1176 | 1176 | }; |
| 1177 | 1177 | |
| 1178 | 1178 | // The Resolver class is restricted to QPDFObject and BaseHandle so that only it can resolve | ... | ... |