Commit 256f063300ff29dd1fdd9cb16825401090c895d6

Authored by m-holger
1 parent bd4bc944

Refactor optimization structures: move `ObjUser` and `UpdateObjectMapsFrame` to `Lin`, encapsulate optimization data within `Lin`, update usage across the codebase, and streamline related logic in `QPDF_linearization`.
include/qpdf/QPDF.hh
... ... @@ -797,8 +797,6 @@ class QPDF
797 797 struct CHPageOffset;
798 798 struct CHSharedObjectEntry;
799 799 struct CHSharedObject;
800   - class ObjUser;
801   - struct UpdateObjectMapsFrame;
802 800 class PatternFinder;
803 801  
804 802 // Methods to support pattern finding
... ...
libqpdf/QPDF_linearization.cc
... ... @@ -69,20 +69,20 @@ load_vector_vector(
69 69 bit_stream.skipToNextByte();
70 70 }
71 71  
72   -QPDF::ObjUser::ObjUser(user_e type) :
  72 +Lin::ObjUser::ObjUser(user_e type) :
73 73 ou_type(type)
74 74 {
75 75 qpdf_expect(type == ou_root);
76 76 }
77 77  
78   -QPDF::ObjUser::ObjUser(user_e type, size_t pageno) :
  78 +Lin::ObjUser::ObjUser(user_e type, size_t pageno) :
79 79 ou_type(type),
80 80 pageno(pageno)
81 81 {
82 82 qpdf_expect(type == ou_page || type == ou_thumb);
83 83 }
84 84  
85   -QPDF::ObjUser::ObjUser(user_e type, std::string const& key) :
  85 +Lin::ObjUser::ObjUser(user_e type, std::string const& key) :
86 86 ou_type(type),
87 87 key(key)
88 88 {
... ... @@ -90,7 +90,7 @@ QPDF::ObjUser::ObjUser(user_e type, std::string const& key) :
90 90 }
91 91  
92 92 bool
93   -QPDF::ObjUser::operator<(ObjUser const& rhs) const
  93 +Lin::ObjUser::operator<(ObjUser const& rhs) const
94 94 {
95 95 if (ou_type < rhs.ou_type) {
96 96 return true;
... ... @@ -106,8 +106,8 @@ QPDF::ObjUser::operator<(ObjUser const& rhs) const
106 106 return false;
107 107 }
108 108  
109   -QPDF::UpdateObjectMapsFrame::UpdateObjectMapsFrame(
110   - QPDF::ObjUser const& ou, QPDFObjectHandle oh, bool top) :
  109 +Lin::UpdateObjectMapsFrame::UpdateObjectMapsFrame(
  110 + ObjUser const& ou, QPDFObjectHandle oh, bool top) :
111 111 ou(ou),
112 112 oh(oh),
113 113 top(top)
... ... @@ -137,7 +137,7 @@ Lin::optimize_internal(
137 137 bool allow_changes,
138 138 std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
139 139 {
140   - if (!m->obj_user_to_objects.empty()) {
  140 + if (!obj_user_to_objects_.empty()) {
141 141 // already optimized
142 142 return;
143 143 }
... ... @@ -186,9 +186,9 @@ Lin::optimize_internal(
186 186 }
187 187  
188 188 ObjUser root_ou = ObjUser(ObjUser::ou_root);
189   - auto root_og = QPDFObjGen(root.getObjGen());
190   - m->obj_user_to_objects[root_ou].insert(root_og);
191   - m->object_to_obj_users[root_og].insert(root_ou);
  189 + auto root_og =root.id_gen();
  190 + obj_user_to_objects_[root_ou].insert(root_og);
  191 + object_to_obj_users_[root_og].insert(root_ou);
192 192  
193 193 filterCompressedObjects(object_stream_data);
194 194 }
... ... @@ -217,14 +217,14 @@ Lin::updateObjectMaps(
217 217 }
218 218 }
219 219  
220   - if (cur.oh.isIndirect()) {
  220 + if (cur.oh.indirect()) {
221 221 QPDFObjGen og(cur.oh.getObjGen());
222 222 if (!visited.add(og)) {
223 223 QTC::TC("qpdf", "QPDF opt loop detected");
224 224 continue;
225 225 }
226   - m->obj_user_to_objects[cur.ou].insert(og);
227   - m->object_to_obj_users[og].insert(cur.ou);
  226 + obj_user_to_objects_[cur.ou].insert(og);
  227 + object_to_obj_users_[og].insert(cur.ou);
228 228 }
229 229  
230 230 if (cur.oh.isArray()) {
... ... @@ -280,34 +280,30 @@ Lin::filterCompressedObjects(std::map<int, int> const& object_stream_data)
280 280 std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects;
281 281 std::map<QPDFObjGen, std::set<ObjUser>> t_object_to_obj_users;
282 282  
283   - for (auto const& i1: m->obj_user_to_objects) {
284   - ObjUser const& ou = i1.first;
285   - // Loop over objects.
286   - for (auto const& og: i1.second) {
  283 + for (auto const& [ou, ogs]: obj_user_to_objects_) {
  284 + for (auto const& og: ogs) {
287 285 auto i2 = object_stream_data.find(og.getObj());
288 286 if (i2 == object_stream_data.end()) {
289 287 t_obj_user_to_objects[ou].insert(og);
290 288 } else {
291   - t_obj_user_to_objects[ou].insert(QPDFObjGen(i2->second, 0));
  289 + t_obj_user_to_objects[ou].insert({i2->second, 0});
292 290 }
293 291 }
294 292 }
295 293  
296   - for (auto const& i1: m->object_to_obj_users) {
297   - QPDFObjGen const& og = i1.first;
298   - // Loop over obj_users.
299   - for (auto const& ou: i1.second) {
  294 + for (auto const& [og, ous]: object_to_obj_users_) {
  295 + for (auto const& ou: ous) {
300 296 auto i2 = object_stream_data.find(og.getObj());
301 297 if (i2 == object_stream_data.end()) {
302 298 t_object_to_obj_users[og].insert(ou);
303 299 } else {
304   - t_object_to_obj_users[QPDFObjGen(i2->second, 0)].insert(ou);
  300 + t_object_to_obj_users[{i2->second, 0}].insert(ou);
305 301 }
306 302 }
307 303 }
308 304  
309   - m->obj_user_to_objects = t_obj_user_to_objects;
310   - m->object_to_obj_users = t_object_to_obj_users;
  305 + obj_user_to_objects_ = std::move(t_obj_user_to_objects);
  306 + object_to_obj_users_ = std::move(t_object_to_obj_users);
311 307 }
312 308  
313 309 void
... ... @@ -324,10 +320,8 @@ Lin::filterCompressedObjects(QPDFWriter::ObjTable const& obj)
324 320 std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects;
325 321 std::map<QPDFObjGen, std::set<ObjUser>> t_object_to_obj_users;
326 322  
327   - for (auto const& i1: m->obj_user_to_objects) {
328   - ObjUser const& ou = i1.first;
329   - // Loop over objects.
330   - for (auto const& og: i1.second) {
  323 + for (auto const& [ou, ogs]: obj_user_to_objects_) {
  324 + for (auto const& og: ogs) {
331 325 if (obj.contains(og)) {
332 326 if (auto const& i2 = obj[og].object_stream; i2 <= 0) {
333 327 t_obj_user_to_objects[ou].insert(og);
... ... @@ -338,22 +332,21 @@ Lin::filterCompressedObjects(QPDFWriter::ObjTable const& obj)
338 332 }
339 333 }
340 334  
341   - for (auto const& i1: m->object_to_obj_users) {
342   - QPDFObjGen const& og = i1.first;
  335 + for (auto const& [og, ous]: object_to_obj_users_) {
343 336 if (obj.contains(og)) {
344 337 // Loop over obj_users.
345   - for (auto const& ou: i1.second) {
  338 + for (auto const& ou: ous) {
346 339 if (auto i2 = obj[og].object_stream; i2 <= 0) {
347 340 t_object_to_obj_users[og].insert(ou);
348 341 } else {
349   - t_object_to_obj_users[QPDFObjGen(i2, 0)].insert(ou);
  342 + t_object_to_obj_users[{i2, 0}].insert(ou);
350 343 }
351 344 }
352 345 }
353 346 }
354 347  
355   - m->obj_user_to_objects = t_obj_user_to_objects;
356   - m->object_to_obj_users = t_object_to_obj_users;
  348 + obj_user_to_objects_ = std::move(t_obj_user_to_objects);
  349 + object_to_obj_users_ = std::move(t_object_to_obj_users);
357 350 }
358 351  
359 352 void
... ... @@ -793,12 +786,12 @@ qpdf_offset_t
793 786 Lin::maxEnd(ObjUser const& ou)
794 787 {
795 788 no_ci_stop_if(
796   - !m->obj_user_to_objects.contains(ou),
  789 + !obj_user_to_objects_.contains(ou),
797 790 "no entry in object user table for requested object user" //
798 791 );
799 792  
800 793 qpdf_offset_t end = 0;
801   - for (auto const& og: m->obj_user_to_objects[ou]) {
  794 + for (auto const& og: obj_user_to_objects_[ou]) {
802 795 no_ci_stop_if(
803 796 !m->obj_cache.contains(og), "unknown object referenced in object user table" //
804 797 );
... ... @@ -1233,7 +1226,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1233 1226 // actual offsets and lengths are not computed here, but anything related to object ordering is.
1234 1227  
1235 1228 util::assertion(
1236   - !m->object_to_obj_users.empty(),
  1229 + !object_to_obj_users_.empty(),
1237 1230 "INTERNAL ERROR: QPDF::calculateLinearizationData called before optimize()" //
1238 1231 );
1239 1232 // Note that we can't call optimize here because we don't know whether it should be called
... ... @@ -1325,10 +1318,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1325 1318 std::set<QPDFObjGen> lc_outlines;
1326 1319 std::set<QPDFObjGen> lc_root;
1327 1320  
1328   - for (auto& oiter: m->object_to_obj_users) {
1329   - QPDFObjGen const& og = oiter.first;
1330   - std::set<ObjUser>& ous = oiter.second;
1331   -
  1321 + for (auto& [og, ous]: object_to_obj_users_) {
1332 1322 bool in_open_document = false;
1333 1323 bool in_first_page = false;
1334 1324 int other_pages = 0;
... ... @@ -1500,11 +1490,11 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1500 1490  
1501 1491 ObjUser ou(ObjUser::ou_page, i);
1502 1492 no_ci_stop_if(
1503   - !m->obj_user_to_objects.contains(ou),
  1493 + !obj_user_to_objects_.contains(ou),
1504 1494 "found unreferenced page while calculating linearization data" //
1505 1495 );
1506 1496  
1507   - for (auto const& og: m->obj_user_to_objects[ou]) {
  1497 + for (auto const& og: obj_user_to_objects_[ou]) {
1508 1498 if (lc_other_page_private.erase(og)) {
1509 1499 m->part7.emplace_back(qpdf.getObject(og));
1510 1500 ++m->c_page_offset_data.entries.at(i).nobjects;
... ... @@ -1533,8 +1523,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1533 1523 // we throw all remaining objects in arbitrary order.
1534 1524  
1535 1525 // Place the pages tree.
1536   - std::set<QPDFObjGen> pages_ogs =
1537   - m->obj_user_to_objects[ObjUser(ObjUser::ou_root_key, "/Pages")];
  1526 + auto& pages_ogs = obj_user_to_objects_[{ObjUser::ou_root_key, "/Pages"}];
1538 1527 no_ci_stop_if(
1539 1528 pages_ogs.empty(), "found empty pages tree while calculating linearization data" //
1540 1529 );
... ... @@ -1559,8 +1548,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1559 1548 // there's nothing to prevent it from having been in some set other than
1560 1549 // lc_thumbnail_private.
1561 1550 }
1562   - std::set<QPDFObjGen>& ogs = m->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, i)];
1563   - for (auto const& og: ogs) {
  1551 + for (auto const& og: obj_user_to_objects_[{ObjUser::ou_thumb, i}]) {
1564 1552 if (lc_thumbnail_private.erase(og)) {
1565 1553 m->part9.emplace_back(qpdf.getObject(og));
1566 1554 }
... ... @@ -1591,7 +1579,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1591 1579  
1592 1580 size_t num_placed =
1593 1581 m->part4.size() + m->part6.size() + m->part7.size() + m->part8.size() + m->part9.size();
1594   - size_t num_wanted = m->object_to_obj_users.size();
  1582 + size_t num_wanted = object_to_obj_users_.size();
1595 1583 no_ci_stop_if(
1596 1584 // This can happen with damaged files, e.g. if the root is part of the pages tree.
1597 1585 num_placed != num_wanted,
... ... @@ -1642,12 +1630,12 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1642 1630 CHPageOffsetEntry& pe = m->c_page_offset_data.entries.at(i);
1643 1631 ObjUser ou(ObjUser::ou_page, i);
1644 1632 no_ci_stop_if(
1645   - !m->obj_user_to_objects.contains(ou),
  1633 + !obj_user_to_objects_.contains(ou),
1646 1634 "found unreferenced page while calculating linearization data" //
1647 1635 );
1648 1636  
1649   - for (auto const& og: m->obj_user_to_objects[ou]) {
1650   - if ((m->object_to_obj_users[og].size() > 1) && (obj_to_index.contains(og.getObj()))) {
  1637 + for (auto const& og: obj_user_to_objects_[ou]) {
  1638 + if (object_to_obj_users_[og].size() > 1 && obj_to_index.contains(og.getObj())) {
1651 1639 int idx = obj_to_index[og.getObj()];
1652 1640 ++pe.nshared_objects;
1653 1641 pe.shared_identifiers.push_back(idx);
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -364,40 +364,6 @@ struct QPDF::CHSharedObject
364 364  
365 365 // No need for CHGeneric -- HGeneric is fine as is.
366 366  
367   -// Data structures to support optimization -- implemented in QPDF_optimization.cc
368   -
369   -class QPDF::ObjUser
370   -{
371   - public:
372   - enum user_e { ou_page = 1, ou_thumb, ou_trailer_key, ou_root_key, ou_root };
373   -
374   - ObjUser() = delete;
375   -
376   - // type must be ou_root
377   - ObjUser(user_e type);
378   -
379   - // type must be one of ou_page or ou_thumb
380   - ObjUser(user_e type, size_t pageno);
381   -
382   - // type must be one of ou_trailer_key or ou_root_key
383   - ObjUser(user_e type, std::string const& key);
384   -
385   - bool operator<(ObjUser const&) const;
386   -
387   - user_e ou_type;
388   - size_t pageno{0}; // if ou_page;
389   - std::string key; // if ou_trailer_key or ou_root_key
390   -};
391   -
392   -struct QPDF::UpdateObjectMapsFrame
393   -{
394   - UpdateObjectMapsFrame(ObjUser const& ou, QPDFObjectHandle oh, bool top);
395   -
396   - ObjUser const& ou;
397   - QPDFObjectHandle oh;
398   - bool top;
399   -};
400   -
401 367 class QPDF::PatternFinder final: public InputSource::Finder
402 368 {
403 369 public:
... ... @@ -743,6 +709,40 @@ class QPDF::Doc::Linearization: Common
743 709 bool compressed);
744 710  
745 711 private:
  712 + // Data structures to support optimization -- implemented in QPDF_linearization.cc
  713 +
  714 + class ObjUser
  715 + {
  716 + public:
  717 + enum user_e { ou_page = 1, ou_thumb, ou_trailer_key, ou_root_key, ou_root };
  718 +
  719 + ObjUser() = delete;
  720 +
  721 + // type must be ou_root
  722 + ObjUser(user_e type);
  723 +
  724 + // type must be one of ou_page or ou_thumb
  725 + ObjUser(user_e type, size_t pageno);
  726 +
  727 + // type must be one of ou_trailer_key or ou_root_key
  728 + ObjUser(user_e type, std::string const& key);
  729 +
  730 + bool operator<(ObjUser const&) const;
  731 +
  732 + user_e ou_type;
  733 + size_t pageno{0}; // if ou_page;
  734 + std::string key; // if ou_trailer_key or ou_root_key
  735 + };
  736 +
  737 + struct UpdateObjectMapsFrame
  738 + {
  739 + UpdateObjectMapsFrame(ObjUser const& ou, QPDFObjectHandle oh, bool top) ;
  740 +
  741 + ObjUser const& ou;
  742 + QPDFObjectHandle oh;
  743 + bool top;
  744 + };
  745 +
746 746 // methods to support linearization checking -- implemented in QPDF_linearization.cc
747 747  
748 748 void readLinearizationData();
... ... @@ -797,6 +797,10 @@ class QPDF::Doc::Linearization: Common
797 797 std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
798 798 void filterCompressedObjects(std::map<int, int> const& object_stream_data);
799 799 void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data);
  800 +
  801 + // Optimization data
  802 + std::map<ObjUser, std::set<QPDFObjGen>> obj_user_to_objects_;
  803 + std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users_;
800 804 };
801 805  
802 806 class QPDF::Doc::Objects: Common
... ... @@ -1169,10 +1173,6 @@ class QPDF::Members: Doc
1169 1173 std::vector<QPDFObjectHandle> part7;
1170 1174 std::vector<QPDFObjectHandle> part8;
1171 1175 std::vector<QPDFObjectHandle> part9;
1172   -
1173   - // Optimization data
1174   - std::map<ObjUser, std::set<QPDFObjGen>> obj_user_to_objects;
1175   - std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users;
1176 1176 };
1177 1177  
1178 1178 // The Resolver class is restricted to QPDFObject and BaseHandle so that only it can resolve
... ...