Commit e7a1009d6a0561547f6f3de1321369cb8dc90bd1

Authored by m-holger
1 parent dd3de389

Refactor linearization data: encapsulate part vectors and warnings in `Lin`, update usage across `QPDF_linearization`, and streamline related logic.
libqpdf/QPDF_linearization.cc
@@ -352,7 +352,7 @@ Lin::filterCompressedObjects(QPDFWriter::ObjTable const& obj) @@ -352,7 +352,7 @@ Lin::filterCompressedObjects(QPDFWriter::ObjTable const& obj)
352 void 352 void
353 Lin::linearizationWarning(std::string_view msg) 353 Lin::linearizationWarning(std::string_view msg)
354 { 354 {
355 - m->linearization_warnings = true; 355 + linearization_warnings_ = true;
356 warn(qpdf_e_linearization, "", 0, std::string(msg)); 356 warn(qpdf_e_linearization, "", 0, std::string(msg));
357 } 357 }
358 358
@@ -368,7 +368,7 @@ Lin::check() @@ -368,7 +368,7 @@ Lin::check()
368 try { 368 try {
369 readLinearizationData(); 369 readLinearizationData();
370 checkLinearizationInternal(); 370 checkLinearizationInternal();
371 - return !m->linearization_warnings; 371 + return !linearization_warnings_;
372 } catch (std::runtime_error& e) { 372 } catch (std::runtime_error& e) {
373 linearizationWarning( 373 linearizationWarning(
374 "error encountered while checking linearization data: " + std::string(e.what())); 374 "error encountered while checking linearization data: " + std::string(e.what()));
@@ -756,11 +756,11 @@ Lin::checkLinearizationInternal() @@ -756,11 +756,11 @@ Lin::checkLinearizationInternal()
756 // suite doesn't contain any files with threads. 756 // suite doesn't contain any files with threads.
757 757
758 no_ci_stop_if( 758 no_ci_stop_if(
759 - m->part6.empty(), "linearization part 6 unexpectedly empty" // 759 + part6_.empty(), "linearization part 6 unexpectedly empty" //
760 ); 760 );
761 qpdf_offset_t min_E = -1; 761 qpdf_offset_t min_E = -1;
762 qpdf_offset_t max_E = -1; 762 qpdf_offset_t max_E = -1;
763 - for (auto const& oh: m->part6) { 763 + for (auto const& oh: part6_) {
764 QPDFObjGen og(oh.getObjGen()); 764 QPDFObjGen og(oh.getObjGen());
765 // All objects have to have been dereferenced to be classified. 765 // All objects have to have been dereferenced to be classified.
766 util::assertion(m->obj_cache.contains(og), "linearization part6 object not in cache"); 766 util::assertion(m->obj_cache.contains(og), "linearization part6 object not in cache");
@@ -993,10 +993,10 @@ Lin::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<int @@ -993,10 +993,10 @@ Lin::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<int
993 for (int i = 0; i < so.nshared_total; ++i) { 993 for (int i = 0; i < so.nshared_total; ++i) {
994 if (i == so.nshared_first_page) { 994 if (i == so.nshared_first_page) {
995 QTC::TC("qpdf", "QPDF lin check shared past first page"); 995 QTC::TC("qpdf", "QPDF lin check shared past first page");
996 - if (m->part8.empty()) { 996 + if (part8_.empty()) {
997 linearizationWarning("part 8 is empty but nshared_total > nshared_first_page"); 997 linearizationWarning("part 8 is empty but nshared_total > nshared_first_page");
998 } else { 998 } else {
999 - int obj = m->part8.at(0).getObjectID(); 999 + int obj = part8_.at(0).getObjectID();
1000 if (obj != so.first_shared_obj) { 1000 if (obj != so.first_shared_obj) {
1001 linearizationWarning( 1001 linearizationWarning(
1002 "first shared object number mismatch: hint table = " + 1002 "first shared object number mismatch: hint table = " +
@@ -1275,11 +1275,11 @@ Lin::calculateLinearizationData(T const& object_stream_data) @@ -1275,11 +1275,11 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1275 1275
1276 // * outlines: part 6 or 9 1276 // * outlines: part 6 or 9
1277 1277
1278 - m->part4.clear();  
1279 - m->part6.clear();  
1280 - m->part7.clear();  
1281 - m->part8.clear();  
1282 - m->part9.clear(); 1278 + part4_.clear();
  1279 + part6_.clear();
  1280 + part7_.clear();
  1281 + part8_.clear();
  1282 + part9_.clear();
1283 c_linp_ = LinParameters(); 1283 c_linp_ = LinParameters();
1284 c_page_offset_data_ = CHPageOffset(); 1284 c_page_offset_data_ = CHPageOffset();
1285 c_shared_object_data_ = CHSharedObject(); 1285 c_shared_object_data_ = CHSharedObject();
@@ -1426,9 +1426,9 @@ Lin::calculateLinearizationData(T const& object_stream_data) @@ -1426,9 +1426,9 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1426 lc_root.size() != 1, "found other than one root while calculating linearization data" // 1426 lc_root.size() != 1, "found other than one root while calculating linearization data" //
1427 ); 1427 );
1428 1428
1429 - m->part4.emplace_back(qpdf.getObject(*(lc_root.begin()))); 1429 + part4_.emplace_back(qpdf.getObject(*(lc_root.begin())));
1430 for (auto const& og: lc_open_document) { 1430 for (auto const& og: lc_open_document) {
1431 - m->part4.emplace_back(qpdf.getObject(og)); 1431 + part4_.emplace_back(qpdf.getObject(og));
1432 } 1432 }
1433 1433
1434 // Part 6: first page objects. Note: implementation note 124 states that Acrobat always treats 1434 // Part 6: first page objects. Note: implementation note 124 states that Acrobat always treats
@@ -1444,30 +1444,30 @@ Lin::calculateLinearizationData(T const& object_stream_data) @@ -1444,30 +1444,30 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1444 !lc_first_page_private.erase(first_page_og), "unable to linearize first page" // 1444 !lc_first_page_private.erase(first_page_og), "unable to linearize first page" //
1445 ); 1445 );
1446 c_linp_.first_page_object = uc_pages.at(0).getObjectID(); 1446 c_linp_.first_page_object = uc_pages.at(0).getObjectID();
1447 - m->part6.emplace_back(uc_pages.at(0)); 1447 + part6_.emplace_back(uc_pages.at(0));
1448 1448
1449 // The PDF spec "recommends" an order for the rest of the objects, but we are going to disregard 1449 // The PDF spec "recommends" an order for the rest of the objects, but we are going to disregard
1450 // it except to the extent that it groups private and shared objects contiguously for the sake 1450 // it except to the extent that it groups private and shared objects contiguously for the sake
1451 // of hint tables. 1451 // of hint tables.
1452 1452
1453 for (auto const& og: lc_first_page_private) { 1453 for (auto const& og: lc_first_page_private) {
1454 - m->part6.emplace_back(qpdf.getObject(og)); 1454 + part6_.emplace_back(qpdf.getObject(og));
1455 } 1455 }
1456 1456
1457 for (auto const& og: lc_first_page_shared) { 1457 for (auto const& og: lc_first_page_shared) {
1458 - m->part6.emplace_back(qpdf.getObject(og)); 1458 + part6_.emplace_back(qpdf.getObject(og));
1459 } 1459 }
1460 1460
1461 // Place the outline dictionary if it goes in the first page section. 1461 // Place the outline dictionary if it goes in the first page section.
1462 if (outlines_in_first_page) { 1462 if (outlines_in_first_page) {
1463 - pushOutlinesToPart(m->part6, lc_outlines, object_stream_data); 1463 + pushOutlinesToPart(part6_, lc_outlines, object_stream_data);
1464 } 1464 }
1465 1465
1466 // Fill in page offset hint table information for the first page. The PDF spec says that 1466 // Fill in page offset hint table information for the first page. The PDF spec says that
1467 // nshared_objects should be zero for the first page. pdlin does not appear to obey this, but 1467 // nshared_objects should be zero for the first page. pdlin does not appear to obey this, but
1468 // it fills in garbage values for all the shared object identifiers on the first page. 1468 // it fills in garbage values for all the shared object identifiers on the first page.
1469 1469
1470 - c_page_offset_data_.entries.at(0).nobjects = toI(m->part6.size()); 1470 + c_page_offset_data_.entries.at(0).nobjects = toI(part6_.size());
1471 1471
1472 // Part 7: other pages' private objects 1472 // Part 7: other pages' private objects
1473 1473
@@ -1481,7 +1481,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) @@ -1481,7 +1481,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1481 "unable to linearize page " + std::to_string(i) // 1481 "unable to linearize page " + std::to_string(i) //
1482 ); 1482 );
1483 1483
1484 - m->part7.emplace_back(uc_pages.at(i)); 1484 + part7_.emplace_back(uc_pages.at(i));
1485 1485
1486 // Place all non-shared objects referenced by this page, updating the page object count for 1486 // Place all non-shared objects referenced by this page, updating the page object count for
1487 // the hint table. 1487 // the hint table.
@@ -1496,7 +1496,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) @@ -1496,7 +1496,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1496 1496
1497 for (auto const& og: obj_user_to_objects_[ou]) { 1497 for (auto const& og: obj_user_to_objects_[ou]) {
1498 if (lc_other_page_private.erase(og)) { 1498 if (lc_other_page_private.erase(og)) {
1499 - m->part7.emplace_back(qpdf.getObject(og)); 1499 + part7_.emplace_back(qpdf.getObject(og));
1500 ++c_page_offset_data_.entries.at(i).nobjects; 1500 ++c_page_offset_data_.entries.at(i).nobjects;
1501 } 1501 }
1502 } 1502 }
@@ -1512,7 +1512,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) @@ -1512,7 +1512,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1512 1512
1513 // Order is unimportant. 1513 // Order is unimportant.
1514 for (auto const& og: lc_other_page_shared) { 1514 for (auto const& og: lc_other_page_shared) {
1515 - m->part8.emplace_back(qpdf.getObject(og)); 1515 + part8_.emplace_back(qpdf.getObject(og));
1516 } 1516 }
1517 1517
1518 // Part 9: other objects 1518 // Part 9: other objects
@@ -1529,7 +1529,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) @@ -1529,7 +1529,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1529 ); 1529 );
1530 for (auto const& og: pages_ogs) { 1530 for (auto const& og: pages_ogs) {
1531 if (lc_other.erase(og)) { 1531 if (lc_other.erase(og)) {
1532 - m->part9.emplace_back(qpdf.getObject(og)); 1532 + part9_.emplace_back(qpdf.getObject(og));
1533 } 1533 }
1534 } 1534 }
1535 1535
@@ -1541,7 +1541,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) @@ -1541,7 +1541,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1541 QPDFObjGen thumb_og(thumb.getObjGen()); 1541 QPDFObjGen thumb_og(thumb.getObjGen());
1542 // Output the thumbnail itself 1542 // Output the thumbnail itself
1543 if (lc_thumbnail_private.erase(thumb_og) && !thumb.null()) { 1543 if (lc_thumbnail_private.erase(thumb_og) && !thumb.null()) {
1544 - m->part9.emplace_back(thumb); 1544 + part9_.emplace_back(thumb);
1545 } else { 1545 } else {
1546 // No internal error this time...there's nothing to stop this object from having 1546 // No internal error this time...there's nothing to stop this object from having
1547 // been referred to somewhere else outside of a page's /Thumb, and if it had been, 1547 // been referred to somewhere else outside of a page's /Thumb, and if it had been,
@@ -1550,7 +1550,7 @@ Lin::calculateLinearizationData(T const& object_stream_data) @@ -1550,7 +1550,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1550 } 1550 }
1551 for (auto const& og: obj_user_to_objects_[{ObjUser::ou_thumb, i}]) { 1551 for (auto const& og: obj_user_to_objects_[{ObjUser::ou_thumb, i}]) {
1552 if (lc_thumbnail_private.erase(og)) { 1552 if (lc_thumbnail_private.erase(og)) {
1553 - m->part9.emplace_back(qpdf.getObject(og)); 1553 + part9_.emplace_back(qpdf.getObject(og));
1554 } 1554 }
1555 } 1555 }
1556 } 1556 }
@@ -1562,23 +1562,23 @@ Lin::calculateLinearizationData(T const& object_stream_data) @@ -1562,23 +1562,23 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1562 1562
1563 // Place shared thumbnail objects 1563 // Place shared thumbnail objects
1564 for (auto const& og: lc_thumbnail_shared) { 1564 for (auto const& og: lc_thumbnail_shared) {
1565 - m->part9.emplace_back(qpdf.getObject(og)); 1565 + part9_.emplace_back(qpdf.getObject(og));
1566 } 1566 }
1567 1567
1568 // Place outlines unless in first page 1568 // Place outlines unless in first page
1569 if (!outlines_in_first_page) { 1569 if (!outlines_in_first_page) {
1570 - pushOutlinesToPart(m->part9, lc_outlines, object_stream_data); 1570 + pushOutlinesToPart(part9_, lc_outlines, object_stream_data);
1571 } 1571 }
1572 1572
1573 // Place all remaining objects 1573 // Place all remaining objects
1574 for (auto const& og: lc_other) { 1574 for (auto const& og: lc_other) {
1575 - m->part9.emplace_back(qpdf.getObject(og)); 1575 + part9_.emplace_back(qpdf.getObject(og));
1576 } 1576 }
1577 1577
1578 // Make sure we got everything exactly once. 1578 // Make sure we got everything exactly once.
1579 1579
1580 size_t num_placed = 1580 size_t num_placed =
1581 - m->part4.size() + m->part6.size() + m->part7.size() + m->part8.size() + m->part9.size(); 1581 + part4_.size() + part6_.size() + part7_.size() + part8_.size() + part9_.size();
1582 size_t num_wanted = object_to_obj_users_.size(); 1582 size_t num_wanted = object_to_obj_users_.size();
1583 no_ci_stop_if( 1583 no_ci_stop_if(
1584 // This can happen with damaged files, e.g. if the root is part of the the pages tree. 1584 // This can happen with damaged files, e.g. if the root is part of the the pages tree.
@@ -1599,20 +1599,20 @@ Lin::calculateLinearizationData(T const& object_stream_data) @@ -1599,20 +1599,20 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1599 // only without regards to generation. 1599 // only without regards to generation.
1600 std::map<int, int> obj_to_index; 1600 std::map<int, int> obj_to_index;
1601 1601
1602 - c_shared_object_data_.nshared_first_page = toI(m->part6.size()); 1602 + c_shared_object_data_.nshared_first_page = toI(part6_.size());
1603 c_shared_object_data_.nshared_total = 1603 c_shared_object_data_.nshared_total =
1604 - c_shared_object_data_.nshared_first_page + toI(m->part8.size()); 1604 + c_shared_object_data_.nshared_first_page + toI(part8_.size());
1605 1605
1606 std::vector<CHSharedObjectEntry>& shared = c_shared_object_data_.entries; 1606 std::vector<CHSharedObjectEntry>& shared = c_shared_object_data_.entries;
1607 - for (auto& oh: m->part6) { 1607 + for (auto& oh: part6_) {
1608 int obj = oh.getObjectID(); 1608 int obj = oh.getObjectID();
1609 obj_to_index[obj] = toI(shared.size()); 1609 obj_to_index[obj] = toI(shared.size());
1610 shared.emplace_back(obj); 1610 shared.emplace_back(obj);
1611 } 1611 }
1612 - QTC::TC("qpdf", "QPDF lin part 8 empty", m->part8.empty() ? 1 : 0);  
1613 - if (!m->part8.empty()) {  
1614 - c_shared_object_data_.first_shared_obj = m->part8.at(0).getObjectID();  
1615 - for (auto& oh: m->part8) { 1612 + QTC::TC("qpdf", "QPDF lin part 8 empty", part8_.empty() ? 1 : 0);
  1613 + if (!part8_.empty()) {
  1614 + c_shared_object_data_.first_shared_obj = part8_.at(0).getObjectID();
  1615 + for (auto& oh: part8_) {
1616 int obj = oh.getObjectID(); 1616 int obj = oh.getObjectID();
1617 obj_to_index[obj] = toI(shared.size()); 1617 obj_to_index[obj] = toI(shared.size());
1618 shared.emplace_back(obj); 1618 shared.emplace_back(obj);
@@ -1661,9 +1661,9 @@ Lin::pushOutlinesToPart( @@ -1661,9 +1661,9 @@ Lin::pushOutlinesToPart(
1661 QTC::TC( 1661 QTC::TC(
1662 "qpdf", 1662 "qpdf",
1663 "QPDF lin outlines in part", 1663 "QPDF lin outlines in part",
1664 - &part == &m->part6 ? 0  
1665 - : (&part == &m->part9) ? 1  
1666 - : 9999); // can't happen 1664 + &part == &part6_ ? 0
  1665 + : (&part == &part9_) ? 1
  1666 + : 9999); // can't happen
1667 if (lc_outlines.erase(outlines_og)) { 1667 if (lc_outlines.erase(outlines_og)) {
1668 // Make sure outlines is in lc_outlines in case the file is damaged. in which case it may be 1668 // Make sure outlines is in lc_outlines in case the file is damaged. in which case it may be
1669 // included in an earlier part. 1669 // included in an earlier part.
@@ -1690,11 +1690,11 @@ Lin::getLinearizedParts( @@ -1690,11 +1690,11 @@ Lin::getLinearizedParts(
1690 std::vector<QPDFObjectHandle>& part9) 1690 std::vector<QPDFObjectHandle>& part9)
1691 { 1691 {
1692 calculateLinearizationData(obj); 1692 calculateLinearizationData(obj);
1693 - part4 = m->part4;  
1694 - part6 = m->part6;  
1695 - part7 = m->part7;  
1696 - part8 = m->part8;  
1697 - part9 = m->part9; 1693 + part4 = part4_;
  1694 + part6 = part6_;
  1695 + part7 = part7_;
  1696 + part8 = part8_;
  1697 + part9 = part9_;
1698 } 1698 }
1699 1699
1700 static inline int 1700 static inline int
libqpdf/qpdf/QPDF_private.hh
@@ -804,6 +804,7 @@ class QPDF::Doc::Linearization: Common @@ -804,6 +804,7 @@ class QPDF::Doc::Linearization: Common
804 std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users_; 804 std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users_;
805 805
806 // Linearization data 806 // Linearization data
  807 + bool linearization_warnings_{false}; // set by linearizationWarning, used by checkLinearization
807 808
808 // Linearization parameter dictionary and hint table data: may be read from file or computed 809 // Linearization parameter dictionary and hint table data: may be read from file or computed
809 // prior to writing a linearized file 810 // prior to writing a linearized file
@@ -819,6 +820,14 @@ class QPDF::Doc::Linearization: Common @@ -819,6 +820,14 @@ class QPDF::Doc::Linearization: Common
819 CHPageOffset c_page_offset_data_; 820 CHPageOffset c_page_offset_data_;
820 CHSharedObject c_shared_object_data_; 821 CHSharedObject c_shared_object_data_;
821 HGeneric c_outline_data_; 822 HGeneric c_outline_data_;
  823 +
  824 + // Object ordering data for linearized files: initialized by calculateLinearizationData().
  825 + // Part numbers refer to the PDF 1.4 specification.
  826 + std::vector<QPDFObjectHandle> part4_;
  827 + std::vector<QPDFObjectHandle> part6_;
  828 + std::vector<QPDFObjectHandle> part7_;
  829 + std::vector<QPDFObjectHandle> part8_;
  830 + std::vector<QPDFObjectHandle> part9_;
822 }; 831 };
823 832
824 class QPDF::Doc::Objects: Common 833 class QPDF::Doc::Objects: Common
@@ -1167,15 +1176,6 @@ class QPDF::Members: Doc @@ -1167,15 +1176,6 @@ class QPDF::Members: Doc
1167 // Linearization data 1176 // Linearization data
1168 qpdf_offset_t first_xref_item_offset{0}; // actual value from file 1177 qpdf_offset_t first_xref_item_offset{0}; // actual value from file
1169 bool uncompressed_after_compressed{false}; 1178 bool uncompressed_after_compressed{false};
1170 - bool linearization_warnings{false}; // set by linearizationWarning, used by checkLinearization  
1171 -  
1172 - // Object ordering data for linearized files: initialized by calculateLinearizationData().  
1173 - // Part numbers refer to the PDF 1.4 specification.  
1174 - std::vector<QPDFObjectHandle> part4;  
1175 - std::vector<QPDFObjectHandle> part6;  
1176 - std::vector<QPDFObjectHandle> part7;  
1177 - std::vector<QPDFObjectHandle> part8;  
1178 - std::vector<QPDFObjectHandle> part9;  
1179 }; 1179 };
1180 1180
1181 // The Resolver class is restricted to QPDFObject and BaseHandle so that only it can resolve 1181 // The Resolver class is restricted to QPDFObject and BaseHandle so that only it can resolve