Commit e7a1009d6a0561547f6f3de1321369cb8dc90bd1

Authored by m-holger
1 parent dd3de389

Refactor linearization data: encapsulate part vectors and warnings in `Lin`, update usage across `QPDF_linearization`, and streamline related logic.
libqpdf/QPDF_linearization.cc
... ... @@ -352,7 +352,7 @@ Lin::filterCompressedObjects(QPDFWriter::ObjTable const& obj)
352 352 void
353 353 Lin::linearizationWarning(std::string_view msg)
354 354 {
355   - m->linearization_warnings = true;
  355 + linearization_warnings_ = true;
356 356 warn(qpdf_e_linearization, "", 0, std::string(msg));
357 357 }
358 358  
... ... @@ -368,7 +368,7 @@ Lin::check()
368 368 try {
369 369 readLinearizationData();
370 370 checkLinearizationInternal();
371   - return !m->linearization_warnings;
  371 + return !linearization_warnings_;
372 372 } catch (std::runtime_error& e) {
373 373 linearizationWarning(
374 374 "error encountered while checking linearization data: " + std::string(e.what()));
... ... @@ -756,11 +756,11 @@ Lin::checkLinearizationInternal()
756 756 // suite doesn't contain any files with threads.
757 757  
758 758 no_ci_stop_if(
759   - m->part6.empty(), "linearization part 6 unexpectedly empty" //
  759 + part6_.empty(), "linearization part 6 unexpectedly empty" //
760 760 );
761 761 qpdf_offset_t min_E = -1;
762 762 qpdf_offset_t max_E = -1;
763   - for (auto const& oh: m->part6) {
  763 + for (auto const& oh: part6_) {
764 764 QPDFObjGen og(oh.getObjGen());
765 765 // All objects have to have been dereferenced to be classified.
766 766 util::assertion(m->obj_cache.contains(og), "linearization part6 object not in cache");
... ... @@ -993,10 +993,10 @@ Lin::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<int
993 993 for (int i = 0; i < so.nshared_total; ++i) {
994 994 if (i == so.nshared_first_page) {
995 995 QTC::TC("qpdf", "QPDF lin check shared past first page");
996   - if (m->part8.empty()) {
  996 + if (part8_.empty()) {
997 997 linearizationWarning("part 8 is empty but nshared_total > nshared_first_page");
998 998 } else {
999   - int obj = m->part8.at(0).getObjectID();
  999 + int obj = part8_.at(0).getObjectID();
1000 1000 if (obj != so.first_shared_obj) {
1001 1001 linearizationWarning(
1002 1002 "first shared object number mismatch: hint table = " +
... ... @@ -1275,11 +1275,11 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1275 1275  
1276 1276 // * outlines: part 6 or 9
1277 1277  
1278   - m->part4.clear();
1279   - m->part6.clear();
1280   - m->part7.clear();
1281   - m->part8.clear();
1282   - m->part9.clear();
  1278 + part4_.clear();
  1279 + part6_.clear();
  1280 + part7_.clear();
  1281 + part8_.clear();
  1282 + part9_.clear();
1283 1283 c_linp_ = LinParameters();
1284 1284 c_page_offset_data_ = CHPageOffset();
1285 1285 c_shared_object_data_ = CHSharedObject();
... ... @@ -1426,9 +1426,9 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1426 1426 lc_root.size() != 1, "found other than one root while calculating linearization data" //
1427 1427 );
1428 1428  
1429   - m->part4.emplace_back(qpdf.getObject(*(lc_root.begin())));
  1429 + part4_.emplace_back(qpdf.getObject(*(lc_root.begin())));
1430 1430 for (auto const& og: lc_open_document) {
1431   - m->part4.emplace_back(qpdf.getObject(og));
  1431 + part4_.emplace_back(qpdf.getObject(og));
1432 1432 }
1433 1433  
1434 1434 // Part 6: first page objects. Note: implementation note 124 states that Acrobat always treats
... ... @@ -1444,30 +1444,30 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1444 1444 !lc_first_page_private.erase(first_page_og), "unable to linearize first page" //
1445 1445 );
1446 1446 c_linp_.first_page_object = uc_pages.at(0).getObjectID();
1447   - m->part6.emplace_back(uc_pages.at(0));
  1447 + part6_.emplace_back(uc_pages.at(0));
1448 1448  
1449 1449 // The PDF spec "recommends" an order for the rest of the objects, but we are going to disregard
1450 1450 // it except to the extent that it groups private and shared objects contiguously for the sake
1451 1451 // of hint tables.
1452 1452  
1453 1453 for (auto const& og: lc_first_page_private) {
1454   - m->part6.emplace_back(qpdf.getObject(og));
  1454 + part6_.emplace_back(qpdf.getObject(og));
1455 1455 }
1456 1456  
1457 1457 for (auto const& og: lc_first_page_shared) {
1458   - m->part6.emplace_back(qpdf.getObject(og));
  1458 + part6_.emplace_back(qpdf.getObject(og));
1459 1459 }
1460 1460  
1461 1461 // Place the outline dictionary if it goes in the first page section.
1462 1462 if (outlines_in_first_page) {
1463   - pushOutlinesToPart(m->part6, lc_outlines, object_stream_data);
  1463 + pushOutlinesToPart(part6_, lc_outlines, object_stream_data);
1464 1464 }
1465 1465  
1466 1466 // Fill in page offset hint table information for the first page. The PDF spec says that
1467 1467 // nshared_objects should be zero for the first page. pdlin does not appear to obey this, but
1468 1468 // it fills in garbage values for all the shared object identifiers on the first page.
1469 1469  
1470   - c_page_offset_data_.entries.at(0).nobjects = toI(m->part6.size());
  1470 + c_page_offset_data_.entries.at(0).nobjects = toI(part6_.size());
1471 1471  
1472 1472 // Part 7: other pages' private objects
1473 1473  
... ... @@ -1481,7 +1481,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1481 1481 "unable to linearize page " + std::to_string(i) //
1482 1482 );
1483 1483  
1484   - m->part7.emplace_back(uc_pages.at(i));
  1484 + part7_.emplace_back(uc_pages.at(i));
1485 1485  
1486 1486 // Place all non-shared objects referenced by this page, updating the page object count for
1487 1487 // the hint table.
... ... @@ -1496,7 +1496,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1496 1496  
1497 1497 for (auto const& og: obj_user_to_objects_[ou]) {
1498 1498 if (lc_other_page_private.erase(og)) {
1499   - m->part7.emplace_back(qpdf.getObject(og));
  1499 + part7_.emplace_back(qpdf.getObject(og));
1500 1500 ++c_page_offset_data_.entries.at(i).nobjects;
1501 1501 }
1502 1502 }
... ... @@ -1512,7 +1512,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1512 1512  
1513 1513 // Order is unimportant.
1514 1514 for (auto const& og: lc_other_page_shared) {
1515   - m->part8.emplace_back(qpdf.getObject(og));
  1515 + part8_.emplace_back(qpdf.getObject(og));
1516 1516 }
1517 1517  
1518 1518 // Part 9: other objects
... ... @@ -1529,7 +1529,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1529 1529 );
1530 1530 for (auto const& og: pages_ogs) {
1531 1531 if (lc_other.erase(og)) {
1532   - m->part9.emplace_back(qpdf.getObject(og));
  1532 + part9_.emplace_back(qpdf.getObject(og));
1533 1533 }
1534 1534 }
1535 1535  
... ... @@ -1541,7 +1541,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1541 1541 QPDFObjGen thumb_og(thumb.getObjGen());
1542 1542 // Output the thumbnail itself
1543 1543 if (lc_thumbnail_private.erase(thumb_og) && !thumb.null()) {
1544   - m->part9.emplace_back(thumb);
  1544 + part9_.emplace_back(thumb);
1545 1545 } else {
1546 1546 // No internal error this time...there's nothing to stop this object from having
1547 1547 // been referred to somewhere else outside of a page's /Thumb, and if it had been,
... ... @@ -1550,7 +1550,7 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1550 1550 }
1551 1551 for (auto const& og: obj_user_to_objects_[{ObjUser::ou_thumb, i}]) {
1552 1552 if (lc_thumbnail_private.erase(og)) {
1553   - m->part9.emplace_back(qpdf.getObject(og));
  1553 + part9_.emplace_back(qpdf.getObject(og));
1554 1554 }
1555 1555 }
1556 1556 }
... ... @@ -1562,23 +1562,23 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1562 1562  
1563 1563 // Place shared thumbnail objects
1564 1564 for (auto const& og: lc_thumbnail_shared) {
1565   - m->part9.emplace_back(qpdf.getObject(og));
  1565 + part9_.emplace_back(qpdf.getObject(og));
1566 1566 }
1567 1567  
1568 1568 // Place outlines unless in first page
1569 1569 if (!outlines_in_first_page) {
1570   - pushOutlinesToPart(m->part9, lc_outlines, object_stream_data);
  1570 + pushOutlinesToPart(part9_, lc_outlines, object_stream_data);
1571 1571 }
1572 1572  
1573 1573 // Place all remaining objects
1574 1574 for (auto const& og: lc_other) {
1575   - m->part9.emplace_back(qpdf.getObject(og));
  1575 + part9_.emplace_back(qpdf.getObject(og));
1576 1576 }
1577 1577  
1578 1578 // Make sure we got everything exactly once.
1579 1579  
1580 1580 size_t num_placed =
1581   - m->part4.size() + m->part6.size() + m->part7.size() + m->part8.size() + m->part9.size();
  1581 + part4_.size() + part6_.size() + part7_.size() + part8_.size() + part9_.size();
1582 1582 size_t num_wanted = object_to_obj_users_.size();
1583 1583 no_ci_stop_if(
1584 1584 // This can happen with damaged files, e.g. if the root is part of the pages tree.
... ... @@ -1599,20 +1599,20 @@ Lin::calculateLinearizationData(T const& object_stream_data)
1599 1599 // only without regards to generation.
1600 1600 std::map<int, int> obj_to_index;
1601 1601  
1602   - c_shared_object_data_.nshared_first_page = toI(m->part6.size());
  1602 + c_shared_object_data_.nshared_first_page = toI(part6_.size());
1603 1603 c_shared_object_data_.nshared_total =
1604   - c_shared_object_data_.nshared_first_page + toI(m->part8.size());
  1604 + c_shared_object_data_.nshared_first_page + toI(part8_.size());
1605 1605  
1606 1606 std::vector<CHSharedObjectEntry>& shared = c_shared_object_data_.entries;
1607   - for (auto& oh: m->part6) {
  1607 + for (auto& oh: part6_) {
1608 1608 int obj = oh.getObjectID();
1609 1609 obj_to_index[obj] = toI(shared.size());
1610 1610 shared.emplace_back(obj);
1611 1611 }
1612   - QTC::TC("qpdf", "QPDF lin part 8 empty", m->part8.empty() ? 1 : 0);
1613   - if (!m->part8.empty()) {
1614   - c_shared_object_data_.first_shared_obj = m->part8.at(0).getObjectID();
1615   - for (auto& oh: m->part8) {
  1612 + QTC::TC("qpdf", "QPDF lin part 8 empty", part8_.empty() ? 1 : 0);
  1613 + if (!part8_.empty()) {
  1614 + c_shared_object_data_.first_shared_obj = part8_.at(0).getObjectID();
  1615 + for (auto& oh: part8_) {
1616 1616 int obj = oh.getObjectID();
1617 1617 obj_to_index[obj] = toI(shared.size());
1618 1618 shared.emplace_back(obj);
... ... @@ -1661,9 +1661,9 @@ Lin::pushOutlinesToPart(
1661 1661 QTC::TC(
1662 1662 "qpdf",
1663 1663 "QPDF lin outlines in part",
1664   - &part == &m->part6 ? 0
1665   - : (&part == &m->part9) ? 1
1666   - : 9999); // can't happen
  1664 + &part == &part6_ ? 0
  1665 + : (&part == &part9_) ? 1
  1666 + : 9999); // can't happen
1667 1667 if (lc_outlines.erase(outlines_og)) {
1668 1668 // Make sure outlines is in lc_outlines in case the file is damaged, in which case it may be
1669 1669 // included in an earlier part.
... ... @@ -1690,11 +1690,11 @@ Lin::getLinearizedParts(
1690 1690 std::vector<QPDFObjectHandle>& part9)
1691 1691 {
1692 1692 calculateLinearizationData(obj);
1693   - part4 = m->part4;
1694   - part6 = m->part6;
1695   - part7 = m->part7;
1696   - part8 = m->part8;
1697   - part9 = m->part9;
  1693 + part4 = part4_;
  1694 + part6 = part6_;
  1695 + part7 = part7_;
  1696 + part8 = part8_;
  1697 + part9 = part9_;
1698 1698 }
1699 1699  
1700 1700 static inline int
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -804,6 +804,7 @@ class QPDF::Doc::Linearization: Common
804 804 std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users_;
805 805  
806 806 // Linearization data
  807 + bool linearization_warnings_{false}; // set by linearizationWarning, used by checkLinearization
807 808  
808 809 // Linearization parameter dictionary and hint table data: may be read from file or computed
809 810 // prior to writing a linearized file
... ... @@ -819,6 +820,14 @@ class QPDF::Doc::Linearization: Common
819 820 CHPageOffset c_page_offset_data_;
820 821 CHSharedObject c_shared_object_data_;
821 822 HGeneric c_outline_data_;
  823 +
  824 + // Object ordering data for linearized files: initialized by calculateLinearizationData().
  825 + // Part numbers refer to the PDF 1.4 specification.
  826 + std::vector<QPDFObjectHandle> part4_;
  827 + std::vector<QPDFObjectHandle> part6_;
  828 + std::vector<QPDFObjectHandle> part7_;
  829 + std::vector<QPDFObjectHandle> part8_;
  830 + std::vector<QPDFObjectHandle> part9_;
822 831 };
823 832  
824 833 class QPDF::Doc::Objects: Common
... ... @@ -1167,15 +1176,6 @@ class QPDF::Members: Doc
1167 1176 // Linearization data
1168 1177 qpdf_offset_t first_xref_item_offset{0}; // actual value from file
1169 1178 bool uncompressed_after_compressed{false};
1170   - bool linearization_warnings{false}; // set by linearizationWarning, used by checkLinearization
1171   -
1172   - // Object ordering data for linearized files: initialized by calculateLinearizationData().
1173   - // Part numbers refer to the PDF 1.4 specification.
1174   - std::vector<QPDFObjectHandle> part4;
1175   - std::vector<QPDFObjectHandle> part6;
1176   - std::vector<QPDFObjectHandle> part7;
1177   - std::vector<QPDFObjectHandle> part8;
1178   - std::vector<QPDFObjectHandle> part9;
1179 1179 };
1180 1180  
1181 1181 // The Resolver class is restricted to QPDFObject and BaseHandle so that only it can resolve
... ...