Commit 0df0d00c588f3ca5f71524f96817a84c4b98bc2c

Authored by m-holger
1 parent ae00ee61

Add method QPDF::Writer::getCompressibleObjSet

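Build the set of compressible objects directly as a std::vector<bool> indexed by object ID, instead of first creating an intermediate std::vector<QPDFObjGen> and copying it into a std::set<QPDFObjGen>.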
include/qpdf/QPDF.hh
@@ -756,7 +756,13 @@ class QPDF @@ -756,7 +756,13 @@ class QPDF
756 static std::vector<QPDFObjGen> 756 static std::vector<QPDFObjGen>
757 getCompressibleObjGens(QPDF& qpdf) 757 getCompressibleObjGens(QPDF& qpdf)
758 { 758 {
759 - return qpdf.getCompressibleObjGens(); 759 + return qpdf.getCompressibleObjVector();
  760 + }
  761 +
  762 + static std::vector<bool>
  763 + getCompressibleObjSet(QPDF& qpdf)
  764 + {
  765 + return qpdf.getCompressibleObjSet();
760 } 766 }
761 767
762 static std::map<QPDFObjGen, QPDFXRefEntry> const& 768 static std::map<QPDFObjGen, QPDFXRefEntry> const&
@@ -1110,7 +1116,10 @@ class QPDF @@ -1110,7 +1116,10 @@ class QPDF
1110 bool compressed); 1116 bool compressed);
1111 1117
1112 // Get a list of objects that would be permitted in an object stream. 1118 // Get a list of objects that would be permitted in an object stream.
1113 - std::vector<QPDFObjGen> getCompressibleObjGens(); 1119 + template <typename T>
  1120 + std::vector<T> getCompressibleObjGens();
  1121 + std::vector<QPDFObjGen> getCompressibleObjVector();
  1122 + std::vector<bool> getCompressibleObjSet();
1114 1123
1115 // methods to support page handling 1124 // methods to support page handling
1116 1125
libqpdf/QPDF.cc
@@ -2397,6 +2397,19 @@ QPDF::tableSize() @@ -2397,6 +2397,19 @@ QPDF::tableSize()
2397 } 2397 }
2398 2398
2399 std::vector<QPDFObjGen> 2399 std::vector<QPDFObjGen>
  2400 +QPDF::getCompressibleObjVector()
  2401 +{
  2402 + return getCompressibleObjGens<QPDFObjGen>();
  2403 +}
  2404 +
  2405 +std::vector<bool>
  2406 +QPDF::getCompressibleObjSet()
  2407 +{
  2408 + return getCompressibleObjGens<bool>();
  2409 +}
  2410 +
  2411 +template <typename T>
  2412 +std::vector<T>
2400 QPDF::getCompressibleObjGens() 2413 QPDF::getCompressibleObjGens()
2401 { 2414 {
2402 // Return a list of objects that are allowed to be in object streams. Walk through the objects 2415 // Return a list of objects that are allowed to be in object streams. Walk through the objects
@@ -2414,7 +2427,14 @@ QPDF::getCompressibleObjGens() @@ -2414,7 +2427,14 @@ QPDF::getCompressibleObjGens()
2414 std::vector<QPDFObjectHandle> queue; 2427 std::vector<QPDFObjectHandle> queue;
2415 queue.reserve(512); 2428 queue.reserve(512);
2416 queue.push_back(m->trailer); 2429 queue.push_back(m->trailer);
2417 - std::vector<QPDFObjGen> result; 2430 + std::vector<T> result;
  2431 + if constexpr (std::is_same_v<T, QPDFObjGen>) {
  2432 + result.reserve(m->obj_cache.size());
  2433 + } else if constexpr (std::is_same_v<T, bool>) {
  2434 + result.resize(max_obj + 1U, false);
  2435 + } else {
  2436 + throw std::logic_error("Unsupported type in QPDF::getCompressibleObjGens");
  2437 + }
2418 while (!queue.empty()) { 2438 while (!queue.empty()) {
2419 auto obj = queue.back(); 2439 auto obj = queue.back();
2420 queue.pop_back(); 2440 queue.pop_back();
@@ -2446,7 +2466,11 @@ QPDF::getCompressibleObjGens() @@ -2446,7 +2466,11 @@ QPDF::getCompressibleObjGens()
2446 } else if (!(obj.isStream() || 2466 } else if (!(obj.isStream() ||
2447 (obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") && 2467 (obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") &&
2448 obj.hasKey("/Contents")))) { 2468 obj.hasKey("/Contents")))) {
2449 - result.push_back(og); 2469 + if constexpr (std::is_same_v<T, QPDFObjGen>) {
  2470 + result.push_back(og);
  2471 + } else if constexpr (std::is_same_v<T, bool>) {
  2472 + result[id + 1U] = true;
  2473 + }
2450 } 2474 }
2451 } 2475 }
2452 if (obj.isStream()) { 2476 if (obj.isStream()) {
libqpdf/QPDFWriter.cc
@@ -1964,13 +1964,11 @@ QPDFWriter::preserveObjectStreams() @@ -1964,13 +1964,11 @@ QPDFWriter::preserveObjectStreams()
1964 } 1964 }
1965 } 1965 }
1966 } else { 1966 } else {
1967 - std::set<QPDFObjGen> eligible;  
1968 - std::vector<QPDFObjGen> eligible_v = QPDF::Writer::getCompressibleObjGens(m->pdf);  
1969 - eligible = std::set<QPDFObjGen>(eligible_v.begin(), eligible_v.end()); 1967 + auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
1970 for (; iter != end; ++iter) { 1968 for (; iter != end; ++iter) {
1971 if (iter->second.getType() == 2) { 1969 if (iter->second.getType() == 2) {
1972 - QPDFObjGen og(iter->first.getObj(), 0);  
1973 - if (eligible.count(og)) { 1970 + auto id = static_cast<size_t>(iter->first.getObj());
  1971 + if (id < eligible.size() && eligible[id]) {
1974 m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); 1972 m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
1975 } else { 1973 } else {
1976 QTC::TC("qpdf", "QPDFWriter exclude from object stream"); 1974 QTC::TC("qpdf", "QPDFWriter exclude from object stream");
@@ -2009,22 +2007,18 @@ QPDFWriter::generateObjectStreams() @@ -2009,22 +2007,18 @@ QPDFWriter::generateObjectStreams()
2009 ++n_per; 2007 ++n_per;
2010 } 2008 }
2011 unsigned int n = 0; 2009 unsigned int n = 0;
2012 - int cur_ostream = 0;  
2013 - for (auto const& iter: eligible) {  
2014 - if ((n % n_per) == 0) {  
2015 - if (n > 0) {  
2016 - QTC::TC("qpdf", "QPDFWriter generate >1 ostream");  
2017 - } 2010 + int cur_ostream = m->pdf.newIndirectNull().getObjectID();
  2011 + for (auto const& item: eligible) {
  2012 + if (n == n_per) {
  2013 + QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
2018 n = 0; 2014 n = 0;
2019 - }  
2020 - if (n == 0) {  
2021 // Construct a new null object as the "original" object stream. The rest of the code 2015 // Construct a new null object as the "original" object stream. The rest of the code
2022 // knows that this means we're creating the object stream from scratch. 2016 // knows that this means we're creating the object stream from scratch.
2023 - cur_ostream = m->pdf.makeIndirectObject(QPDFObjectHandle::newNull()).getObjectID(); 2017 + cur_ostream = m->pdf.newIndirectNull().getObjectID();
2024 } 2018 }
2025 - auto& obj = m->obj[iter]; 2019 + auto& obj = m->obj[item];
2026 obj.object_stream = cur_ostream; 2020 obj.object_stream = cur_ostream;
2027 - obj.gen = iter.getGen(); 2021 + obj.gen = item.getGen();
2028 ++n; 2022 ++n;
2029 } 2023 }
2030 } 2024 }