Commit 7775aec33e55db7a9440b6bcf402c062df1ee967

Authored by m-holger
1 parent a1b646fc

Refactor QPDFWriter::preserveObjectStreams

libqpdf/QPDF.cc
... ... @@ -1365,6 +1365,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1365 1365  
1366 1366 case 2:
1367 1367 entry = {0, Compressed(toI(f1), f2)};
  1368 + object_streams_ = true;
1368 1369 break;
1369 1370  
1370 1371 default:
... ...
libqpdf/QPDFWriter.cc
... ... @@ -1936,47 +1936,26 @@ void
1936 1936 QPDFWriter::preserveObjectStreams()
1937 1937 {
1938 1938 auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
1939   - // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
1940   - // streams out of old objects that have generation numbers greater than zero. However in an
1941   - // existing PDF, all object stream objects and all objects in them must have generation 0
1942   - // because the PDF spec does not provide any way to do otherwise. This code filters out objects
1943   - // that are not allowed to be in object streams. In addition to removing objects that were
1944   - // erroneously included in object streams in the source PDF, it also prevents unreferenced
1945   - // objects from being included.
1946   - auto end = xref.cend();
1947   - m->obj.streams_empty = true;
  1939 + m->obj.streams_empty = !xref.object_streams();
  1940 + if (m->obj.streams_empty) {
  1941 + return;
  1942 + }
  1943 + // This code filters out objects that are not allowed to be in object streams. In addition to
  1944 + // removing objects that were erroneously included in object streams in the source PDF, it also
  1945 + // prevents unreferenced objects from being included.
1948 1946 if (m->preserve_unreferenced_objects) {
1949   - for (auto iter = xref.cbegin(); iter != end; ++iter) {
1950   - if (iter->second.getType() == 2) {
1951   - // Pdf contains object streams.
1952   - QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
1953   - m->obj.streams_empty = false;
1954   - m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
1955   - }
  1947 + QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
  1948 + for (auto [id, stream]: xref.compressed_objects()) {
  1949 + m->obj[id].object_stream = stream;
1956 1950 }
1957 1951 } else {
1958   - // Start by scanning for first compressed object in case we don't have any object streams to
1959   - // process.
1960   - for (auto iter = xref.cbegin(); iter != end; ++iter) {
1961   - if (iter->second.getType() == 2) {
1962   - // Pdf contains object streams.
1963   - QTC::TC("qpdf", "QPDFWriter preserve object streams");
1964   - m->obj.streams_empty = false;
1965   - auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
1966   - // The object pointed to by iter may be a previous generation, in which case it is
1967   - // removed by getCompressibleObjSet. We need to restart the loop (while the object
1968   - // table may contain multiple generations of an object).
1969   - for (iter = xref.cbegin(); iter != end; ++iter) {
1970   - if (iter->second.getType() == 2) {
1971   - auto id = static_cast<size_t>(iter->first.getObj());
1972   - if (id < eligible.size() && eligible[id]) {
1973   - m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
1974   - } else {
1975   - QTC::TC("qpdf", "QPDFWriter exclude from object stream");
1976   - }
1977   - }
1978   - }
1979   - return;
  1952 + QTC::TC("qpdf", "QPDFWriter preserve object streams");
  1953 + auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
  1954 + for (auto [id, stream]: xref.compressed_objects()) {
  1955 + if (eligible[id]) {
  1956 + m->obj[id].object_stream = stream;
  1957 + } else {
  1958 + QTC::TC("qpdf", "QPDFWriter exclude from object stream");
1980 1959 }
1981 1960 }
1982 1961 }
... ...
libqpdf/qpdf/ObjTable.hh
... ... @@ -46,6 +46,12 @@ class ObjTable: public std::vector<T>
46 46 }
47 47  
48 48 inline T const&
  49 + operator[](unsigned int idx) const
  50 + {
  51 + return element(idx);
  52 + }
  53 +
  54 + inline T const&
49 55 operator[](QPDFObjGen og) const
50 56 {
51 57 return element(static_cast<size_t>(og.getObj()));
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -112,6 +112,33 @@ class QPDF::Xref_table
112 112 return result;
113 113 }
114 114  
  115 + bool
  116 + object_streams() const noexcept
  117 + {
  118 + return object_streams_;
  119 + }
  120 +
  121 + // Return a vector of object id and stream number for each compressed object.
  122 + std::vector<std::pair<unsigned int, int>>
  123 + compressed_objects() const
  124 + {
  125 + if (!initialized()) {
  126 + throw std::logic_error("Xref_table::compressed_objects called before parsing.");
  127 + }
  128 +
  129 + std::vector<std::pair<unsigned int, int>> result;
  130 + result.reserve(table.size());
  131 +
  132 + unsigned int i{0};
  133 + for (auto const& item: table) {
  134 + if (item.type() == 2) {
  135 + result.emplace_back(i, item.stream_number());
  136 + }
  137 + ++i;
  138 + }
  139 + return result;
  140 + }
  141 +
115 142 // Temporary access to underlying table size
116 143 size_t
117 144 size() const noexcept
... ... @@ -282,6 +309,7 @@ class QPDF::Xref_table
282 309 bool initialized_{false};
283 310 bool ignore_streams_{false};
284 311 bool reconstructed_{false};
  312 + bool object_streams_{false};
285 313 // Before the xref table is initialized, max_id_ is an upper bound on the possible object ids
286 314 // that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the
287 315 // value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref
... ... @@ -293,72 +321,6 @@ class QPDF::Xref_table
293 321 qpdf_offset_t first_item_offset_{0}; // actual value from file
294 322 };
295 323  
296   -// Writer class is restricted to QPDFWriter so that only it can call certain methods.
297   -class QPDF::Writer
298   -{
299   - friend class QPDFWriter;
300   -
301   - private:
302   - static void
303   - optimize(
304   - QPDF& qpdf,
305   - QPDFWriter::ObjTable const& obj,
306   - std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
307   - {
308   - return qpdf.optimize(obj, skip_stream_parameters);
309   - }
310   -
311   - static void
312   - getLinearizedParts(
313   - QPDF& qpdf,
314   - QPDFWriter::ObjTable const& obj,
315   - std::vector<QPDFObjectHandle>& part4,
316   - std::vector<QPDFObjectHandle>& part6,
317   - std::vector<QPDFObjectHandle>& part7,
318   - std::vector<QPDFObjectHandle>& part8,
319   - std::vector<QPDFObjectHandle>& part9)
320   - {
321   - qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
322   - }
323   -
324   - static void
325   - generateHintStream(
326   - QPDF& qpdf,
327   - QPDFWriter::NewObjTable const& new_obj,
328   - QPDFWriter::ObjTable const& obj,
329   - std::shared_ptr<Buffer>& hint_stream,
330   - int& S,
331   - int& O,
332   - bool compressed)
333   - {
334   - return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
335   - }
336   -
337   - static std::vector<QPDFObjGen>
338   - getCompressibleObjGens(QPDF& qpdf)
339   - {
340   - return qpdf.getCompressibleObjVector();
341   - }
342   -
343   - static std::vector<bool>
344   - getCompressibleObjSet(QPDF& qpdf)
345   - {
346   - return qpdf.getCompressibleObjSet();
347   - }
348   -
349   - static std::map<QPDFObjGen, QPDFXRefEntry>
350   - getXRefTable(QPDF& qpdf)
351   - {
352   - return qpdf.getXRefTableInternal();
353   - }
354   -
355   - static size_t
356   - tableSize(QPDF& qpdf)
357   - {
358   - return qpdf.tableSize();
359   - }
360   -};
361   -
362 324 // The Resolver class is restricted to QPDFObject so that only it can resolve indirect
363 325 // references.
364 326 class QPDF::Resolver
... ... @@ -841,4 +803,70 @@ class QPDF::ResolveRecorder
841 803 std::set<QPDFObjGen>::const_iterator iter;
842 804 };
843 805  
  806 +// Writer class is restricted to QPDFWriter so that only it can call certain methods.
  807 +class QPDF::Writer
  808 +{
  809 + friend class QPDFWriter;
  810 +
  811 + private:
  812 + static void
  813 + optimize(
  814 + QPDF& qpdf,
  815 + QPDFWriter::ObjTable const& obj,
  816 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
  817 + {
  818 + return qpdf.optimize(obj, skip_stream_parameters);
  819 + }
  820 +
  821 + static void
  822 + getLinearizedParts(
  823 + QPDF& qpdf,
  824 + QPDFWriter::ObjTable const& obj,
  825 + std::vector<QPDFObjectHandle>& part4,
  826 + std::vector<QPDFObjectHandle>& part6,
  827 + std::vector<QPDFObjectHandle>& part7,
  828 + std::vector<QPDFObjectHandle>& part8,
  829 + std::vector<QPDFObjectHandle>& part9)
  830 + {
  831 + qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
  832 + }
  833 +
  834 + static void
  835 + generateHintStream(
  836 + QPDF& qpdf,
  837 + QPDFWriter::NewObjTable const& new_obj,
  838 + QPDFWriter::ObjTable const& obj,
  839 + std::shared_ptr<Buffer>& hint_stream,
  840 + int& S,
  841 + int& O,
  842 + bool compressed)
  843 + {
  844 + return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
  845 + }
  846 +
  847 + static std::vector<QPDFObjGen>
  848 + getCompressibleObjGens(QPDF& qpdf)
  849 + {
  850 + return qpdf.getCompressibleObjVector();
  851 + }
  852 +
  853 + static std::vector<bool>
  854 + getCompressibleObjSet(QPDF& qpdf)
  855 + {
  856 + return qpdf.getCompressibleObjSet();
  857 + }
  858 +
  859 + static Xref_table const&
  860 + getXRefTable(QPDF& qpdf)
  861 + {
  862 + return qpdf.m->xref_table;
  863 + }
  864 +
  865 + static size_t
  866 + tableSize(QPDF& qpdf)
  867 + {
  868 + return qpdf.tableSize();
  869 + }
  870 +};
  871 +
844 872 #endif // QPDF_PRIVATE_HH
... ...