Commit 7775aec33e55db7a9440b6bcf402c062df1ee967

Authored by m-holger
1 parent a1b646fc

Refactor QPDFWriter::preserveObjectStreams

libqpdf/QPDF.cc
@@ -1365,6 +1365,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) @@ -1365,6 +1365,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1365 1365
1366 case 2: 1366 case 2:
1367 entry = {0, Compressed(toI(f1), f2)}; 1367 entry = {0, Compressed(toI(f1), f2)};
  1368 + object_streams_ = true;
1368 break; 1369 break;
1369 1370
1370 default: 1371 default:
libqpdf/QPDFWriter.cc
@@ -1936,47 +1936,26 @@ void @@ -1936,47 +1936,26 @@ void
1936 QPDFWriter::preserveObjectStreams() 1936 QPDFWriter::preserveObjectStreams()
1937 { 1937 {
1938 auto const& xref = QPDF::Writer::getXRefTable(m->pdf); 1938 auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
1939 - // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object  
1940 - // streams out of old objects that have generation numbers greater than zero. However in an  
1941 - // existing PDF, all object stream objects and all objects in them must have generation 0  
1942 - // because the PDF spec does not provide any way to do otherwise. This code filters out objects  
1943 - // that are not allowed to be in object streams. In addition to removing objects that were  
1944 - // erroneously included in object streams in the source PDF, it also prevents unreferenced  
1945 - // objects from being included.  
1946 - auto end = xref.cend();  
1947 - m->obj.streams_empty = true; 1939 + m->obj.streams_empty = !xref.object_streams();
  1940 + if (m->obj.streams_empty) {
  1941 + return;
  1942 + }
  1943 + // This code filters out objects that are not allowed to be in object streams. In addition to
  1944 + // removing objects that were erroneously included in object streams in the source PDF, it also
  1945 + // prevents unreferenced objects from being included.
1948 if (m->preserve_unreferenced_objects) { 1946 if (m->preserve_unreferenced_objects) {
1949 - for (auto iter = xref.cbegin(); iter != end; ++iter) {  
1950 - if (iter->second.getType() == 2) {  
1951 - // Pdf contains object streams.  
1952 - QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");  
1953 - m->obj.streams_empty = false;  
1954 - m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();  
1955 - } 1947 + QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
  1948 + for (auto [id, stream]: xref.compressed_objects()) {
  1949 + m->obj[id].object_stream = stream;
1956 } 1950 }
1957 } else { 1951 } else {
1958 - // Start by scanning for first compressed object in case we don't have any object streams to  
1959 - // process.  
1960 - for (auto iter = xref.cbegin(); iter != end; ++iter) {  
1961 - if (iter->second.getType() == 2) {  
1962 - // Pdf contains object streams.  
1963 - QTC::TC("qpdf", "QPDFWriter preserve object streams");  
1964 - m->obj.streams_empty = false;  
1965 - auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);  
1966 - // The object pointed to by iter may be a previous generation, in which case it is  
1967 - // removed by getCompressibleObjSet. We need to restart the loop (while the object  
1968 - // table may contain multiple generations of an object).  
1969 - for (iter = xref.cbegin(); iter != end; ++iter) {  
1970 - if (iter->second.getType() == 2) {  
1971 - auto id = static_cast<size_t>(iter->first.getObj());  
1972 - if (id < eligible.size() && eligible[id]) {  
1973 - m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();  
1974 - } else {  
1975 - QTC::TC("qpdf", "QPDFWriter exclude from object stream");  
1976 - }  
1977 - }  
1978 - }  
1979 - return; 1952 + QTC::TC("qpdf", "QPDFWriter preserve object streams");
  1953 + auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
  1954 + for (auto [id, stream]: xref.compressed_objects()) {
  1955 + if (eligible[id]) {
  1956 + m->obj[id].object_stream = stream;
  1957 + } else {
  1958 + QTC::TC("qpdf", "QPDFWriter exclude from object stream");
1980 } 1959 }
1981 } 1960 }
1982 } 1961 }
libqpdf/qpdf/ObjTable.hh
@@ -46,6 +46,12 @@ class ObjTable: public std::vector<T> @@ -46,6 +46,12 @@ class ObjTable: public std::vector<T>
46 } 46 }
47 47
48 inline T const& 48 inline T const&
  49 + operator[](unsigned int idx) const
  50 + {
  51 + return element(idx);
  52 + }
  53 +
  54 + inline T const&
49 operator[](QPDFObjGen og) const 55 operator[](QPDFObjGen og) const
50 { 56 {
51 return element(static_cast<size_t>(og.getObj())); 57 return element(static_cast<size_t>(og.getObj()));
libqpdf/qpdf/QPDF_private.hh
@@ -112,6 +112,33 @@ class QPDF::Xref_table @@ -112,6 +112,33 @@ class QPDF::Xref_table
112 return result; 112 return result;
113 } 113 }
114 114
  115 + bool
  116 + object_streams() const noexcept
  117 + {
  118 + return object_streams_;
  119 + }
  120 +
  121 + // Return a vector of object id and stream number for each compressed object.
  122 + std::vector<std::pair<unsigned int, int>>
  123 + compressed_objects() const
  124 + {
  125 + if (!initialized()) {
  126 + throw std::logic_error("Xref_table::compressed_objects called before parsing.");
  127 + }
  128 +
  129 + std::vector<std::pair<unsigned int, int>> result;
  130 + result.reserve(table.size());
  131 +
  132 + unsigned int i{0};
  133 + for (auto const& item: table) {
  134 + if (item.type() == 2) {
  135 + result.emplace_back(i, item.stream_number());
  136 + }
  137 + ++i;
  138 + }
  139 + return result;
  140 + }
  141 +
115 // Temporary access to underlying table size 142 // Temporary access to underlying table size
116 size_t 143 size_t
117 size() const noexcept 144 size() const noexcept
@@ -282,6 +309,7 @@ class QPDF::Xref_table @@ -282,6 +309,7 @@ class QPDF::Xref_table
282 bool initialized_{false}; 309 bool initialized_{false};
283 bool ignore_streams_{false}; 310 bool ignore_streams_{false};
284 bool reconstructed_{false}; 311 bool reconstructed_{false};
  312 + bool object_streams_{false};
285 // Before the xref table is initialized, max_id_ is an upper bound on the possible object ids 313 // Before the xref table is initialized, max_id_ is an upper bound on the possible object ids
286 // that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the 314 // that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the
287 // value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref 315 // value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref
@@ -293,72 +321,6 @@ class QPDF::Xref_table @@ -293,72 +321,6 @@ class QPDF::Xref_table
293 qpdf_offset_t first_item_offset_{0}; // actual value from file 321 qpdf_offset_t first_item_offset_{0}; // actual value from file
294 }; 322 };
295 323
296 -// Writer class is restricted to QPDFWriter so that only it can call certain methods.  
297 -class QPDF::Writer  
298 -{  
299 - friend class QPDFWriter;  
300 -  
301 - private:  
302 - static void  
303 - optimize(  
304 - QPDF& qpdf,  
305 - QPDFWriter::ObjTable const& obj,  
306 - std::function<int(QPDFObjectHandle&)> skip_stream_parameters)  
307 - {  
308 - return qpdf.optimize(obj, skip_stream_parameters);  
309 - }  
310 -  
311 - static void  
312 - getLinearizedParts(  
313 - QPDF& qpdf,  
314 - QPDFWriter::ObjTable const& obj,  
315 - std::vector<QPDFObjectHandle>& part4,  
316 - std::vector<QPDFObjectHandle>& part6,  
317 - std::vector<QPDFObjectHandle>& part7,  
318 - std::vector<QPDFObjectHandle>& part8,  
319 - std::vector<QPDFObjectHandle>& part9)  
320 - {  
321 - qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);  
322 - }  
323 -  
324 - static void  
325 - generateHintStream(  
326 - QPDF& qpdf,  
327 - QPDFWriter::NewObjTable const& new_obj,  
328 - QPDFWriter::ObjTable const& obj,  
329 - std::shared_ptr<Buffer>& hint_stream,  
330 - int& S,  
331 - int& O,  
332 - bool compressed)  
333 - {  
334 - return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);  
335 - }  
336 -  
337 - static std::vector<QPDFObjGen>  
338 - getCompressibleObjGens(QPDF& qpdf)  
339 - {  
340 - return qpdf.getCompressibleObjVector();  
341 - }  
342 -  
343 - static std::vector<bool>  
344 - getCompressibleObjSet(QPDF& qpdf)  
345 - {  
346 - return qpdf.getCompressibleObjSet();  
347 - }  
348 -  
349 - static std::map<QPDFObjGen, QPDFXRefEntry>  
350 - getXRefTable(QPDF& qpdf)  
351 - {  
352 - return qpdf.getXRefTableInternal();  
353 - }  
354 -  
355 - static size_t  
356 - tableSize(QPDF& qpdf)  
357 - {  
358 - return qpdf.tableSize();  
359 - }  
360 -};  
361 -  
362 // The Resolver class is restricted to QPDFObject so that only it can resolve indirect 324 // The Resolver class is restricted to QPDFObject so that only it can resolve indirect
363 // references. 325 // references.
364 class QPDF::Resolver 326 class QPDF::Resolver
@@ -841,4 +803,70 @@ class QPDF::ResolveRecorder @@ -841,4 +803,70 @@ class QPDF::ResolveRecorder
841 std::set<QPDFObjGen>::const_iterator iter; 803 std::set<QPDFObjGen>::const_iterator iter;
842 }; 804 };
843 805
  806 +// Writer class is restricted to QPDFWriter so that only it can call certain methods.
  807 +class QPDF::Writer
  808 +{
  809 + friend class QPDFWriter;
  810 +
  811 + private:
  812 + static void
  813 + optimize(
  814 + QPDF& qpdf,
  815 + QPDFWriter::ObjTable const& obj,
  816 + std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
  817 + {
  818 + return qpdf.optimize(obj, skip_stream_parameters);
  819 + }
  820 +
  821 + static void
  822 + getLinearizedParts(
  823 + QPDF& qpdf,
  824 + QPDFWriter::ObjTable const& obj,
  825 + std::vector<QPDFObjectHandle>& part4,
  826 + std::vector<QPDFObjectHandle>& part6,
  827 + std::vector<QPDFObjectHandle>& part7,
  828 + std::vector<QPDFObjectHandle>& part8,
  829 + std::vector<QPDFObjectHandle>& part9)
  830 + {
  831 + qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
  832 + }
  833 +
  834 + static void
  835 + generateHintStream(
  836 + QPDF& qpdf,
  837 + QPDFWriter::NewObjTable const& new_obj,
  838 + QPDFWriter::ObjTable const& obj,
  839 + std::shared_ptr<Buffer>& hint_stream,
  840 + int& S,
  841 + int& O,
  842 + bool compressed)
  843 + {
  844 + return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
  845 + }
  846 +
  847 + static std::vector<QPDFObjGen>
  848 + getCompressibleObjGens(QPDF& qpdf)
  849 + {
  850 + return qpdf.getCompressibleObjVector();
  851 + }
  852 +
  853 + static std::vector<bool>
  854 + getCompressibleObjSet(QPDF& qpdf)
  855 + {
  856 + return qpdf.getCompressibleObjSet();
  857 + }
  858 +
  859 + static Xref_table const&
  860 + getXRefTable(QPDF& qpdf)
  861 + {
  862 + return qpdf.m->xref_table;
  863 + }
  864 +
  865 + static size_t
  866 + tableSize(QPDF& qpdf)
  867 + {
  868 + return qpdf.tableSize();
  869 + }
  870 +};
  871 +
844 #endif // QPDF_PRIVATE_HH 872 #endif // QPDF_PRIVATE_HH