Commit a70158a3d3d84e887ecc710b57d1bccd77b7cada

Authored by m-holger
1 parent a15d0afa

Refactor `QPDFWriter`: move `initializeTables`, `doWriteSetup`, `initializeSpecialStreams`, `preserveObjectStreams`, and `generateObjectStreams` to `QPDFWriter::Members`.

Update related logic and remove obsolete test coverage entries.
include/qpdf/QPDFWriter.hh
... ... @@ -491,9 +491,6 @@ class QPDFWriter
491 491 size_t stream_length = 0,
492 492 bool compress = false);
493 493 void unparseChild(QPDFObjectHandle const& child, size_t level, int flags);
494   - void initializeSpecialStreams();
495   - void preserveObjectStreams();
496   - void generateObjectStreams();
497 494 void interpretR3EncryptionParameters(
498 495 bool allow_accessibility,
499 496 bool allow_extract,
... ... @@ -517,8 +514,6 @@ class QPDFWriter
517 514 void writeLinearized();
518 515 void enqueuePart(std::vector<QPDFObjectHandle>& part);
519 516 void writeEncryptionDictionary();
520   - void initializeTables(size_t extra = 0);
521   - void doWriteSetup();
522 517 void writeHeader();
523 518 void writeHintStream(int hint_id);
524 519 qpdf_offset_t writeXRefTable(trailer_e which, int first, int last, int size);
... ...
libqpdf/QPDFWriter.cc
... ... @@ -286,12 +286,17 @@ class QPDFWriter::Members
286 286  
287 287 void setMinimumPDFVersion(std::string const& version, int extension_level);
288 288 void copyEncryptionParameters(QPDF&);
  289 + void doWriteSetup();
289 290  
290 291 void disableIncompatibleEncryption(int major, int minor, int extension_level);
291 292 void parseVersion(std::string const& version, int& major, int& minor) const;
292 293 int compareVersions(int major1, int minor1, int major2, int minor2) const;
293 294 void generateID(bool encrypted);
294 295 std::string getOriginalID1();
  296 + void initializeTables(size_t extra = 0);
  297 + void preserveObjectStreams();
  298 + void generateObjectStreams();
  299 + void initializeSpecialStreams();
295 300  
296 301 private:
297 302 QPDFWriter& w;
... ... @@ -1964,13 +1969,13 @@ QPDFWriter::Members::generateID(bool encrypted)
1964 1969 }
1965 1970  
1966 1971 void
1967   -QPDFWriter::initializeSpecialStreams()
  1972 +QPDFWriter::Members::initializeSpecialStreams()
1968 1973 {
1969 1974 // Mark all page content streams in case we are filtering or normalizing.
1970   - std::vector<QPDFObjectHandle> pages = m->pdf.getAllPages();
  1975 + std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
1971 1976 int num = 0;
1972 1977 for (auto& page: pages) {
1973   - m->page_object_to_seq[page.getObjGen()] = ++num;
  1978 + page_object_to_seq[page.getObjGen()] = ++num;
1974 1979 QPDFObjectHandle contents = page.getKey("/Contents");
1975 1980 std::vector<QPDFObjGen> contents_objects;
1976 1981 if (contents.isArray()) {
... ... @@ -1983,16 +1988,16 @@ QPDFWriter::initializeSpecialStreams()
1983 1988 }
1984 1989  
1985 1990 for (auto const& c: contents_objects) {
1986   - m->contents_to_page_seq[c] = num;
1987   - m->normalized_streams.insert(c);
  1991 + contents_to_page_seq[c] = num;
  1992 + normalized_streams.insert(c);
1988 1993 }
1989 1994 }
1990 1995 }
1991 1996  
1992 1997 void
1993   -QPDFWriter::preserveObjectStreams()
  1998 +QPDFWriter::Members::preserveObjectStreams()
1994 1999 {
1995   - auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
  2000 + auto const& xref = QPDF::Writer::getXRefTable(pdf);
1996 2001 // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
1997 2002 // streams out of old objects that have generation numbers greater than zero. However in an
1998 2003 // existing PDF, all object stream objects and all objects in them must have generation 0
... ... @@ -2001,14 +2006,13 @@ QPDFWriter::preserveObjectStreams()
2001 2006 // erroneously included in object streams in the source PDF, it also prevents unreferenced
2002 2007 // objects from being included.
2003 2008 auto end = xref.cend();
2004   - m->obj.streams_empty = true;
2005   - if (m->preserve_unreferenced_objects) {
  2009 + obj.streams_empty = true;
  2010 + if (preserve_unreferenced_objects) {
2006 2011 for (auto iter = xref.cbegin(); iter != end; ++iter) {
2007 2012 if (iter->second.getType() == 2) {
2008 2013 // Pdf contains object streams.
2009   - QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
2010   - m->obj.streams_empty = false;
2011   - m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
  2014 + obj.streams_empty = false;
  2015 + obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2012 2016 }
2013 2017 }
2014 2018 } else {
... ... @@ -2017,9 +2021,8 @@ QPDFWriter::preserveObjectStreams()
2017 2021 for (auto iter = xref.cbegin(); iter != end; ++iter) {
2018 2022 if (iter->second.getType() == 2) {
2019 2023 // Pdf contains object streams.
2020   - QTC::TC("qpdf", "QPDFWriter preserve object streams");
2021   - m->obj.streams_empty = false;
2022   - auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
  2024 + obj.streams_empty = false;
  2025 + auto eligible = QPDF::Writer::getCompressibleObjSet(pdf);
2023 2026 // The object pointed to by iter may be a previous generation, in which case it is
2024 2027 // removed by getCompressibleObjSet. We need to restart the loop (while the object
2025 2028 // table may contain multiple generations of an object).
... ... @@ -2027,7 +2030,7 @@ QPDFWriter::preserveObjectStreams()
2027 2030 if (iter->second.getType() == 2) {
2028 2031 auto id = static_cast<size_t>(iter->first.getObj());
2029 2032 if (id < eligible.size() && eligible[id]) {
2030   - m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
  2033 + obj[iter->first].object_stream = iter->second.getObjStreamNumber();
2031 2034 } else {
2032 2035 QTC::TC("qpdf", "QPDFWriter exclude from object stream");
2033 2036 }
... ... @@ -2040,7 +2043,7 @@ QPDFWriter::preserveObjectStreams()
2040 2043 }
2041 2044  
2042 2045 void
2043   -QPDFWriter::generateObjectStreams()
  2046 +QPDFWriter::Members::generateObjectStreams()
2044 2047 {
2045 2048 // Basic strategy: make a list of objects that can go into an object stream. Then figure out
2046 2049 // how many object streams are needed so that we can distribute objects approximately evenly
... ... @@ -2050,12 +2053,12 @@ QPDFWriter::generateObjectStreams()
2050 2053  
2051 2054 // This code doesn't do anything with /Extends.
2052 2055  
2053   - std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf);
  2056 + std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(pdf);
2054 2057 size_t n_object_streams = (eligible.size() + 99U) / 100U;
2055 2058  
2056 2059 initializeTables(2U * n_object_streams);
2057 2060 if (n_object_streams == 0) {
2058   - m->obj.streams_empty = true;
  2061 + obj.streams_empty = true;
2059 2062 return;
2060 2063 }
2061 2064 size_t n_per = eligible.size() / n_object_streams;
... ... @@ -2063,18 +2066,17 @@ QPDFWriter::generateObjectStreams()
2063 2066 ++n_per;
2064 2067 }
2065 2068 unsigned int n = 0;
2066   - int cur_ostream = m->pdf.newIndirectNull().getObjectID();
  2069 + int cur_ostream = pdf.newIndirectNull().getObjectID();
2067 2070 for (auto const& item: eligible) {
2068 2071 if (n == n_per) {
2069   - QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
2070 2072 n = 0;
2071 2073 // Construct a new null object as the "original" object stream. The rest of the code
2072 2074 // knows that this means we're creating the object stream from scratch.
2073   - cur_ostream = m->pdf.newIndirectNull().getObjectID();
  2075 + cur_ostream = pdf.newIndirectNull().getObjectID();
2074 2076 }
2075   - auto& obj = m->obj[item];
2076   - obj.object_stream = cur_ostream;
2077   - obj.gen = item.getGen();
  2077 + auto& o = obj[item];
  2078 + o.object_stream = cur_ostream;
  2079 + o.gen = item.getGen();
2078 2080 ++n;
2079 2081 }
2080 2082 }
... ... @@ -2130,83 +2132,83 @@ QPDFWriter::prepareFileForWrite()
2130 2132 }
2131 2133  
2132 2134 void
2133   -QPDFWriter::initializeTables(size_t extra)
  2135 +QPDFWriter::Members::initializeTables(size_t extra)
2134 2136 {
2135   - auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra;
2136   - m->obj.resize(size);
2137   - m->new_obj.resize(size);
  2137 + auto size = QIntC::to_size(QPDF::Writer::tableSize(pdf) + 100) + extra;
  2138 + obj.resize(size);
  2139 + new_obj.resize(size);
2138 2140 }
2139 2141  
2140 2142 void
2141   -QPDFWriter::doWriteSetup()
  2143 +QPDFWriter::Members::doWriteSetup()
2142 2144 {
2143   - if (m->did_write_setup) {
  2145 + if (did_write_setup) {
2144 2146 return;
2145 2147 }
2146   - m->did_write_setup = true;
  2148 + did_write_setup = true;
2147 2149  
2148 2150 // Do preliminary setup
2149 2151  
2150   - if (m->linearized) {
2151   - m->qdf_mode = false;
  2152 + if (linearized) {
  2153 + qdf_mode = false;
2152 2154 }
2153 2155  
2154   - if (m->pclm) {
2155   - m->stream_decode_level = qpdf_dl_none;
2156   - m->compress_streams = false;
2157   - m->encryption = nullptr;
  2156 + if (pclm) {
  2157 + stream_decode_level = qpdf_dl_none;
  2158 + compress_streams = false;
  2159 + encryption = nullptr;
2158 2160 }
2159 2161  
2160   - if (m->qdf_mode) {
2161   - if (!m->normalize_content_set) {
2162   - m->normalize_content = true;
  2162 + if (qdf_mode) {
  2163 + if (!normalize_content_set) {
  2164 + normalize_content = true;
2163 2165 }
2164   - if (!m->compress_streams_set) {
2165   - m->compress_streams = false;
  2166 + if (!compress_streams_set) {
  2167 + compress_streams = false;
2166 2168 }
2167   - if (!m->stream_decode_level_set) {
2168   - m->stream_decode_level = qpdf_dl_generalized;
  2169 + if (!stream_decode_level_set) {
  2170 + stream_decode_level = qpdf_dl_generalized;
2169 2171 }
2170 2172 }
2171 2173  
2172   - if (m->encryption) {
  2174 + if (encryption) {
2173 2175 // Encryption has been explicitly set
2174   - m->preserve_encryption = false;
2175   - } else if (m->normalize_content || !m->compress_streams || m->pclm || m->qdf_mode) {
  2176 + preserve_encryption = false;
  2177 + } else if (normalize_content || !compress_streams || pclm || qdf_mode) {
2176 2178 // Encryption makes looking at contents pretty useless. If the user explicitly encrypted
2177 2179 // though, we still obey that.
2178   - m->preserve_encryption = false;
  2180 + preserve_encryption = false;
2179 2181 }
2180 2182  
2181   - if (m->preserve_encryption) {
2182   - copyEncryptionParameters(m->pdf);
  2183 + if (preserve_encryption) {
  2184 + copyEncryptionParameters(pdf);
2183 2185 }
2184 2186  
2185   - if (!m->forced_pdf_version.empty()) {
  2187 + if (!forced_pdf_version.empty()) {
2186 2188 int major = 0;
2187 2189 int minor = 0;
2188   - m->parseVersion(m->forced_pdf_version, major, minor);
2189   - m->disableIncompatibleEncryption(major, minor, m->forced_extension_level);
2190   - if (m->compareVersions(major, minor, 1, 5) < 0) {
2191   - m->object_stream_mode = qpdf_o_disable;
  2190 + parseVersion(forced_pdf_version, major, minor);
  2191 + disableIncompatibleEncryption(major, minor, forced_extension_level);
  2192 + if (compareVersions(major, minor, 1, 5) < 0) {
  2193 + object_stream_mode = qpdf_o_disable;
2192 2194 }
2193 2195 }
2194 2196  
2195   - if (m->qdf_mode || m->normalize_content) {
  2197 + if (qdf_mode || normalize_content) {
2196 2198 initializeSpecialStreams();
2197 2199 }
2198 2200  
2199   - if (m->qdf_mode) {
  2201 + if (qdf_mode) {
2200 2202 // Generate indirect stream lengths for qdf mode since fix-qdf uses them for storing
2201 2203 // recomputed stream length data. Certain streams such as object streams, xref streams, and
2202 2204 // hint streams always get direct stream lengths.
2203   - m->direct_stream_lengths = false;
  2205 + direct_stream_lengths = false;
2204 2206 }
2205 2207  
2206   - switch (m->object_stream_mode) {
  2208 + switch (object_stream_mode) {
2207 2209 case qpdf_o_disable:
2208 2210 initializeTables();
2209   - m->obj.streams_empty = true;
  2211 + obj.streams_empty = true;
2210 2212 break;
2211 2213  
2212 2214 case qpdf_o_preserve:
... ... @@ -2221,61 +2223,58 @@ QPDFWriter::doWriteSetup()
2221 2223 // no default so gcc will warn for missing case tag
2222 2224 }
2223 2225  
2224   - if (!m->obj.streams_empty) {
2225   - if (m->linearized) {
  2226 + if (!obj.streams_empty) {
  2227 + if (linearized) {
2226 2228 // Page dictionaries are not allowed to be compressed objects.
2227   - for (auto& page: m->pdf.getAllPages()) {
2228   - if (m->obj[page].object_stream > 0) {
2229   - QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2230   - m->obj[page].object_stream = 0;
  2229 + for (auto& page: pdf.getAllPages()) {
  2230 + if (obj[page].object_stream > 0) {
  2231 + obj[page].object_stream = 0;
2231 2232 }
2232 2233 }
2233 2234 }
2234 2235  
2235   - if (m->linearized || m->encryption) {
  2236 + if (linearized || encryption) {
2236 2237 // The document catalog is not allowed to be compressed in linearized files either. It
2237 2238 // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
2238 2239 // handle encrypted files with compressed document catalogs, so we disable them in that
2239 2240 // case as well.
2240   - if (m->obj[m->root_og].object_stream > 0) {
2241   - QTC::TC("qpdf", "QPDFWriter uncompressing root");
2242   - m->obj[m->root_og].object_stream = 0;
  2241 + if (obj[root_og].object_stream > 0) {
  2242 + obj[root_og].object_stream = 0;
2243 2243 }
2244 2244 }
2245 2245  
2246 2246 // Generate reverse mapping from object stream to objects
2247   - m->obj.forEach([this](auto id, auto const& item) -> void {
  2247 + obj.forEach([this](auto id, auto const& item) -> void {
2248 2248 if (item.object_stream > 0) {
2249   - auto& vec = m->object_stream_to_objects[item.object_stream];
  2249 + auto& vec = object_stream_to_objects[item.object_stream];
2250 2250 vec.emplace_back(id, item.gen);
2251   - if (m->max_ostream_index < vec.size()) {
2252   - ++m->max_ostream_index;
  2251 + if (max_ostream_index < vec.size()) {
  2252 + ++max_ostream_index;
2253 2253 }
2254 2254 }
2255 2255 });
2256   - --m->max_ostream_index;
  2256 + --max_ostream_index;
2257 2257  
2258   - if (m->object_stream_to_objects.empty()) {
2259   - m->obj.streams_empty = true;
  2258 + if (object_stream_to_objects.empty()) {
  2259 + obj.streams_empty = true;
2260 2260 } else {
2261   - setMinimumPDFVersion("1.5");
  2261 + w.setMinimumPDFVersion("1.5");
2262 2262 }
2263 2263 }
2264 2264  
2265   - setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel());
2266   - m->final_pdf_version = m->min_pdf_version;
2267   - m->final_extension_level = m->min_extension_level;
2268   - if (!m->forced_pdf_version.empty()) {
2269   - QTC::TC("qpdf", "QPDFWriter using forced PDF version");
2270   - m->final_pdf_version = m->forced_pdf_version;
2271   - m->final_extension_level = m->forced_extension_level;
  2265 + setMinimumPDFVersion(pdf.getPDFVersion(), pdf.getExtensionLevel());
  2266 + final_pdf_version = min_pdf_version;
  2267 + final_extension_level = min_extension_level;
  2268 + if (!forced_pdf_version.empty()) {
  2269 + final_pdf_version = forced_pdf_version;
  2270 + final_extension_level = forced_extension_level;
2272 2271 }
2273 2272 }
2274 2273  
2275 2274 void
2276 2275 QPDFWriter::write()
2277 2276 {
2278   - doWriteSetup();
  2277 + m->doWriteSetup();
2279 2278  
2280 2279 // Set up progress reporting. For linearized files, we write two passes. events_expected is an
2281 2280 // approximation, but it's good enough for progress reporting, which is mostly a guess anyway.
... ... @@ -2372,7 +2371,7 @@ QPDFWriter::writeEncryptionDictionary()
2372 2371 std::string
2373 2372 QPDFWriter::getFinalVersion()
2374 2373 {
2375   - doWriteSetup();
  2374 + m->doWriteSetup();
2376 2375 return m->final_pdf_version;
2377 2376 }
2378 2377  
... ...
qpdf/qpdf.testcov
... ... @@ -81,11 +81,8 @@ QPDF xref deleted object 0
81 81 SF_FlateLzwDecode PNG filter 0
82 82 QPDF xref /Index is array 1
83 83 QPDFWriter encrypt object stream 0
84   -QPDFWriter uncompressing page dictionary 0
85   -QPDFWriter uncompressing root 0
86 84 QPDFWriter compressing uncompressed stream 0
87 85 QPDF exclude indirect length 0
88   -QPDFWriter generate >1 ostream 0
89 86 QPDF exclude encryption dictionary 0
90 87 QPDF loop detected traversing objects 0
91 88 QPDF reconstructed xref table 0
... ... @@ -133,7 +130,6 @@ qpdf-c called qpdf_allow_modify_annotation 0
133 130 qpdf-c called qpdf_allow_modify_other 0
134 131 qpdf-c called qpdf_allow_modify_all 0
135 132 QPDFWriter increasing minimum version 1
136   -QPDFWriter using forced PDF version 0
137 133 qpdf-c called qpdf_set_minimum_pdf_version 0
138 134 qpdf-c called qpdf_force_pdf_version 0
139 135 qpdf-c called qpdf_init_write multiple times 0
... ... @@ -476,8 +472,6 @@ QPDFAcroFormDocumentHelper AP parse error 1
476 472 QPDFJob copy fields not this file 0
477 473 QPDFJob copy fields non-first from orig 0
478 474 QPDF resolve duplicated page in insert 0
479   -QPDFWriter preserve object streams 0
480   -QPDFWriter preserve object streams preserve unreferenced 0
481 475 QPDFWriter exclude from object stream 0
482 476 QPDF_pages findPage not found 0
483 477 QPDFJob weak crypto error 0
... ...