Commit 2fa93e79b40404948fcfcb79bff92b3caf0684fd (1 parent: 84e25919)
In QPDFWriter replace map object_to_object_stream with ObjTable obj
Showing 3 changed files with 67 additions and 54 deletions
include/qpdf/QPDFWriter.hh
| ... | ... | @@ -609,7 +609,7 @@ class QPDFWriter |
| 609 | 609 | void pushMD5Pipeline(PipelinePopper&); |
| 610 | 610 | void computeDeterministicIDData(); |
| 611 | 611 | |
| 612 | - void discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out); | |
| 612 | + void discardGeneration(std::map<int, int>& out); | |
| 613 | 613 | |
| 614 | 614 | class Members; |
| 615 | 615 | ... | ... |
libqpdf/QPDFWriter.cc
| ... | ... | @@ -1093,19 +1093,18 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) |
| 1093 | 1093 | } |
| 1094 | 1094 | |
| 1095 | 1095 | QPDFObjGen og = object.getObjGen(); |
| 1096 | - auto& renumber = m->obj[og].renumber; | |
| 1096 | + auto& obj = m->obj[og]; | |
| 1097 | 1097 | |
| 1098 | - if (renumber == 0) { | |
| 1099 | - if (m->object_to_object_stream.count(og)) { | |
| 1098 | + if (obj.renumber == 0) { | |
| 1099 | + if (obj.object_stream > 0) { | |
| 1100 | 1100 | // This is in an object stream. Don't process it here. Instead, enqueue the object |
| 1101 | 1101 | // stream. Object streams always have generation 0. |
| 1102 | - int stream_id = m->object_to_object_stream[og]; | |
| 1103 | - // Detect loops by storing invalid object ID 0, which will get overwritten later. | |
| 1104 | - renumber = -1; | |
| 1105 | - enqueueObject(m->pdf.getObjectByID(stream_id, 0)); | |
| 1102 | + // Detect loops by storing invalid object ID -1, which will get overwritten later. | |
| 1103 | + obj.renumber = -1; | |
| 1104 | + enqueueObject(m->pdf.getObject(obj.object_stream, 0)); | |
| 1106 | 1105 | } else { |
| 1107 | 1106 | m->object_queue.push_back(object); |
| 1108 | - renumber = m->next_objid++; | |
| 1107 | + obj.renumber = m->next_objid++; | |
| 1109 | 1108 | |
| 1110 | 1109 | if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) { |
| 1111 | 1110 | // For linearized files, uncompressed objects go at end, and we take care of |
| ... | ... | @@ -1118,7 +1117,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) |
| 1118 | 1117 | ++m->next_objid; |
| 1119 | 1118 | } |
| 1120 | 1119 | } |
| 1121 | - } else if (renumber == -1) { | |
| 1120 | + } else if (obj.renumber == -1) { | |
| 1122 | 1121 | // This can happen if a specially constructed file indicates that an object stream is |
| 1123 | 1122 | // inside itself. |
| 1124 | 1123 | QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream"); |
| ... | ... | @@ -1939,6 +1938,7 @@ QPDFWriter::preserveObjectStreams() |
| 1939 | 1938 | std::map<int, int> omap; |
| 1940 | 1939 | QPDF::Writer::getObjectStreamData(m->pdf, omap); |
| 1941 | 1940 | if (omap.empty()) { |
| 1941 | + m->obj.streams_empty = true; | |
| 1942 | 1942 | return; |
| 1943 | 1943 | } |
| 1944 | 1944 | // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object |
| ... | ... | @@ -1957,7 +1957,7 @@ QPDFWriter::preserveObjectStreams() |
| 1957 | 1957 | for (auto iter: omap) { |
| 1958 | 1958 | QPDFObjGen og(iter.first, 0); |
| 1959 | 1959 | if (eligible.count(og) || m->preserve_unreferenced_objects) { |
| 1960 | - m->object_to_object_stream[og] = iter.second; | |
| 1960 | + m->obj[iter.first].object_stream = iter.second; | |
| 1961 | 1961 | } else { |
| 1962 | 1962 | QTC::TC("qpdf", "QPDFWriter exclude from object stream"); |
| 1963 | 1963 | } |
| ... | ... | @@ -1982,6 +1982,7 @@ QPDFWriter::generateObjectStreams() |
| 1982 | 1982 | // during writing. |
| 1983 | 1983 | initializeTables(2U * n_object_streams); |
| 1984 | 1984 | if (n_object_streams == 0) { |
| 1985 | + m->obj.streams_empty = true; | |
| 1985 | 1986 | return; |
| 1986 | 1987 | } |
| 1987 | 1988 | size_t n_per = eligible.size() / n_object_streams; |
| ... | ... | @@ -2002,7 +2003,9 @@ QPDFWriter::generateObjectStreams() |
| 2002 | 2003 | // knows that this means we're creating the object stream from scratch. |
| 2003 | 2004 | cur_ostream = m->pdf.makeIndirectObject(QPDFObjectHandle::newNull()).getObjectID(); |
| 2004 | 2005 | } |
| 2005 | - m->object_to_object_stream[iter] = cur_ostream; | |
| 2006 | + auto& obj = m->obj[iter]; | |
| 2007 | + obj.object_stream = cur_ostream; | |
| 2008 | + obj.gen = iter.getGen(); | |
| 2006 | 2009 | ++n; |
| 2007 | 2010 | } |
| 2008 | 2011 | } |
| ... | ... | @@ -2136,6 +2139,7 @@ QPDFWriter::doWriteSetup() |
| 2136 | 2139 | // Initialize object table for all existing objects plus some headroom for objects created |
| 2137 | 2140 | // during writing. |
| 2138 | 2141 | initializeTables(); |
| 2142 | + m->obj.streams_empty = true; | |
| 2139 | 2143 | break; |
| 2140 | 2144 | |
| 2141 | 2145 | case qpdf_o_preserve: |
| ... | ... | @@ -2150,39 +2154,43 @@ QPDFWriter::doWriteSetup() |
| 2150 | 2154 | // no default so gcc will warn for missing case tag |
| 2151 | 2155 | } |
| 2152 | 2156 | |
| 2153 | - if (m->linearized) { | |
| 2154 | - // Page dictionaries are not allowed to be compressed objects. | |
| 2155 | - for (auto& page: m->pdf.getAllPages()) { | |
| 2156 | - QPDFObjGen og = page.getObjGen(); | |
| 2157 | - if (m->object_to_object_stream.count(og)) { | |
| 2158 | - QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); | |
| 2159 | - m->object_to_object_stream.erase(og); | |
| 2157 | + if (!m->obj.streams_empty) { | |
| 2158 | + if (m->linearized) { | |
| 2159 | + // Page dictionaries are not allowed to be compressed objects. | |
| 2160 | + for (auto& page: m->pdf.getAllPages()) { | |
| 2161 | + if (m->obj[page].object_stream > 0) { | |
| 2162 | + QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); | |
| 2163 | + m->obj[page].object_stream = 0; | |
| 2164 | + } | |
| 2160 | 2165 | } |
| 2161 | 2166 | } |
| 2162 | - } | |
| 2163 | 2167 | |
| 2164 | - if (m->linearized || m->encrypted) { | |
| 2165 | - // The document catalog is not allowed to be compressed in linearized files either. It also | |
| 2166 | - // appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to handle | |
| 2167 | - // encrypted files with compressed document catalogs, so we disable them in that case as | |
| 2168 | - // well. | |
| 2169 | - if (m->object_to_object_stream.count(m->root_og)) { | |
| 2170 | - QTC::TC("qpdf", "QPDFWriter uncompressing root"); | |
| 2171 | - m->object_to_object_stream.erase(m->root_og); | |
| 2168 | + if (m->linearized || m->encrypted) { | |
| 2169 | + // The document catalog is not allowed to be compressed in linearized files either. It | |
| 2170 | + // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to | |
| 2171 | + // handle encrypted files with compressed document catalogs, so we disable them in that | |
| 2172 | + // case as well. | |
| 2173 | + if (m->obj[m->root_og].object_stream > 0) { | |
| 2174 | + QTC::TC("qpdf", "QPDFWriter uncompressing root"); | |
| 2175 | + m->obj[m->root_og].object_stream = 0; | |
| 2176 | + } | |
| 2172 | 2177 | } |
| 2173 | - } | |
| 2174 | 2178 | |
| 2175 | - // Generate reverse mapping from object stream to objects | |
| 2176 | - for (auto const& iter: m->object_to_object_stream) { | |
| 2177 | - QPDFObjGen const& obj = iter.first; | |
| 2178 | - int stream = iter.second; | |
| 2179 | - m->object_stream_to_objects[stream].insert(obj); | |
| 2180 | - m->max_ostream_index = std::max( | |
| 2181 | - m->max_ostream_index, QIntC::to_int(m->object_stream_to_objects[stream].size()) - 1); | |
| 2182 | - } | |
| 2179 | + // Generate reverse mapping from object stream to objects | |
| 2180 | + m->obj.forEach([this](auto id, auto const& item) -> void { | |
| 2181 | + if (item.object_stream > 0) { | |
| 2182 | + m->object_stream_to_objects[item.object_stream].insert(QPDFObjGen(id, item.gen)); | |
| 2183 | + m->max_ostream_index = std::max( | |
| 2184 | + m->max_ostream_index, | |
| 2185 | + QIntC::to_int(m->object_stream_to_objects[item.object_stream].size()) - 1); | |
| 2186 | + } | |
| 2187 | + }); | |
| 2183 | 2188 | |
| 2184 | - if (!m->object_stream_to_objects.empty()) { | |
| 2185 | - setMinimumPDFVersion("1.5"); | |
| 2189 | + if (m->object_stream_to_objects.empty()) { | |
| 2190 | + m->obj.streams_empty = true; | |
| 2191 | + } else { | |
| 2192 | + setMinimumPDFVersion("1.5"); | |
| 2193 | + } | |
| 2186 | 2194 | } |
| 2187 | 2195 | |
| 2188 | 2196 | setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel()); |
| ... | ... | @@ -2519,7 +2527,7 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes) |
| 2519 | 2527 | } |
| 2520 | 2528 | |
| 2521 | 2529 | void |
| 2522 | -QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out) | |
| 2530 | +QPDFWriter::discardGeneration(std::map<int, int>& out) | |
| 2523 | 2531 | { |
| 2524 | 2532 | // There are deep assumptions in the linearization code in QPDF that there is only one object |
| 2525 | 2533 | // with each object number; i.e., you can't have two objects with the same object number and |
| ... | ... | @@ -2531,18 +2539,11 @@ QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, |
| 2531 | 2539 | // generation numbers. |
| 2532 | 2540 | |
| 2533 | 2541 | out.clear(); |
| 2534 | - for (auto const& iter: in) { | |
| 2535 | - if (out.count(iter.first.getObj())) { | |
| 2536 | - throw std::runtime_error("QPDF cannot currently linearize files that contain" | |
| 2537 | - " multiple objects with the same object ID and different" | |
| 2538 | - " generations. If you see this error message, please file" | |
| 2539 | - " a bug report and attach the file if possible. As a" | |
| 2540 | - " workaround, first convert the file with qpdf without" | |
| 2541 | - " linearizing, and then linearize the result of that" | |
| 2542 | - " conversion."); | |
| 2542 | + m->obj.forEach([&out](auto id, auto const& item) -> void { | |
| 2543 | + if (item.object_stream > 0) { | |
| 2544 | + out[id] = item.object_stream; | |
| 2543 | 2545 | } |
| 2544 | - out[iter.first.getObj()] = iter.second; | |
| 2545 | - } | |
| 2546 | + }); | |
| 2546 | 2547 | } |
| 2547 | 2548 | |
| 2548 | 2549 | void |
| ... | ... | @@ -2550,7 +2551,7 @@ QPDFWriter::writeLinearized() |
| 2550 | 2551 | { |
| 2551 | 2552 | // Optimize file and enqueue objects in order |
| 2552 | 2553 | |
| 2553 | - discardGeneration(m->object_to_object_stream, m->object_to_object_stream_no_gen); | |
| 2554 | + discardGeneration(m->object_to_object_stream_no_gen); | |
| 2554 | 2555 | |
| 2555 | 2556 | auto skip_stream_parameters = [this](QPDFObjectHandle& stream) { |
| 2556 | 2557 | bool compress_stream; |
| ... | ... | @@ -2594,7 +2595,7 @@ QPDFWriter::writeLinearized() |
| 2594 | 2595 | int after_second_half = 1 + second_half_uncompressed; |
| 2595 | 2596 | m->next_objid = after_second_half; |
| 2596 | 2597 | int second_half_xref = 0; |
| 2597 | - bool need_xref_stream = (!m->object_to_object_stream.empty()); | |
| 2598 | + bool need_xref_stream = !m->obj.streams_empty; | |
| 2598 | 2599 | if (need_xref_stream) { |
| 2599 | 2600 | second_half_xref = m->next_objid++; |
| 2600 | 2601 | } | ... | ... |
libqpdf/qpdf/QPDFWriter_private.hh
| ... | ... | @@ -11,11 +11,24 @@ |
| 11 | 11 | struct QPDFWriter::Object |
| 12 | 12 | { |
| 13 | 13 | int renumber{0}; |
| 14 | + int gen{0}; | |
| 15 | + int object_stream{0}; | |
| 14 | 16 | }; |
| 15 | 17 | |
| 16 | 18 | class QPDFWriter::ObjTable: public ::ObjTable<QPDFWriter::Object> |
| 17 | 19 | { |
| 18 | 20 | friend class QPDFWriter; |
| 21 | + | |
| 22 | + public: | |
| 23 | + bool | |
| 24 | + getStreamsEmpty() const noexcept | |
| 25 | + { | |
| 26 | + return streams_empty; | |
| 27 | + } | |
| 28 | + | |
| 29 | + private: | |
| 30 | + // For performance, set by QPDFWriter rather than tracked by ObjTable. | |
| 31 | + bool streams_empty{false}; | |
| 19 | 32 | }; |
| 20 | 33 | |
| 21 | 34 | class QPDFWriter::Members |
| ... | ... | @@ -88,7 +101,6 @@ class QPDFWriter::Members |
| 88 | 101 | std::set<QPDFObjGen> normalized_streams; |
| 89 | 102 | std::map<QPDFObjGen, int> page_object_to_seq; |
| 90 | 103 | std::map<QPDFObjGen, int> contents_to_page_seq; |
| 91 | - std::map<QPDFObjGen, int> object_to_object_stream; | |
| 92 | 104 | std::map<int, std::set<QPDFObjGen>> object_stream_to_objects; |
| 93 | 105 | std::list<Pipeline*> pipeline_stack; |
| 94 | 106 | unsigned long long next_stack_id{0}; | ... | ... |