Commit 2fa93e79b40404948fcfcb79bff92b3caf0684fd
1 parent: 84e25919
In QPDFWriter replace map object_to_object_stream with ObjTable obj
Showing 3 changed files with 67 additions and 54 deletions
include/qpdf/QPDFWriter.hh
| @@ -609,7 +609,7 @@ class QPDFWriter | @@ -609,7 +609,7 @@ class QPDFWriter | ||
| 609 | void pushMD5Pipeline(PipelinePopper&); | 609 | void pushMD5Pipeline(PipelinePopper&); |
| 610 | void computeDeterministicIDData(); | 610 | void computeDeterministicIDData(); |
| 611 | 611 | ||
| 612 | - void discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out); | 612 | + void discardGeneration(std::map<int, int>& out); |
| 613 | 613 | ||
| 614 | class Members; | 614 | class Members; |
| 615 | 615 |
libqpdf/QPDFWriter.cc
| @@ -1093,19 +1093,18 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) | @@ -1093,19 +1093,18 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) | ||
| 1093 | } | 1093 | } |
| 1094 | 1094 | ||
| 1095 | QPDFObjGen og = object.getObjGen(); | 1095 | QPDFObjGen og = object.getObjGen(); |
| 1096 | - auto& renumber = m->obj[og].renumber; | 1096 | + auto& obj = m->obj[og]; |
| 1097 | 1097 | ||
| 1098 | - if (renumber == 0) { | ||
| 1099 | - if (m->object_to_object_stream.count(og)) { | 1098 | + if (obj.renumber == 0) { |
| 1099 | + if (obj.object_stream > 0) { | ||
| 1100 | // This is in an object stream. Don't process it here. Instead, enqueue the object | 1100 | // This is in an object stream. Don't process it here. Instead, enqueue the object |
| 1101 | // stream. Object streams always have generation 0. | 1101 | // stream. Object streams always have generation 0. |
| 1102 | - int stream_id = m->object_to_object_stream[og]; | ||
| 1103 | - // Detect loops by storing invalid object ID 0, which will get overwritten later. | ||
| 1104 | - renumber = -1; | ||
| 1105 | - enqueueObject(m->pdf.getObjectByID(stream_id, 0)); | 1102 | + // Detect loops by storing invalid object ID -1, which will get overwritten later. |
| 1103 | + obj.renumber = -1; | ||
| 1104 | + enqueueObject(m->pdf.getObject(obj.object_stream, 0)); | ||
| 1106 | } else { | 1105 | } else { |
| 1107 | m->object_queue.push_back(object); | 1106 | m->object_queue.push_back(object); |
| 1108 | - renumber = m->next_objid++; | 1107 | + obj.renumber = m->next_objid++; |
| 1109 | 1108 | ||
| 1110 | if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) { | 1109 | if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) { |
| 1111 | // For linearized files, uncompressed objects go at end, and we take care of | 1110 | // For linearized files, uncompressed objects go at end, and we take care of |
| @@ -1118,7 +1117,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) | @@ -1118,7 +1117,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) | ||
| 1118 | ++m->next_objid; | 1117 | ++m->next_objid; |
| 1119 | } | 1118 | } |
| 1120 | } | 1119 | } |
| 1121 | - } else if (renumber == -1) { | 1120 | + } else if (obj.renumber == -1) { |
| 1122 | // This can happen if a specially constructed file indicates that an object stream is | 1121 | // This can happen if a specially constructed file indicates that an object stream is |
| 1123 | // inside itself. | 1122 | // inside itself. |
| 1124 | QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream"); | 1123 | QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream"); |
| @@ -1939,6 +1938,7 @@ QPDFWriter::preserveObjectStreams() | @@ -1939,6 +1938,7 @@ QPDFWriter::preserveObjectStreams() | ||
| 1939 | std::map<int, int> omap; | 1938 | std::map<int, int> omap; |
| 1940 | QPDF::Writer::getObjectStreamData(m->pdf, omap); | 1939 | QPDF::Writer::getObjectStreamData(m->pdf, omap); |
| 1941 | if (omap.empty()) { | 1940 | if (omap.empty()) { |
| 1941 | + m->obj.streams_empty = true; | ||
| 1942 | return; | 1942 | return; |
| 1943 | } | 1943 | } |
| 1944 | // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object | 1944 | // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object |
| @@ -1957,7 +1957,7 @@ QPDFWriter::preserveObjectStreams() | @@ -1957,7 +1957,7 @@ QPDFWriter::preserveObjectStreams() | ||
| 1957 | for (auto iter: omap) { | 1957 | for (auto iter: omap) { |
| 1958 | QPDFObjGen og(iter.first, 0); | 1958 | QPDFObjGen og(iter.first, 0); |
| 1959 | if (eligible.count(og) || m->preserve_unreferenced_objects) { | 1959 | if (eligible.count(og) || m->preserve_unreferenced_objects) { |
| 1960 | - m->object_to_object_stream[og] = iter.second; | 1960 | + m->obj[iter.first].object_stream = iter.second; |
| 1961 | } else { | 1961 | } else { |
| 1962 | QTC::TC("qpdf", "QPDFWriter exclude from object stream"); | 1962 | QTC::TC("qpdf", "QPDFWriter exclude from object stream"); |
| 1963 | } | 1963 | } |
| @@ -1982,6 +1982,7 @@ QPDFWriter::generateObjectStreams() | @@ -1982,6 +1982,7 @@ QPDFWriter::generateObjectStreams() | ||
| 1982 | // during writing. | 1982 | // during writing. |
| 1983 | initializeTables(2U * n_object_streams); | 1983 | initializeTables(2U * n_object_streams); |
| 1984 | if (n_object_streams == 0) { | 1984 | if (n_object_streams == 0) { |
| 1985 | + m->obj.streams_empty = true; | ||
| 1985 | return; | 1986 | return; |
| 1986 | } | 1987 | } |
| 1987 | size_t n_per = eligible.size() / n_object_streams; | 1988 | size_t n_per = eligible.size() / n_object_streams; |
| @@ -2002,7 +2003,9 @@ QPDFWriter::generateObjectStreams() | @@ -2002,7 +2003,9 @@ QPDFWriter::generateObjectStreams() | ||
| 2002 | // knows that this means we're creating the object stream from scratch. | 2003 | // knows that this means we're creating the object stream from scratch. |
| 2003 | cur_ostream = m->pdf.makeIndirectObject(QPDFObjectHandle::newNull()).getObjectID(); | 2004 | cur_ostream = m->pdf.makeIndirectObject(QPDFObjectHandle::newNull()).getObjectID(); |
| 2004 | } | 2005 | } |
| 2005 | - m->object_to_object_stream[iter] = cur_ostream; | 2006 | + auto& obj = m->obj[iter]; |
| 2007 | + obj.object_stream = cur_ostream; | ||
| 2008 | + obj.gen = iter.getGen(); | ||
| 2006 | ++n; | 2009 | ++n; |
| 2007 | } | 2010 | } |
| 2008 | } | 2011 | } |
| @@ -2136,6 +2139,7 @@ QPDFWriter::doWriteSetup() | @@ -2136,6 +2139,7 @@ QPDFWriter::doWriteSetup() | ||
| 2136 | // Initialize object table for all existing objects plus some headroom for objects created | 2139 | // Initialize object table for all existing objects plus some headroom for objects created |
| 2137 | // during writing. | 2140 | // during writing. |
| 2138 | initializeTables(); | 2141 | initializeTables(); |
| 2142 | + m->obj.streams_empty = true; | ||
| 2139 | break; | 2143 | break; |
| 2140 | 2144 | ||
| 2141 | case qpdf_o_preserve: | 2145 | case qpdf_o_preserve: |
| @@ -2150,39 +2154,43 @@ QPDFWriter::doWriteSetup() | @@ -2150,39 +2154,43 @@ QPDFWriter::doWriteSetup() | ||
| 2150 | // no default so gcc will warn for missing case tag | 2154 | // no default so gcc will warn for missing case tag |
| 2151 | } | 2155 | } |
| 2152 | 2156 | ||
| 2153 | - if (m->linearized) { | ||
| 2154 | - // Page dictionaries are not allowed to be compressed objects. | ||
| 2155 | - for (auto& page: m->pdf.getAllPages()) { | ||
| 2156 | - QPDFObjGen og = page.getObjGen(); | ||
| 2157 | - if (m->object_to_object_stream.count(og)) { | ||
| 2158 | - QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); | ||
| 2159 | - m->object_to_object_stream.erase(og); | 2157 | + if (!m->obj.streams_empty) { |
| 2158 | + if (m->linearized) { | ||
| 2159 | + // Page dictionaries are not allowed to be compressed objects. | ||
| 2160 | + for (auto& page: m->pdf.getAllPages()) { | ||
| 2161 | + if (m->obj[page].object_stream > 0) { | ||
| 2162 | + QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); | ||
| 2163 | + m->obj[page].object_stream = 0; | ||
| 2164 | + } | ||
| 2160 | } | 2165 | } |
| 2161 | } | 2166 | } |
| 2162 | - } | ||
| 2163 | 2167 | ||
| 2164 | - if (m->linearized || m->encrypted) { | ||
| 2165 | - // The document catalog is not allowed to be compressed in linearized files either. It also | ||
| 2166 | - // appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to handle | ||
| 2167 | - // encrypted files with compressed document catalogs, so we disable them in that case as | ||
| 2168 | - // well. | ||
| 2169 | - if (m->object_to_object_stream.count(m->root_og)) { | ||
| 2170 | - QTC::TC("qpdf", "QPDFWriter uncompressing root"); | ||
| 2171 | - m->object_to_object_stream.erase(m->root_og); | 2168 | + if (m->linearized || m->encrypted) { |
| 2169 | + // The document catalog is not allowed to be compressed in linearized files either. It | ||
| 2170 | + // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to | ||
| 2171 | + // handle encrypted files with compressed document catalogs, so we disable them in that | ||
| 2172 | + // case as well. | ||
| 2173 | + if (m->obj[m->root_og].object_stream > 0) { | ||
| 2174 | + QTC::TC("qpdf", "QPDFWriter uncompressing root"); | ||
| 2175 | + m->obj[m->root_og].object_stream = 0; | ||
| 2176 | + } | ||
| 2172 | } | 2177 | } |
| 2173 | - } | ||
| 2174 | 2178 | ||
| 2175 | - // Generate reverse mapping from object stream to objects | ||
| 2176 | - for (auto const& iter: m->object_to_object_stream) { | ||
| 2177 | - QPDFObjGen const& obj = iter.first; | ||
| 2178 | - int stream = iter.second; | ||
| 2179 | - m->object_stream_to_objects[stream].insert(obj); | ||
| 2180 | - m->max_ostream_index = std::max( | ||
| 2181 | - m->max_ostream_index, QIntC::to_int(m->object_stream_to_objects[stream].size()) - 1); | ||
| 2182 | - } | 2179 | + // Generate reverse mapping from object stream to objects |
| 2180 | + m->obj.forEach([this](auto id, auto const& item) -> void { | ||
| 2181 | + if (item.object_stream > 0) { | ||
| 2182 | + m->object_stream_to_objects[item.object_stream].insert(QPDFObjGen(id, item.gen)); | ||
| 2183 | + m->max_ostream_index = std::max( | ||
| 2184 | + m->max_ostream_index, | ||
| 2185 | + QIntC::to_int(m->object_stream_to_objects[item.object_stream].size()) - 1); | ||
| 2186 | + } | ||
| 2187 | + }); | ||
| 2183 | 2188 | ||
| 2184 | - if (!m->object_stream_to_objects.empty()) { | ||
| 2185 | - setMinimumPDFVersion("1.5"); | 2189 | + if (m->object_stream_to_objects.empty()) { |
| 2190 | + m->obj.streams_empty = true; | ||
| 2191 | + } else { | ||
| 2192 | + setMinimumPDFVersion("1.5"); | ||
| 2193 | + } | ||
| 2186 | } | 2194 | } |
| 2187 | 2195 | ||
| 2188 | setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel()); | 2196 | setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel()); |
| @@ -2519,7 +2527,7 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes) | @@ -2519,7 +2527,7 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes) | ||
| 2519 | } | 2527 | } |
| 2520 | 2528 | ||
| 2521 | void | 2529 | void |
| 2522 | -QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out) | 2530 | +QPDFWriter::discardGeneration(std::map<int, int>& out) |
| 2523 | { | 2531 | { |
| 2524 | // There are deep assumptions in the linearization code in QPDF that there is only one object | 2532 | // There are deep assumptions in the linearization code in QPDF that there is only one object |
| 2525 | // with each object number; i.e., you can't have two objects with the same object number and | 2533 | // with each object number; i.e., you can't have two objects with the same object number and |
| @@ -2531,18 +2539,11 @@ QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, | @@ -2531,18 +2539,11 @@ QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, | ||
| 2531 | // generation numbers. | 2539 | // generation numbers. |
| 2532 | 2540 | ||
| 2533 | out.clear(); | 2541 | out.clear(); |
| 2534 | - for (auto const& iter: in) { | ||
| 2535 | - if (out.count(iter.first.getObj())) { | ||
| 2536 | - throw std::runtime_error("QPDF cannot currently linearize files that contain" | ||
| 2537 | - " multiple objects with the same object ID and different" | ||
| 2538 | - " generations. If you see this error message, please file" | ||
| 2539 | - " a bug report and attach the file if possible. As a" | ||
| 2540 | - " workaround, first convert the file with qpdf without" | ||
| 2541 | - " linearizing, and then linearize the result of that" | ||
| 2542 | - " conversion."); | 2542 | + m->obj.forEach([&out](auto id, auto const& item) -> void { |
| 2543 | + if (item.object_stream > 0) { | ||
| 2544 | + out[id] = item.object_stream; | ||
| 2543 | } | 2545 | } |
| 2544 | - out[iter.first.getObj()] = iter.second; | ||
| 2545 | - } | 2546 | + }); |
| 2546 | } | 2547 | } |
| 2547 | 2548 | ||
| 2548 | void | 2549 | void |
| @@ -2550,7 +2551,7 @@ QPDFWriter::writeLinearized() | @@ -2550,7 +2551,7 @@ QPDFWriter::writeLinearized() | ||
| 2550 | { | 2551 | { |
| 2551 | // Optimize file and enqueue objects in order | 2552 | // Optimize file and enqueue objects in order |
| 2552 | 2553 | ||
| 2553 | - discardGeneration(m->object_to_object_stream, m->object_to_object_stream_no_gen); | 2554 | + discardGeneration(m->object_to_object_stream_no_gen); |
| 2554 | 2555 | ||
| 2555 | auto skip_stream_parameters = [this](QPDFObjectHandle& stream) { | 2556 | auto skip_stream_parameters = [this](QPDFObjectHandle& stream) { |
| 2556 | bool compress_stream; | 2557 | bool compress_stream; |
| @@ -2594,7 +2595,7 @@ QPDFWriter::writeLinearized() | @@ -2594,7 +2595,7 @@ QPDFWriter::writeLinearized() | ||
| 2594 | int after_second_half = 1 + second_half_uncompressed; | 2595 | int after_second_half = 1 + second_half_uncompressed; |
| 2595 | m->next_objid = after_second_half; | 2596 | m->next_objid = after_second_half; |
| 2596 | int second_half_xref = 0; | 2597 | int second_half_xref = 0; |
| 2597 | - bool need_xref_stream = (!m->object_to_object_stream.empty()); | 2598 | + bool need_xref_stream = !m->obj.streams_empty; |
| 2598 | if (need_xref_stream) { | 2599 | if (need_xref_stream) { |
| 2599 | second_half_xref = m->next_objid++; | 2600 | second_half_xref = m->next_objid++; |
| 2600 | } | 2601 | } |
libqpdf/qpdf/QPDFWriter_private.hh
| @@ -11,11 +11,24 @@ | @@ -11,11 +11,24 @@ | ||
| 11 | struct QPDFWriter::Object | 11 | struct QPDFWriter::Object |
| 12 | { | 12 | { |
| 13 | int renumber{0}; | 13 | int renumber{0}; |
| 14 | + int gen{0}; | ||
| 15 | + int object_stream{0}; | ||
| 14 | }; | 16 | }; |
| 15 | 17 | ||
| 16 | class QPDFWriter::ObjTable: public ::ObjTable<QPDFWriter::Object> | 18 | class QPDFWriter::ObjTable: public ::ObjTable<QPDFWriter::Object> |
| 17 | { | 19 | { |
| 18 | friend class QPDFWriter; | 20 | friend class QPDFWriter; |
| 21 | + | ||
| 22 | + public: | ||
| 23 | + bool | ||
| 24 | + getStreamsEmpty() const noexcept | ||
| 25 | + { | ||
| 26 | + return streams_empty; | ||
| 27 | + } | ||
| 28 | + | ||
| 29 | + private: | ||
| 30 | + // For performance, set by QPDFWriter rather than tracked by ObjTable. | ||
| 31 | + bool streams_empty{false}; | ||
| 19 | }; | 32 | }; |
| 20 | 33 | ||
| 21 | class QPDFWriter::Members | 34 | class QPDFWriter::Members |
| @@ -88,7 +101,6 @@ class QPDFWriter::Members | @@ -88,7 +101,6 @@ class QPDFWriter::Members | ||
| 88 | std::set<QPDFObjGen> normalized_streams; | 101 | std::set<QPDFObjGen> normalized_streams; |
| 89 | std::map<QPDFObjGen, int> page_object_to_seq; | 102 | std::map<QPDFObjGen, int> page_object_to_seq; |
| 90 | std::map<QPDFObjGen, int> contents_to_page_seq; | 103 | std::map<QPDFObjGen, int> contents_to_page_seq; |
| 91 | - std::map<QPDFObjGen, int> object_to_object_stream; | ||
| 92 | std::map<int, std::set<QPDFObjGen>> object_stream_to_objects; | 104 | std::map<int, std::set<QPDFObjGen>> object_stream_to_objects; |
| 93 | std::list<Pipeline*> pipeline_stack; | 105 | std::list<Pipeline*> pipeline_stack; |
| 94 | unsigned long long next_stack_id{0}; | 106 | unsigned long long next_stack_id{0}; |