Commit 2fa93e79b40404948fcfcb79bff92b3caf0684fd

Authored by m-holger
1 parent 84e25919

In QPDFWriter replace map object_to_object_stream with ObjTable obj

include/qpdf/QPDFWriter.hh
... ... @@ -609,7 +609,7 @@ class QPDFWriter
609 609 void pushMD5Pipeline(PipelinePopper&);
610 610 void computeDeterministicIDData();
611 611  
612   - void discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out);
  612 + void discardGeneration(std::map<int, int>& out);
613 613  
614 614 class Members;
615 615  
... ...
libqpdf/QPDFWriter.cc
... ... @@ -1093,19 +1093,18 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
1093 1093 }
1094 1094  
1095 1095 QPDFObjGen og = object.getObjGen();
1096   - auto& renumber = m->obj[og].renumber;
  1096 + auto& obj = m->obj[og];
1097 1097  
1098   - if (renumber == 0) {
1099   - if (m->object_to_object_stream.count(og)) {
  1098 + if (obj.renumber == 0) {
  1099 + if (obj.object_stream > 0) {
1100 1100 // This is in an object stream. Don't process it here. Instead, enqueue the object
1101 1101 // stream. Object streams always have generation 0.
1102   - int stream_id = m->object_to_object_stream[og];
1103   - // Detect loops by storing invalid object ID 0, which will get overwritten later.
1104   - renumber = -1;
1105   - enqueueObject(m->pdf.getObjectByID(stream_id, 0));
  1102 + // Detect loops by storing invalid object ID -1, which will get overwritten later.
  1103 + obj.renumber = -1;
  1104 + enqueueObject(m->pdf.getObject(obj.object_stream, 0));
1106 1105 } else {
1107 1106 m->object_queue.push_back(object);
1108   - renumber = m->next_objid++;
  1107 + obj.renumber = m->next_objid++;
1109 1108  
1110 1109 if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) {
1111 1110 // For linearized files, uncompressed objects go at end, and we take care of
... ... @@ -1118,7 +1117,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
1118 1117 ++m->next_objid;
1119 1118 }
1120 1119 }
1121   - } else if (renumber == -1) {
  1120 + } else if (obj.renumber == -1) {
1122 1121 // This can happen if a specially constructed file indicates that an object stream is
1123 1122 // inside itself.
1124 1123 QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream");
... ... @@ -1939,6 +1938,7 @@ QPDFWriter::preserveObjectStreams()
1939 1938 std::map<int, int> omap;
1940 1939 QPDF::Writer::getObjectStreamData(m->pdf, omap);
1941 1940 if (omap.empty()) {
  1941 + m->obj.streams_empty = true;
1942 1942 return;
1943 1943 }
1944 1944 // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
... ... @@ -1957,7 +1957,7 @@ QPDFWriter::preserveObjectStreams()
1957 1957 for (auto iter: omap) {
1958 1958 QPDFObjGen og(iter.first, 0);
1959 1959 if (eligible.count(og) || m->preserve_unreferenced_objects) {
1960   - m->object_to_object_stream[og] = iter.second;
  1960 + m->obj[iter.first].object_stream = iter.second;
1961 1961 } else {
1962 1962 QTC::TC("qpdf", "QPDFWriter exclude from object stream");
1963 1963 }
... ... @@ -1982,6 +1982,7 @@ QPDFWriter::generateObjectStreams()
1982 1982 // during writing.
1983 1983 initializeTables(2U * n_object_streams);
1984 1984 if (n_object_streams == 0) {
  1985 + m->obj.streams_empty = true;
1985 1986 return;
1986 1987 }
1987 1988 size_t n_per = eligible.size() / n_object_streams;
... ... @@ -2002,7 +2003,9 @@ QPDFWriter::generateObjectStreams()
2002 2003 // knows that this means we're creating the object stream from scratch.
2003 2004 cur_ostream = m->pdf.makeIndirectObject(QPDFObjectHandle::newNull()).getObjectID();
2004 2005 }
2005   - m->object_to_object_stream[iter] = cur_ostream;
  2006 + auto& obj = m->obj[iter];
  2007 + obj.object_stream = cur_ostream;
  2008 + obj.gen = iter.getGen();
2006 2009 ++n;
2007 2010 }
2008 2011 }
... ... @@ -2136,6 +2139,7 @@ QPDFWriter::doWriteSetup()
2136 2139 // Initialize object table for all existing objects plus some headroom for objects created
2137 2140 // during writing.
2138 2141 initializeTables();
  2142 + m->obj.streams_empty = true;
2139 2143 break;
2140 2144  
2141 2145 case qpdf_o_preserve:
... ... @@ -2150,39 +2154,43 @@ QPDFWriter::doWriteSetup()
2150 2154 // no default so gcc will warn for missing case tag
2151 2155 }
2152 2156  
2153   - if (m->linearized) {
2154   - // Page dictionaries are not allowed to be compressed objects.
2155   - for (auto& page: m->pdf.getAllPages()) {
2156   - QPDFObjGen og = page.getObjGen();
2157   - if (m->object_to_object_stream.count(og)) {
2158   - QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2159   - m->object_to_object_stream.erase(og);
  2157 + if (!m->obj.streams_empty) {
  2158 + if (m->linearized) {
  2159 + // Page dictionaries are not allowed to be compressed objects.
  2160 + for (auto& page: m->pdf.getAllPages()) {
  2161 + if (m->obj[page].object_stream > 0) {
  2162 + QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
  2163 + m->obj[page].object_stream = 0;
  2164 + }
2160 2165 }
2161 2166 }
2162   - }
2163 2167  
2164   - if (m->linearized || m->encrypted) {
2165   - // The document catalog is not allowed to be compressed in linearized files either. It also
2166   - // appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to handle
2167   - // encrypted files with compressed document catalogs, so we disable them in that case as
2168   - // well.
2169   - if (m->object_to_object_stream.count(m->root_og)) {
2170   - QTC::TC("qpdf", "QPDFWriter uncompressing root");
2171   - m->object_to_object_stream.erase(m->root_og);
  2168 + if (m->linearized || m->encrypted) {
  2169 + // The document catalog is not allowed to be compressed in linearized files either. It
  2170 + // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
  2171 + // handle encrypted files with compressed document catalogs, so we disable them in that
  2172 + // case as well.
  2173 + if (m->obj[m->root_og].object_stream > 0) {
  2174 + QTC::TC("qpdf", "QPDFWriter uncompressing root");
  2175 + m->obj[m->root_og].object_stream = 0;
  2176 + }
2172 2177 }
2173   - }
2174 2178  
2175   - // Generate reverse mapping from object stream to objects
2176   - for (auto const& iter: m->object_to_object_stream) {
2177   - QPDFObjGen const& obj = iter.first;
2178   - int stream = iter.second;
2179   - m->object_stream_to_objects[stream].insert(obj);
2180   - m->max_ostream_index = std::max(
2181   - m->max_ostream_index, QIntC::to_int(m->object_stream_to_objects[stream].size()) - 1);
2182   - }
  2179 + // Generate reverse mapping from object stream to objects
  2180 + m->obj.forEach([this](auto id, auto const& item) -> void {
  2181 + if (item.object_stream > 0) {
  2182 + m->object_stream_to_objects[item.object_stream].insert(QPDFObjGen(id, item.gen));
  2183 + m->max_ostream_index = std::max(
  2184 + m->max_ostream_index,
  2185 + QIntC::to_int(m->object_stream_to_objects[item.object_stream].size()) - 1);
  2186 + }
  2187 + });
2183 2188  
2184   - if (!m->object_stream_to_objects.empty()) {
2185   - setMinimumPDFVersion("1.5");
  2189 + if (m->object_stream_to_objects.empty()) {
  2190 + m->obj.streams_empty = true;
  2191 + } else {
  2192 + setMinimumPDFVersion("1.5");
  2193 + }
2186 2194 }
2187 2195  
2188 2196 setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel());
... ... @@ -2519,7 +2527,7 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2519 2527 }
2520 2528  
2521 2529 void
2522   -QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out)
  2530 +QPDFWriter::discardGeneration(std::map<int, int>& out)
2523 2531 {
2524 2532 // There are deep assumptions in the linearization code in QPDF that there is only one object
2525 2533 // with each object number; i.e., you can't have two objects with the same object number and
... ... @@ -2531,18 +2539,11 @@ QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int,
2531 2539 // generation numbers.
2532 2540  
2533 2541 out.clear();
2534   - for (auto const& iter: in) {
2535   - if (out.count(iter.first.getObj())) {
2536   - throw std::runtime_error("QPDF cannot currently linearize files that contain"
2537   - " multiple objects with the same object ID and different"
2538   - " generations. If you see this error message, please file"
2539   - " a bug report and attach the file if possible. As a"
2540   - " workaround, first convert the file with qpdf without"
2541   - " linearizing, and then linearize the result of that"
2542   - " conversion.");
  2542 + m->obj.forEach([&out](auto id, auto const& item) -> void {
  2543 + if (item.object_stream > 0) {
  2544 + out[id] = item.object_stream;
2543 2545 }
2544   - out[iter.first.getObj()] = iter.second;
2545   - }
  2546 + });
2546 2547 }
2547 2548  
2548 2549 void
... ... @@ -2550,7 +2551,7 @@ QPDFWriter::writeLinearized()
2550 2551 {
2551 2552 // Optimize file and enqueue objects in order
2552 2553  
2553   - discardGeneration(m->object_to_object_stream, m->object_to_object_stream_no_gen);
  2554 + discardGeneration(m->object_to_object_stream_no_gen);
2554 2555  
2555 2556 auto skip_stream_parameters = [this](QPDFObjectHandle& stream) {
2556 2557 bool compress_stream;
... ... @@ -2594,7 +2595,7 @@ QPDFWriter::writeLinearized()
2594 2595 int after_second_half = 1 + second_half_uncompressed;
2595 2596 m->next_objid = after_second_half;
2596 2597 int second_half_xref = 0;
2597   - bool need_xref_stream = (!m->object_to_object_stream.empty());
  2598 + bool need_xref_stream = !m->obj.streams_empty;
2598 2599 if (need_xref_stream) {
2599 2600 second_half_xref = m->next_objid++;
2600 2601 }
... ...
libqpdf/qpdf/QPDFWriter_private.hh
... ... @@ -11,11 +11,24 @@
11 11 struct QPDFWriter::Object
12 12 {
13 13 int renumber{0};
  14 + int gen{0};
  15 + int object_stream{0};
14 16 };
15 17  
16 18 class QPDFWriter::ObjTable: public ::ObjTable<QPDFWriter::Object>
17 19 {
18 20 friend class QPDFWriter;
  21 +
  22 + public:
  23 + bool
  24 + getStreamsEmpty() const noexcept
  25 + {
  26 + return streams_empty;
  27 + }
  28 +
  29 + private:
  30 + // For performance, set by QPDFWriter rather than tracked by ObjTable.
  31 + bool streams_empty{false};
19 32 };
20 33  
21 34 class QPDFWriter::Members
... ... @@ -88,7 +101,6 @@ class QPDFWriter::Members
88 101 std::set<QPDFObjGen> normalized_streams;
89 102 std::map<QPDFObjGen, int> page_object_to_seq;
90 103 std::map<QPDFObjGen, int> contents_to_page_seq;
91   - std::map<QPDFObjGen, int> object_to_object_stream;
92 104 std::map<int, std::set<QPDFObjGen>> object_stream_to_objects;
93 105 std::list<Pipeline*> pipeline_stack;
94 106 unsigned long long next_stack_id{0};
... ...