Commit 2fa93e79b40404948fcfcb79bff92b3caf0684fd

Authored by m-holger
1 parent 84e25919

In QPDFWriter replace map object_to_object_stream with ObjTable obj

include/qpdf/QPDFWriter.hh
@@ -609,7 +609,7 @@ class QPDFWriter @@ -609,7 +609,7 @@ class QPDFWriter
609 void pushMD5Pipeline(PipelinePopper&); 609 void pushMD5Pipeline(PipelinePopper&);
610 void computeDeterministicIDData(); 610 void computeDeterministicIDData();
611 611
612 - void discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out); 612 + void discardGeneration(std::map<int, int>& out);
613 613
614 class Members; 614 class Members;
615 615
libqpdf/QPDFWriter.cc
@@ -1093,19 +1093,18 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) @@ -1093,19 +1093,18 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
1093 } 1093 }
1094 1094
1095 QPDFObjGen og = object.getObjGen(); 1095 QPDFObjGen og = object.getObjGen();
1096 - auto& renumber = m->obj[og].renumber; 1096 + auto& obj = m->obj[og];
1097 1097
1098 - if (renumber == 0) {  
1099 - if (m->object_to_object_stream.count(og)) { 1098 + if (obj.renumber == 0) {
  1099 + if (obj.object_stream > 0) {
1100 // This is in an object stream. Don't process it here. Instead, enqueue the object 1100 // This is in an object stream. Don't process it here. Instead, enqueue the object
1101 // stream. Object streams always have generation 0. 1101 // stream. Object streams always have generation 0.
1102 - int stream_id = m->object_to_object_stream[og];  
1103 - // Detect loops by storing invalid object ID 0, which will get overwritten later.  
1104 - renumber = -1;  
1105 - enqueueObject(m->pdf.getObjectByID(stream_id, 0)); 1102 + // Detect loops by storing invalid object ID -1, which will get overwritten later.
  1103 + obj.renumber = -1;
  1104 + enqueueObject(m->pdf.getObject(obj.object_stream, 0));
1106 } else { 1105 } else {
1107 m->object_queue.push_back(object); 1106 m->object_queue.push_back(object);
1108 - renumber = m->next_objid++; 1107 + obj.renumber = m->next_objid++;
1109 1108
1110 if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) { 1109 if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) {
1111 // For linearized files, uncompressed objects go at end, and we take care of 1110 // For linearized files, uncompressed objects go at end, and we take care of
@@ -1118,7 +1117,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) @@ -1118,7 +1117,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
1118 ++m->next_objid; 1117 ++m->next_objid;
1119 } 1118 }
1120 } 1119 }
1121 - } else if (renumber == -1) { 1120 + } else if (obj.renumber == -1) {
1122 // This can happen if a specially constructed file indicates that an object stream is 1121 // This can happen if a specially constructed file indicates that an object stream is
1123 // inside itself. 1122 // inside itself.
1124 QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream"); 1123 QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream");
@@ -1939,6 +1938,7 @@ QPDFWriter::preserveObjectStreams() @@ -1939,6 +1938,7 @@ QPDFWriter::preserveObjectStreams()
1939 std::map<int, int> omap; 1938 std::map<int, int> omap;
1940 QPDF::Writer::getObjectStreamData(m->pdf, omap); 1939 QPDF::Writer::getObjectStreamData(m->pdf, omap);
1941 if (omap.empty()) { 1940 if (omap.empty()) {
  1941 + m->obj.streams_empty = true;
1942 return; 1942 return;
1943 } 1943 }
1944 // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object 1944 // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
@@ -1957,7 +1957,7 @@ QPDFWriter::preserveObjectStreams() @@ -1957,7 +1957,7 @@ QPDFWriter::preserveObjectStreams()
1957 for (auto iter: omap) { 1957 for (auto iter: omap) {
1958 QPDFObjGen og(iter.first, 0); 1958 QPDFObjGen og(iter.first, 0);
1959 if (eligible.count(og) || m->preserve_unreferenced_objects) { 1959 if (eligible.count(og) || m->preserve_unreferenced_objects) {
1960 - m->object_to_object_stream[og] = iter.second; 1960 + m->obj[iter.first].object_stream = iter.second;
1961 } else { 1961 } else {
1962 QTC::TC("qpdf", "QPDFWriter exclude from object stream"); 1962 QTC::TC("qpdf", "QPDFWriter exclude from object stream");
1963 } 1963 }
@@ -1982,6 +1982,7 @@ QPDFWriter::generateObjectStreams() @@ -1982,6 +1982,7 @@ QPDFWriter::generateObjectStreams()
1982 // during writing. 1982 // during writing.
1983 initializeTables(2U * n_object_streams); 1983 initializeTables(2U * n_object_streams);
1984 if (n_object_streams == 0) { 1984 if (n_object_streams == 0) {
  1985 + m->obj.streams_empty = true;
1985 return; 1986 return;
1986 } 1987 }
1987 size_t n_per = eligible.size() / n_object_streams; 1988 size_t n_per = eligible.size() / n_object_streams;
@@ -2002,7 +2003,9 @@ QPDFWriter::generateObjectStreams() @@ -2002,7 +2003,9 @@ QPDFWriter::generateObjectStreams()
2002 // knows that this means we're creating the object stream from scratch. 2003 // knows that this means we're creating the object stream from scratch.
2003 cur_ostream = m->pdf.makeIndirectObject(QPDFObjectHandle::newNull()).getObjectID(); 2004 cur_ostream = m->pdf.makeIndirectObject(QPDFObjectHandle::newNull()).getObjectID();
2004 } 2005 }
2005 - m->object_to_object_stream[iter] = cur_ostream; 2006 + auto& obj = m->obj[iter];
  2007 + obj.object_stream = cur_ostream;
  2008 + obj.gen = iter.getGen();
2006 ++n; 2009 ++n;
2007 } 2010 }
2008 } 2011 }
@@ -2136,6 +2139,7 @@ QPDFWriter::doWriteSetup() @@ -2136,6 +2139,7 @@ QPDFWriter::doWriteSetup()
2136 // Initialize object table for all existing objects plus some headroom for objects created 2139 // Initialize object table for all existing objects plus some headroom for objects created
2137 // during writing. 2140 // during writing.
2138 initializeTables(); 2141 initializeTables();
  2142 + m->obj.streams_empty = true;
2139 break; 2143 break;
2140 2144
2141 case qpdf_o_preserve: 2145 case qpdf_o_preserve:
@@ -2150,39 +2154,43 @@ QPDFWriter::doWriteSetup() @@ -2150,39 +2154,43 @@ QPDFWriter::doWriteSetup()
2150 // no default so gcc will warn for missing case tag 2154 // no default so gcc will warn for missing case tag
2151 } 2155 }
2152 2156
2153 - if (m->linearized) {  
2154 - // Page dictionaries are not allowed to be compressed objects.  
2155 - for (auto& page: m->pdf.getAllPages()) {  
2156 - QPDFObjGen og = page.getObjGen();  
2157 - if (m->object_to_object_stream.count(og)) {  
2158 - QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");  
2159 - m->object_to_object_stream.erase(og); 2157 + if (!m->obj.streams_empty) {
  2158 + if (m->linearized) {
  2159 + // Page dictionaries are not allowed to be compressed objects.
  2160 + for (auto& page: m->pdf.getAllPages()) {
  2161 + if (m->obj[page].object_stream > 0) {
  2162 + QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
  2163 + m->obj[page].object_stream = 0;
  2164 + }
2160 } 2165 }
2161 } 2166 }
2162 - }  
2163 2167
2164 - if (m->linearized || m->encrypted) {  
2165 - // The document catalog is not allowed to be compressed in linearized files either. It also  
2166 - // appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to handle  
2167 - // encrypted files with compressed document catalogs, so we disable them in that case as  
2168 - // well.  
2169 - if (m->object_to_object_stream.count(m->root_og)) {  
2170 - QTC::TC("qpdf", "QPDFWriter uncompressing root");  
2171 - m->object_to_object_stream.erase(m->root_og); 2168 + if (m->linearized || m->encrypted) {
  2169 + // The document catalog is not allowed to be compressed in linearized files either. It
  2170 + // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
  2171 + // handle encrypted files with compressed document catalogs, so we disable them in that
  2172 + // case as well.
  2173 + if (m->obj[m->root_og].object_stream > 0) {
  2174 + QTC::TC("qpdf", "QPDFWriter uncompressing root");
  2175 + m->obj[m->root_og].object_stream = 0;
  2176 + }
2172 } 2177 }
2173 - }  
2174 2178
2175 - // Generate reverse mapping from object stream to objects  
2176 - for (auto const& iter: m->object_to_object_stream) {  
2177 - QPDFObjGen const& obj = iter.first;  
2178 - int stream = iter.second;  
2179 - m->object_stream_to_objects[stream].insert(obj);  
2180 - m->max_ostream_index = std::max(  
2181 - m->max_ostream_index, QIntC::to_int(m->object_stream_to_objects[stream].size()) - 1);  
2182 - } 2179 + // Generate reverse mapping from object stream to objects
  2180 + m->obj.forEach([this](auto id, auto const& item) -> void {
  2181 + if (item.object_stream > 0) {
  2182 + m->object_stream_to_objects[item.object_stream].insert(QPDFObjGen(id, item.gen));
  2183 + m->max_ostream_index = std::max(
  2184 + m->max_ostream_index,
  2185 + QIntC::to_int(m->object_stream_to_objects[item.object_stream].size()) - 1);
  2186 + }
  2187 + });
2183 2188
2184 - if (!m->object_stream_to_objects.empty()) {  
2185 - setMinimumPDFVersion("1.5"); 2189 + if (m->object_stream_to_objects.empty()) {
  2190 + m->obj.streams_empty = true;
  2191 + } else {
  2192 + setMinimumPDFVersion("1.5");
  2193 + }
2186 } 2194 }
2187 2195
2188 setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel()); 2196 setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel());
@@ -2519,7 +2527,7 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes) @@ -2519,7 +2527,7 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
2519 } 2527 }
2520 2528
2521 void 2529 void
2522 -QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out) 2530 +QPDFWriter::discardGeneration(std::map<int, int>& out)
2523 { 2531 {
2524 // There are deep assumptions in the linearization code in QPDF that there is only one object 2532 // There are deep assumptions in the linearization code in QPDF that there is only one object
2525 // with each object number; i.e., you can't have two objects with the same object number and 2533 // with each object number; i.e., you can't have two objects with the same object number and
@@ -2531,18 +2539,11 @@ QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, @@ -2531,18 +2539,11 @@ QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int,
2531 // generation numbers. 2539 // generation numbers.
2532 2540
2533 out.clear(); 2541 out.clear();
2534 - for (auto const& iter: in) {  
2535 - if (out.count(iter.first.getObj())) {  
2536 - throw std::runtime_error("QPDF cannot currently linearize files that contain"  
2537 - " multiple objects with the same object ID and different"  
2538 - " generations. If you see this error message, please file"  
2539 - " a bug report and attach the file if possible. As a"  
2540 - " workaround, first convert the file with qpdf without"  
2541 - " linearizing, and then linearize the result of that"  
2542 - " conversion."); 2542 + m->obj.forEach([&out](auto id, auto const& item) -> void {
  2543 + if (item.object_stream > 0) {
  2544 + out[id] = item.object_stream;
2543 } 2545 }
2544 - out[iter.first.getObj()] = iter.second;  
2545 - } 2546 + });
2546 } 2547 }
2547 2548
2548 void 2549 void
@@ -2550,7 +2551,7 @@ QPDFWriter::writeLinearized() @@ -2550,7 +2551,7 @@ QPDFWriter::writeLinearized()
2550 { 2551 {
2551 // Optimize file and enqueue objects in order 2552 // Optimize file and enqueue objects in order
2552 2553
2553 - discardGeneration(m->object_to_object_stream, m->object_to_object_stream_no_gen); 2554 + discardGeneration(m->object_to_object_stream_no_gen);
2554 2555
2555 auto skip_stream_parameters = [this](QPDFObjectHandle& stream) { 2556 auto skip_stream_parameters = [this](QPDFObjectHandle& stream) {
2556 bool compress_stream; 2557 bool compress_stream;
@@ -2594,7 +2595,7 @@ QPDFWriter::writeLinearized() @@ -2594,7 +2595,7 @@ QPDFWriter::writeLinearized()
2594 int after_second_half = 1 + second_half_uncompressed; 2595 int after_second_half = 1 + second_half_uncompressed;
2595 m->next_objid = after_second_half; 2596 m->next_objid = after_second_half;
2596 int second_half_xref = 0; 2597 int second_half_xref = 0;
2597 - bool need_xref_stream = (!m->object_to_object_stream.empty()); 2598 + bool need_xref_stream = !m->obj.streams_empty;
2598 if (need_xref_stream) { 2599 if (need_xref_stream) {
2599 second_half_xref = m->next_objid++; 2600 second_half_xref = m->next_objid++;
2600 } 2601 }
libqpdf/qpdf/QPDFWriter_private.hh
@@ -11,11 +11,24 @@ @@ -11,11 +11,24 @@
11 struct QPDFWriter::Object 11 struct QPDFWriter::Object
12 { 12 {
13 int renumber{0}; 13 int renumber{0};
  14 + int gen{0};
  15 + int object_stream{0};
14 }; 16 };
15 17
16 class QPDFWriter::ObjTable: public ::ObjTable<QPDFWriter::Object> 18 class QPDFWriter::ObjTable: public ::ObjTable<QPDFWriter::Object>
17 { 19 {
18 friend class QPDFWriter; 20 friend class QPDFWriter;
  21 +
  22 + public:
  23 + bool
  24 + getStreamsEmpty() const noexcept
  25 + {
  26 + return streams_empty;
  27 + }
  28 +
  29 + private:
  30 + // For performance, set by QPDFWriter rather than tracked by ObjTable.
  31 + bool streams_empty{false};
19 }; 32 };
20 33
21 class QPDFWriter::Members 34 class QPDFWriter::Members
@@ -88,7 +101,6 @@ class QPDFWriter::Members @@ -88,7 +101,6 @@ class QPDFWriter::Members
88 std::set<QPDFObjGen> normalized_streams; 101 std::set<QPDFObjGen> normalized_streams;
89 std::map<QPDFObjGen, int> page_object_to_seq; 102 std::map<QPDFObjGen, int> page_object_to_seq;
90 std::map<QPDFObjGen, int> contents_to_page_seq; 103 std::map<QPDFObjGen, int> contents_to_page_seq;
91 - std::map<QPDFObjGen, int> object_to_object_stream;  
92 std::map<int, std::set<QPDFObjGen>> object_stream_to_objects; 104 std::map<int, std::set<QPDFObjGen>> object_stream_to_objects;
93 std::list<Pipeline*> pipeline_stack; 105 std::list<Pipeline*> pipeline_stack;
94 unsigned long long next_stack_id{0}; 106 unsigned long long next_stack_id{0};