Commit cb7180b1bafd075a00d31dc598ebb01e755a2f09

Authored by m-holger
1 parent 28c13f54

Move QPDF::ObjCache::end_before_space and end_after_space to Xref_table

Also, defer looking up the containing object stream's end offsets for
compressed objects until they are needed by the linearization checks.
include/qpdf/QPDF.hh
... ... @@ -786,11 +786,7 @@ class QPDF
786 786 std::shared_ptr<QPDFObject> getObjectForParser(int id, int gen, bool parse_pdf);
787 787 std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen);
788 788 void removeObject(QPDFObjGen og);
789   - void updateCache(
790   - QPDFObjGen const& og,
791   - std::shared_ptr<QPDFObject> const& object,
792   - qpdf_offset_t end_before_space,
793   - qpdf_offset_t end_after_space);
  789 + void updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object);
794 790 static QPDFExc damagedPDF(
795 791 InputSource& input,
796 792 std::string const& object,
... ...
libqpdf/QPDF.cc
... ... @@ -1953,7 +1953,9 @@ QPDF::readObjectAtOffset(
1953 1953 // could use !check_og in place of skip_cache_if_in_xref.
1954 1954 QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
1955 1955 } else {
1956   - updateCache(og, oh.getObj(), end_before_space, end_after_space);
  1956 + m->xref_table.linearization_offsets(
  1957 + toS(og.getObj()), end_before_space, end_after_space);
  1958 + updateCache(og, oh.getObj());
1957 1959 }
1958 1960 }
1959 1961  
... ... @@ -1972,7 +1974,7 @@ QPDF::resolve(QPDFObjGen og)
1972 1974 // has to be resolved during object parsing, such as stream length.
1973 1975 QTC::TC("qpdf", "QPDF recursion loop in resolve");
1974 1976 warn(damagedPDF("", "loop detected resolving object " + og.unparse(' ')));
1975   - updateCache(og, QPDF_Null::create(), -1, -1);
  1977 + updateCache(og, QPDF_Null::create());
1976 1978 return m->obj_cache[og].object.get();
1977 1979 }
1978 1980 ResolveRecorder rr(this, og);
... ... @@ -2008,7 +2010,7 @@ QPDF::resolve(QPDFObjGen og)
2008 2010 if (isUnresolved(og)) {
2009 2011 // PDF spec says unknown objects resolve to the null object.
2010 2012 QTC::TC("qpdf", "QPDF resolve failure to null");
2011   - updateCache(og, QPDF_Null::create(), -1, -1);
  2013 + updateCache(og, QPDF_Null::create());
2012 2014 }
2013 2015  
2014 2016 auto result(m->obj_cache[og].object);
... ... @@ -2030,12 +2032,6 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
2030 2032 "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream");
2031 2033 }
2032 2034  
2033   - // For linearization data in the object, use the data from the object stream for the objects in
2034   - // the stream.
2035   - QPDFObjGen stream_og(obj_stream_number, 0);
2036   - qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space;
2037   - qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space;
2038   -
2039 2035 QPDFObjectHandle dict = obj_stream.getDict();
2040 2036 if (!dict.isDictionaryOfType("/ObjStm")) {
2041 2037 QTC::TC("qpdf", "QPDF ERR object stream with wrong type");
... ... @@ -2101,7 +2097,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
2101 2097 int offset = iter.second;
2102 2098 input->seek(offset, SEEK_SET);
2103 2099 QPDFObjectHandle oh = readObjectInStream(input, iter.first);
2104   - updateCache(og, oh.getObj(), end_before_space, end_after_space);
  2100 + updateCache(og, oh.getObj());
2105 2101 } else {
2106 2102 QTC::TC("qpdf", "QPDF not caching overridden objstm object");
2107 2103 }
... ... @@ -2116,20 +2112,14 @@ QPDF::newIndirect(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& obj)
2116 2112 }
2117 2113  
2118 2114 void
2119   -QPDF::updateCache(
2120   - QPDFObjGen const& og,
2121   - std::shared_ptr<QPDFObject> const& object,
2122   - qpdf_offset_t end_before_space,
2123   - qpdf_offset_t end_after_space)
  2115 +QPDF::updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object)
2124 2116 {
2125 2117 object->setObjGen(this, og);
2126 2118 if (isCached(og)) {
2127 2119 auto& cache = m->obj_cache[og];
2128 2120 cache.object->assign(object);
2129   - cache.end_before_space = end_before_space;
2130   - cache.end_after_space = end_after_space;
2131 2121 } else {
2132   - m->obj_cache[og] = ObjCache(object, end_before_space, end_after_space);
  2122 + m->obj_cache[og] = ObjCache(object);
2133 2123 }
2134 2124 }
2135 2125  
... ... @@ -2159,7 +2149,7 @@ QPDFObjectHandle
2159 2149 QPDF::makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj)
2160 2150 {
2161 2151 QPDFObjGen next{nextObjGen()};
2162   - m->obj_cache[next] = ObjCache(obj, -1, -1);
  2152 + m->obj_cache[next] = ObjCache(obj);
2163 2153 return newIndirect(next, m->obj_cache[next].object);
2164 2154 }
2165 2155  
... ... @@ -2246,7 +2236,7 @@ QPDF::getObject(QPDFObjGen const& og)
2246 2236 } else if (m->xref_table.initialized() && !m->xref_table.type(og)) {
2247 2237 return QPDF_Null::create();
2248 2238 } else {
2249   - auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1);
  2239 + auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og));
2250 2240 return {result.first->second.object};
2251 2241 }
2252 2242 }
... ... @@ -2282,7 +2272,7 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh)
2282 2272 QTC::TC("qpdf", "QPDF replaceObject called with indirect object");
2283 2273 throw std::logic_error("QPDF::replaceObject called with indirect object handle");
2284 2274 }
2285   - updateCache(og, oh.getObj(), -1, -1);
  2275 + updateCache(og, oh.getObj());
2286 2276 }
2287 2277  
2288 2278 void
... ...
libqpdf/QPDF_linearization.cc
... ... @@ -288,9 +288,8 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
288 288 QPDFObjGen og;
289 289 QPDFObjectHandle H =
290 290 readObjectAtOffset(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false);
291   - ObjCache& oc = m->obj_cache[og];
292   - qpdf_offset_t min_end_offset = oc.end_before_space;
293   - qpdf_offset_t max_end_offset = oc.end_after_space;
  291 + qpdf_offset_t min_end_offset = m->xref_table.end_before_space(og);
  292 + qpdf_offset_t max_end_offset = m->xref_table.end_after_space(og);
294 293 if (!H.isStream()) {
295 294 throw damagedPDF("linearization dictionary", "hint table is not a stream");
296 295 }
... ... @@ -301,14 +300,11 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
301 300 // increasing length to cover it, even though the specification says all objects in the
302 301 // linearization parameter dictionary must be direct. We have to get the file position of the
303 302 // end of length in this case.
304   - QPDFObjectHandle length_obj = Hdict.getKey("/Length");
305   - if (length_obj.isIndirect()) {
  303 + auto length_og = Hdict.getKey("/Length").getObjGen();
  304 + if (length_og.isIndirect()) {
306 305 QTC::TC("qpdf", "QPDF hint table length indirect");
307   - // Force resolution
308   - (void)length_obj.getIntValue();
309   - ObjCache& oc2 = m->obj_cache[length_obj.getObjGen()];
310   - min_end_offset = oc2.end_before_space;
311   - max_end_offset = oc2.end_after_space;
  306 + min_end_offset = m->xref_table.end_before_space(length_og);
  307 + max_end_offset = m->xref_table.end_after_space(length_og);
312 308 } else {
313 309 QTC::TC("qpdf", "QPDF hint table length direct");
314 310 }
... ... @@ -503,13 +499,14 @@ QPDF::checkLinearizationInternal()
503 499 qpdf_offset_t max_E = -1;
504 500 for (auto const& oh: m->part6) {
505 501 QPDFObjGen og(oh.getObjGen());
506   - if (m->obj_cache.count(og) == 0) {
  502 + auto before = m->xref_table.end_before_space(og);
  503 + auto after = m->xref_table.end_after_space(og);
  504 + if (before <= 0) {
507 505 // All objects have to have been dereferenced to be classified.
508 506 throw std::logic_error("linearization part6 object not in cache");
509 507 }
510   - ObjCache const& oc = m->obj_cache[og];
511   - min_E = std::max(min_E, oc.end_before_space);
512   - max_E = std::max(max_E, oc.end_after_space);
  508 + min_E = std::max(min_E, before);
  509 + max_E = std::max(max_E, after);
513 510 }
514 511 if ((p.first_page_end < min_E) || (p.first_page_end > max_E)) {
515 512 QTC::TC("qpdf", "QPDF warn /E mismatch");
... ... @@ -536,10 +533,11 @@ QPDF::maxEnd(ObjUser const& ou)
536 533 }
537 534 qpdf_offset_t end = 0;
538 535 for (auto const& og: m->obj_user_to_objects[ou]) {
539   - if (m->obj_cache.count(og) == 0) {
  536 + auto e = m->xref_table.end_after_space(og);
  537 + if (e <= 0) {
540 538 stopOnError("unknown object referenced in object user table");
541 539 }
542   - end = std::max(end, m->obj_cache[og].end_after_space);
  540 + end = std::max(end, e);
543 541 }
544 542 return end;
545 543 }
... ... @@ -599,15 +597,13 @@ QPDF::lengthNextN(int first_object, int n)
599 597 int length = 0;
600 598 for (int i = 0; i < n; ++i) {
601 599 QPDFObjGen og(first_object + i, 0);
602   - if (m->xref_table.type(og) == 0) {
  600 + auto end = m->xref_table.end_after_space(og);
  601 + if (end <= 0) {
603 602 linearizationWarning(
604 603 "no xref table entry for " + std::to_string(first_object + i) + " 0");
605   - } else {
606   - if (m->obj_cache.count(og) == 0) {
607   - stopOnError("found unknown object while calculating length for linearization data");
608   - }
609   - length += toI(m->obj_cache[og].end_after_space - getLinearizationOffset(og));
  604 + continue;
610 605 }
  606 + length += toI(end - getLinearizationOffset(og));
611 607 }
612 608 return length;
613 609 }
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -151,6 +151,49 @@ class QPDF::Xref_table
151 151  
152 152 // For Linearization
153 153  
  154 + qpdf_offset_t
  155 + end_after_space(QPDFObjGen og)
  156 + {
  157 + auto& e = entry(toS(og.getObj()));
  158 + switch (e.type()) {
  159 + case 1:
  160 + return e.end_after_space_;
  161 + case 2:
  162 + {
  163 + auto es = entry(toS(e.stream_number()));
  164 + return es.type() == 1 ? es.end_after_space_ : 0;
  165 + }
  166 + default:
  167 + return 0;
  168 + }
  169 + }
  170 +
  171 + qpdf_offset_t
  172 + end_before_space(QPDFObjGen og)
  173 + {
  174 + auto& e = entry(toS(og.getObj()));
  175 + switch (e.type()) {
  176 + case 1:
  177 + return e.end_before_space_;
  178 + case 2:
  179 + {
  180 + auto es = entry(toS(e.stream_number()));
  181 + return es.type() == 1 ? es.end_before_space_ : 0;
  182 + }
  183 + default:
  184 + return 0;
  185 + }
  186 + }
  187 +
  188 + void
  189 + linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after)
  190 + {
  191 + if (type(id)) {
  192 + table[id].end_before_space_ = before;
  193 + table[id].end_after_space_ = after;
  194 + }
  195 + }
  196 +
154 197 bool
155 198 uncompressed_after_compressed() const noexcept
156 199 {
... ... @@ -192,6 +235,14 @@ class QPDF::Xref_table
192 235  
193 236 struct Entry
194 237 {
  238 + Entry() = default;
  239 +
  240 + Entry(int gen, Xref entry) :
  241 + gen_(gen),
  242 + entry(entry)
  243 + {
  244 + }
  245 +
195 246 int
196 247 gen() const noexcept
197 248 {
... ... @@ -224,8 +275,16 @@ class QPDF::Xref_table
224 275  
225 276 int gen_{0};
226 277 Xref entry;
  278 + qpdf_offset_t end_before_space_{0};
  279 + qpdf_offset_t end_after_space_{0};
227 280 };
228 281  
  282 + Entry&
  283 + entry(size_t id)
  284 + {
  285 + return id < table.size() ? table[id] : table[0];
  286 + }
  287 +
229 288 void read(qpdf_offset_t offset);
230 289  
231 290 // Methods to parse tables
... ... @@ -384,24 +443,14 @@ class QPDF::Pipe
384 443 class QPDF::ObjCache
385 444 {
386 445 public:
387   - ObjCache() :
388   - end_before_space(0),
389   - end_after_space(0)
390   - {
391   - }
392   - ObjCache(
393   - std::shared_ptr<QPDFObject> object,
394   - qpdf_offset_t end_before_space = 0,
395   - qpdf_offset_t end_after_space = 0) :
396   - object(object),
397   - end_before_space(end_before_space),
398   - end_after_space(end_after_space)
  446 + ObjCache() = default;
  447 +
  448 + ObjCache(std::shared_ptr<QPDFObject> object) :
  449 + object(object)
399 450 {
400 451 }
401 452  
402 453 std::shared_ptr<QPDFObject> object;
403   - qpdf_offset_t end_before_space;
404   - qpdf_offset_t end_after_space;
405 454 };
406 455  
407 456 class QPDF::ObjCopier
... ...