Commit cb7180b1bafd075a00d31dc598ebb01e755a2f09
1 parent: 28c13f54
Move QPDF::ObjCache::end_before_space and end_after_space to Xref_table
Also, delay adjustments for compressed objects until needed by linearization checks.
Showing 4 changed files with 93 additions and 62 deletions.
include/qpdf/QPDF.hh
| @@ -786,11 +786,7 @@ class QPDF | @@ -786,11 +786,7 @@ class QPDF | ||
| 786 | std::shared_ptr<QPDFObject> getObjectForParser(int id, int gen, bool parse_pdf); | 786 | std::shared_ptr<QPDFObject> getObjectForParser(int id, int gen, bool parse_pdf); |
| 787 | std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen); | 787 | std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen); |
| 788 | void removeObject(QPDFObjGen og); | 788 | void removeObject(QPDFObjGen og); |
| 789 | - void updateCache( | ||
| 790 | - QPDFObjGen const& og, | ||
| 791 | - std::shared_ptr<QPDFObject> const& object, | ||
| 792 | - qpdf_offset_t end_before_space, | ||
| 793 | - qpdf_offset_t end_after_space); | 789 | + void updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object); |
| 794 | static QPDFExc damagedPDF( | 790 | static QPDFExc damagedPDF( |
| 795 | InputSource& input, | 791 | InputSource& input, |
| 796 | std::string const& object, | 792 | std::string const& object, |
libqpdf/QPDF.cc
| @@ -1953,7 +1953,9 @@ QPDF::readObjectAtOffset( | @@ -1953,7 +1953,9 @@ QPDF::readObjectAtOffset( | ||
| 1953 | // could use !check_og in place of skip_cache_if_in_xref. | 1953 | // could use !check_og in place of skip_cache_if_in_xref. |
| 1954 | QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); | 1954 | QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); |
| 1955 | } else { | 1955 | } else { |
| 1956 | - updateCache(og, oh.getObj(), end_before_space, end_after_space); | 1956 | + m->xref_table.linearization_offsets( |
| 1957 | + toS(og.getObj()), end_before_space, end_after_space); | ||
| 1958 | + updateCache(og, oh.getObj()); | ||
| 1957 | } | 1959 | } |
| 1958 | } | 1960 | } |
| 1959 | 1961 | ||
| @@ -1972,7 +1974,7 @@ QPDF::resolve(QPDFObjGen og) | @@ -1972,7 +1974,7 @@ QPDF::resolve(QPDFObjGen og) | ||
| 1972 | // has to be resolved during object parsing, such as stream length. | 1974 | // has to be resolved during object parsing, such as stream length. |
| 1973 | QTC::TC("qpdf", "QPDF recursion loop in resolve"); | 1975 | QTC::TC("qpdf", "QPDF recursion loop in resolve"); |
| 1974 | warn(damagedPDF("", "loop detected resolving object " + og.unparse(' '))); | 1976 | warn(damagedPDF("", "loop detected resolving object " + og.unparse(' '))); |
| 1975 | - updateCache(og, QPDF_Null::create(), -1, -1); | 1977 | + updateCache(og, QPDF_Null::create()); |
| 1976 | return m->obj_cache[og].object.get(); | 1978 | return m->obj_cache[og].object.get(); |
| 1977 | } | 1979 | } |
| 1978 | ResolveRecorder rr(this, og); | 1980 | ResolveRecorder rr(this, og); |
| @@ -2008,7 +2010,7 @@ QPDF::resolve(QPDFObjGen og) | @@ -2008,7 +2010,7 @@ QPDF::resolve(QPDFObjGen og) | ||
| 2008 | if (isUnresolved(og)) { | 2010 | if (isUnresolved(og)) { |
| 2009 | // PDF spec says unknown objects resolve to the null object. | 2011 | // PDF spec says unknown objects resolve to the null object. |
| 2010 | QTC::TC("qpdf", "QPDF resolve failure to null"); | 2012 | QTC::TC("qpdf", "QPDF resolve failure to null"); |
| 2011 | - updateCache(og, QPDF_Null::create(), -1, -1); | 2013 | + updateCache(og, QPDF_Null::create()); |
| 2012 | } | 2014 | } |
| 2013 | 2015 | ||
| 2014 | auto result(m->obj_cache[og].object); | 2016 | auto result(m->obj_cache[og].object); |
| @@ -2030,12 +2032,6 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | @@ -2030,12 +2032,6 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | ||
| 2030 | "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream"); | 2032 | "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream"); |
| 2031 | } | 2033 | } |
| 2032 | 2034 | ||
| 2033 | - // For linearization data in the object, use the data from the object stream for the objects in | ||
| 2034 | - // the stream. | ||
| 2035 | - QPDFObjGen stream_og(obj_stream_number, 0); | ||
| 2036 | - qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space; | ||
| 2037 | - qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space; | ||
| 2038 | - | ||
| 2039 | QPDFObjectHandle dict = obj_stream.getDict(); | 2035 | QPDFObjectHandle dict = obj_stream.getDict(); |
| 2040 | if (!dict.isDictionaryOfType("/ObjStm")) { | 2036 | if (!dict.isDictionaryOfType("/ObjStm")) { |
| 2041 | QTC::TC("qpdf", "QPDF ERR object stream with wrong type"); | 2037 | QTC::TC("qpdf", "QPDF ERR object stream with wrong type"); |
| @@ -2101,7 +2097,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | @@ -2101,7 +2097,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number) | ||
| 2101 | int offset = iter.second; | 2097 | int offset = iter.second; |
| 2102 | input->seek(offset, SEEK_SET); | 2098 | input->seek(offset, SEEK_SET); |
| 2103 | QPDFObjectHandle oh = readObjectInStream(input, iter.first); | 2099 | QPDFObjectHandle oh = readObjectInStream(input, iter.first); |
| 2104 | - updateCache(og, oh.getObj(), end_before_space, end_after_space); | 2100 | + updateCache(og, oh.getObj()); |
| 2105 | } else { | 2101 | } else { |
| 2106 | QTC::TC("qpdf", "QPDF not caching overridden objstm object"); | 2102 | QTC::TC("qpdf", "QPDF not caching overridden objstm object"); |
| 2107 | } | 2103 | } |
| @@ -2116,20 +2112,14 @@ QPDF::newIndirect(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& obj) | @@ -2116,20 +2112,14 @@ QPDF::newIndirect(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& obj) | ||
| 2116 | } | 2112 | } |
| 2117 | 2113 | ||
| 2118 | void | 2114 | void |
| 2119 | -QPDF::updateCache( | ||
| 2120 | - QPDFObjGen const& og, | ||
| 2121 | - std::shared_ptr<QPDFObject> const& object, | ||
| 2122 | - qpdf_offset_t end_before_space, | ||
| 2123 | - qpdf_offset_t end_after_space) | 2115 | +QPDF::updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object) |
| 2124 | { | 2116 | { |
| 2125 | object->setObjGen(this, og); | 2117 | object->setObjGen(this, og); |
| 2126 | if (isCached(og)) { | 2118 | if (isCached(og)) { |
| 2127 | auto& cache = m->obj_cache[og]; | 2119 | auto& cache = m->obj_cache[og]; |
| 2128 | cache.object->assign(object); | 2120 | cache.object->assign(object); |
| 2129 | - cache.end_before_space = end_before_space; | ||
| 2130 | - cache.end_after_space = end_after_space; | ||
| 2131 | } else { | 2121 | } else { |
| 2132 | - m->obj_cache[og] = ObjCache(object, end_before_space, end_after_space); | 2122 | + m->obj_cache[og] = ObjCache(object); |
| 2133 | } | 2123 | } |
| 2134 | } | 2124 | } |
| 2135 | 2125 | ||
| @@ -2159,7 +2149,7 @@ QPDFObjectHandle | @@ -2159,7 +2149,7 @@ QPDFObjectHandle | ||
| 2159 | QPDF::makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj) | 2149 | QPDF::makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj) |
| 2160 | { | 2150 | { |
| 2161 | QPDFObjGen next{nextObjGen()}; | 2151 | QPDFObjGen next{nextObjGen()}; |
| 2162 | - m->obj_cache[next] = ObjCache(obj, -1, -1); | 2152 | + m->obj_cache[next] = ObjCache(obj); |
| 2163 | return newIndirect(next, m->obj_cache[next].object); | 2153 | return newIndirect(next, m->obj_cache[next].object); |
| 2164 | } | 2154 | } |
| 2165 | 2155 | ||
| @@ -2246,7 +2236,7 @@ QPDF::getObject(QPDFObjGen const& og) | @@ -2246,7 +2236,7 @@ QPDF::getObject(QPDFObjGen const& og) | ||
| 2246 | } else if (m->xref_table.initialized() && !m->xref_table.type(og)) { | 2236 | } else if (m->xref_table.initialized() && !m->xref_table.type(og)) { |
| 2247 | return QPDF_Null::create(); | 2237 | return QPDF_Null::create(); |
| 2248 | } else { | 2238 | } else { |
| 2249 | - auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1); | 2239 | + auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og)); |
| 2250 | return {result.first->second.object}; | 2240 | return {result.first->second.object}; |
| 2251 | } | 2241 | } |
| 2252 | } | 2242 | } |
| @@ -2282,7 +2272,7 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh) | @@ -2282,7 +2272,7 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh) | ||
| 2282 | QTC::TC("qpdf", "QPDF replaceObject called with indirect object"); | 2272 | QTC::TC("qpdf", "QPDF replaceObject called with indirect object"); |
| 2283 | throw std::logic_error("QPDF::replaceObject called with indirect object handle"); | 2273 | throw std::logic_error("QPDF::replaceObject called with indirect object handle"); |
| 2284 | } | 2274 | } |
| 2285 | - updateCache(og, oh.getObj(), -1, -1); | 2275 | + updateCache(og, oh.getObj()); |
| 2286 | } | 2276 | } |
| 2287 | 2277 | ||
| 2288 | void | 2278 | void |
libqpdf/QPDF_linearization.cc
| @@ -288,9 +288,8 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) | @@ -288,9 +288,8 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) | ||
| 288 | QPDFObjGen og; | 288 | QPDFObjGen og; |
| 289 | QPDFObjectHandle H = | 289 | QPDFObjectHandle H = |
| 290 | readObjectAtOffset(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false); | 290 | readObjectAtOffset(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false); |
| 291 | - ObjCache& oc = m->obj_cache[og]; | ||
| 292 | - qpdf_offset_t min_end_offset = oc.end_before_space; | ||
| 293 | - qpdf_offset_t max_end_offset = oc.end_after_space; | 291 | + qpdf_offset_t min_end_offset = m->xref_table.end_before_space(og); |
| 292 | + qpdf_offset_t max_end_offset = m->xref_table.end_after_space(og); | ||
| 294 | if (!H.isStream()) { | 293 | if (!H.isStream()) { |
| 295 | throw damagedPDF("linearization dictionary", "hint table is not a stream"); | 294 | throw damagedPDF("linearization dictionary", "hint table is not a stream"); |
| 296 | } | 295 | } |
| @@ -301,14 +300,11 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) | @@ -301,14 +300,11 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) | ||
| 301 | // increasing length to cover it, even though the specification says all objects in the | 300 | // increasing length to cover it, even though the specification says all objects in the |
| 302 | // linearization parameter dictionary must be direct. We have to get the file position of the | 301 | // linearization parameter dictionary must be direct. We have to get the file position of the |
| 303 | // end of length in this case. | 302 | // end of length in this case. |
| 304 | - QPDFObjectHandle length_obj = Hdict.getKey("/Length"); | ||
| 305 | - if (length_obj.isIndirect()) { | 303 | + auto length_og = Hdict.getKey("/Length").getObjGen(); |
| 304 | + if (length_og.isIndirect()) { | ||
| 306 | QTC::TC("qpdf", "QPDF hint table length indirect"); | 305 | QTC::TC("qpdf", "QPDF hint table length indirect"); |
| 307 | - // Force resolution | ||
| 308 | - (void)length_obj.getIntValue(); | ||
| 309 | - ObjCache& oc2 = m->obj_cache[length_obj.getObjGen()]; | ||
| 310 | - min_end_offset = oc2.end_before_space; | ||
| 311 | - max_end_offset = oc2.end_after_space; | 306 | + min_end_offset = m->xref_table.end_before_space(length_og); |
| 307 | + max_end_offset = m->xref_table.end_after_space(length_og); | ||
| 312 | } else { | 308 | } else { |
| 313 | QTC::TC("qpdf", "QPDF hint table length direct"); | 309 | QTC::TC("qpdf", "QPDF hint table length direct"); |
| 314 | } | 310 | } |
| @@ -503,13 +499,14 @@ QPDF::checkLinearizationInternal() | @@ -503,13 +499,14 @@ QPDF::checkLinearizationInternal() | ||
| 503 | qpdf_offset_t max_E = -1; | 499 | qpdf_offset_t max_E = -1; |
| 504 | for (auto const& oh: m->part6) { | 500 | for (auto const& oh: m->part6) { |
| 505 | QPDFObjGen og(oh.getObjGen()); | 501 | QPDFObjGen og(oh.getObjGen()); |
| 506 | - if (m->obj_cache.count(og) == 0) { | 502 | + auto before = m->xref_table.end_before_space(og); |
| 503 | + auto after = m->xref_table.end_after_space(og); | ||
| 504 | + if (before <= 0) { | ||
| 507 | // All objects have to have been dereferenced to be classified. | 505 | // All objects have to have been dereferenced to be classified. |
| 508 | throw std::logic_error("linearization part6 object not in cache"); | 506 | throw std::logic_error("linearization part6 object not in cache"); |
| 509 | } | 507 | } |
| 510 | - ObjCache const& oc = m->obj_cache[og]; | ||
| 511 | - min_E = std::max(min_E, oc.end_before_space); | ||
| 512 | - max_E = std::max(max_E, oc.end_after_space); | 508 | + min_E = std::max(min_E, before); |
| 509 | + max_E = std::max(max_E, after); | ||
| 513 | } | 510 | } |
| 514 | if ((p.first_page_end < min_E) || (p.first_page_end > max_E)) { | 511 | if ((p.first_page_end < min_E) || (p.first_page_end > max_E)) { |
| 515 | QTC::TC("qpdf", "QPDF warn /E mismatch"); | 512 | QTC::TC("qpdf", "QPDF warn /E mismatch"); |
| @@ -536,10 +533,11 @@ QPDF::maxEnd(ObjUser const& ou) | @@ -536,10 +533,11 @@ QPDF::maxEnd(ObjUser const& ou) | ||
| 536 | } | 533 | } |
| 537 | qpdf_offset_t end = 0; | 534 | qpdf_offset_t end = 0; |
| 538 | for (auto const& og: m->obj_user_to_objects[ou]) { | 535 | for (auto const& og: m->obj_user_to_objects[ou]) { |
| 539 | - if (m->obj_cache.count(og) == 0) { | 536 | + auto e = m->xref_table.end_after_space(og); |
| 537 | + if (e <= 0) { | ||
| 540 | stopOnError("unknown object referenced in object user table"); | 538 | stopOnError("unknown object referenced in object user table"); |
| 541 | } | 539 | } |
| 542 | - end = std::max(end, m->obj_cache[og].end_after_space); | 540 | + end = std::max(end, e); |
| 543 | } | 541 | } |
| 544 | return end; | 542 | return end; |
| 545 | } | 543 | } |
| @@ -599,15 +597,13 @@ QPDF::lengthNextN(int first_object, int n) | @@ -599,15 +597,13 @@ QPDF::lengthNextN(int first_object, int n) | ||
| 599 | int length = 0; | 597 | int length = 0; |
| 600 | for (int i = 0; i < n; ++i) { | 598 | for (int i = 0; i < n; ++i) { |
| 601 | QPDFObjGen og(first_object + i, 0); | 599 | QPDFObjGen og(first_object + i, 0); |
| 602 | - if (m->xref_table.type(og) == 0) { | 600 | + auto end = m->xref_table.end_after_space(og); |
| 601 | + if (end <= 0) { | ||
| 603 | linearizationWarning( | 602 | linearizationWarning( |
| 604 | "no xref table entry for " + std::to_string(first_object + i) + " 0"); | 603 | "no xref table entry for " + std::to_string(first_object + i) + " 0"); |
| 605 | - } else { | ||
| 606 | - if (m->obj_cache.count(og) == 0) { | ||
| 607 | - stopOnError("found unknown object while calculating length for linearization data"); | ||
| 608 | - } | ||
| 609 | - length += toI(m->obj_cache[og].end_after_space - getLinearizationOffset(og)); | 604 | + continue; |
| 610 | } | 605 | } |
| 606 | + length += toI(end - getLinearizationOffset(og)); | ||
| 611 | } | 607 | } |
| 612 | return length; | 608 | return length; |
| 613 | } | 609 | } |
libqpdf/qpdf/QPDF_private.hh
| @@ -151,6 +151,49 @@ class QPDF::Xref_table | @@ -151,6 +151,49 @@ class QPDF::Xref_table | ||
| 151 | 151 | ||
| 152 | // For Linearization | 152 | // For Linearization |
| 153 | 153 | ||
| 154 | + qpdf_offset_t | ||
| 155 | + end_after_space(QPDFObjGen og) | ||
| 156 | + { | ||
| 157 | + auto& e = entry(toS(og.getObj())); | ||
| 158 | + switch (e.type()) { | ||
| 159 | + case 1: | ||
| 160 | + return e.end_after_space_; | ||
| 161 | + case 2: | ||
| 162 | + { | ||
| 163 | + auto es = entry(toS(e.stream_number())); | ||
| 164 | + return es.type() == 1 ? es.end_after_space_ : 0; | ||
| 165 | + } | ||
| 166 | + default: | ||
| 167 | + return 0; | ||
| 168 | + } | ||
| 169 | + } | ||
| 170 | + | ||
| 171 | + qpdf_offset_t | ||
| 172 | + end_before_space(QPDFObjGen og) | ||
| 173 | + { | ||
| 174 | + auto& e = entry(toS(og.getObj())); | ||
| 175 | + switch (e.type()) { | ||
| 176 | + case 1: | ||
| 177 | + return e.end_before_space_; | ||
| 178 | + case 2: | ||
| 179 | + { | ||
| 180 | + auto es = entry(toS(e.stream_number())); | ||
| 181 | + return es.type() == 1 ? es.end_before_space_ : 0; | ||
| 182 | + } | ||
| 183 | + default: | ||
| 184 | + return 0; | ||
| 185 | + } | ||
| 186 | + } | ||
| 187 | + | ||
| 188 | + void | ||
| 189 | + linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after) | ||
| 190 | + { | ||
| 191 | + if (type(id)) { | ||
| 192 | + table[id].end_before_space_ = before; | ||
| 193 | + table[id].end_after_space_ = after; | ||
| 194 | + } | ||
| 195 | + } | ||
| 196 | + | ||
| 154 | bool | 197 | bool |
| 155 | uncompressed_after_compressed() const noexcept | 198 | uncompressed_after_compressed() const noexcept |
| 156 | { | 199 | { |
| @@ -192,6 +235,14 @@ class QPDF::Xref_table | @@ -192,6 +235,14 @@ class QPDF::Xref_table | ||
| 192 | 235 | ||
| 193 | struct Entry | 236 | struct Entry |
| 194 | { | 237 | { |
| 238 | + Entry() = default; | ||
| 239 | + | ||
| 240 | + Entry(int gen, Xref entry) : | ||
| 241 | + gen_(gen), | ||
| 242 | + entry(entry) | ||
| 243 | + { | ||
| 244 | + } | ||
| 245 | + | ||
| 195 | int | 246 | int |
| 196 | gen() const noexcept | 247 | gen() const noexcept |
| 197 | { | 248 | { |
| @@ -224,8 +275,16 @@ class QPDF::Xref_table | @@ -224,8 +275,16 @@ class QPDF::Xref_table | ||
| 224 | 275 | ||
| 225 | int gen_{0}; | 276 | int gen_{0}; |
| 226 | Xref entry; | 277 | Xref entry; |
| 278 | + qpdf_offset_t end_before_space_{0}; | ||
| 279 | + qpdf_offset_t end_after_space_{0}; | ||
| 227 | }; | 280 | }; |
| 228 | 281 | ||
| 282 | + Entry& | ||
| 283 | + entry(size_t id) | ||
| 284 | + { | ||
| 285 | + return id < table.size() ? table[id] : table[0]; | ||
| 286 | + } | ||
| 287 | + | ||
| 229 | void read(qpdf_offset_t offset); | 288 | void read(qpdf_offset_t offset); |
| 230 | 289 | ||
| 231 | // Methods to parse tables | 290 | // Methods to parse tables |
| @@ -384,24 +443,14 @@ class QPDF::Pipe | @@ -384,24 +443,14 @@ class QPDF::Pipe | ||
| 384 | class QPDF::ObjCache | 443 | class QPDF::ObjCache |
| 385 | { | 444 | { |
| 386 | public: | 445 | public: |
| 387 | - ObjCache() : | ||
| 388 | - end_before_space(0), | ||
| 389 | - end_after_space(0) | ||
| 390 | - { | ||
| 391 | - } | ||
| 392 | - ObjCache( | ||
| 393 | - std::shared_ptr<QPDFObject> object, | ||
| 394 | - qpdf_offset_t end_before_space = 0, | ||
| 395 | - qpdf_offset_t end_after_space = 0) : | ||
| 396 | - object(object), | ||
| 397 | - end_before_space(end_before_space), | ||
| 398 | - end_after_space(end_after_space) | 446 | + ObjCache() = default; |
| 447 | + | ||
| 448 | + ObjCache(std::shared_ptr<QPDFObject> object) : | ||
| 449 | + object(object) | ||
| 399 | { | 450 | { |
| 400 | } | 451 | } |
| 401 | 452 | ||
| 402 | std::shared_ptr<QPDFObject> object; | 453 | std::shared_ptr<QPDFObject> object; |
| 403 | - qpdf_offset_t end_before_space; | ||
| 404 | - qpdf_offset_t end_after_space; | ||
| 405 | }; | 454 | }; |
| 406 | 455 | ||
| 407 | class QPDF::ObjCopier | 456 | class QPDF::ObjCopier |