Commit 264e25f391f83bcbeb60590f18ff96719b086454
1 parent
a6159858
Clear owning QPDF information for all objects, not just indirect
Showing
16 changed files
with
141 additions
and
15 deletions
TODO
| ... | ... | @@ -811,3 +811,10 @@ Rejected Ideas |
| 811 | 811 | Note that arrays and dictionaries still need to contain |
| 812 | 812 | QPDFObjectHandle because of indirect objects. This only pertains to |
| 813 | 813 | direct objects, which are always "resolved" in QPDFObjectHandle. |
| 814 | + | |
| 815 | + If this is addressed, read comments in QPDFWriter.cc::enqueueObject | |
| 816 | + near the call to getOwningQPDF, comments in QPDFValueProxy::reset, | |
| 817 | + and comments in QPDF::~QPDF() near the line that assigns to null. | |
| 818 | + This will also affect test 92 in test_driver.cc. All these | |
| 819 | + references were from the release of qpdf 11 (in case they have moved | |
| 820 | + by such time as this might be resurrected). | ... | ... |
include/qpdf/QPDFObjectHandle.hh
| ... | ... | @@ -1534,6 +1534,23 @@ class QPDFObjectHandle |
| 1534 | 1534 | }; |
| 1535 | 1535 | friend class ObjAccessor; |
| 1536 | 1536 | |
| 1537 | + // Provide access to specific classes for recursive | |
| 1538 | + // reset(). | |
| 1539 | + class Resetter | |
| 1540 | + { | |
| 1541 | + friend class QPDF_Dictionary; | |
| 1542 | + friend class QPDF_Stream; | |
| 1543 | + friend class SparseOHArray; | |
| 1544 | + | |
| 1545 | + private: | |
| 1546 | + static void | |
| 1547 | + reset(QPDFObjectHandle& o) | |
| 1548 | + { | |
| 1549 | + o.reset(); | |
| 1550 | + } | |
| 1551 | + }; | |
| 1552 | + friend class Resetter; | |
| 1553 | + | |
| 1537 | 1554 | // Convenience routine: Throws if the assumption is violated. Your |
| 1538 | 1555 | // code will be better if you call one of the isType methods and |
| 1539 | 1556 | // handle the case of the type being wrong, but these can be |
| ... | ... | @@ -1631,6 +1648,7 @@ class QPDFObjectHandle |
| 1631 | 1648 | bool first_level_only, |
| 1632 | 1649 | bool stop_at_streams); |
| 1633 | 1650 | void shallowCopyInternal(QPDFObjectHandle& oh, bool first_level_only); |
| 1651 | + void reset(); | |
| 1634 | 1652 | void setParsedOffset(qpdf_offset_t offset); |
| 1635 | 1653 | void parseContentStream_internal( |
| 1636 | 1654 | std::string const& description, ParserCallbacks* callbacks); | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -247,19 +247,24 @@ QPDF::~QPDF() |
| 247 | 247 | // having an array or dictionary that contains an indirect |
| 248 | 248 | // reference to the other), the circular references in the |
| 249 | 249 | // std::shared_ptr objects will prevent the objects from being |
| 250 | - // deleted. Walk through all objects in the object cache, which | |
| 251 | - // is those objects that we read from the file, and break all | |
| 252 | - // resolved indirect references by replacing them with direct | |
| 253 | - // null objects. At this point, obviously no one is still | |
| 254 | - // using the QPDF object, but we'll explicitly clear the xref | |
| 255 | - // table anyway just to prevent any possibility of resolve() | |
| 256 | - // succeeding. Note that we can't break references like this at | |
| 257 | - // any time when the QPDF object is active. | |
| 250 | + // deleted. Walk through all objects in the object cache, which is | |
| 251 | + // those objects that we read from the file, and break all | |
| 252 | + // resolved indirect references by replacing them with direct null | |
| 253 | + // objects. At this point, obviously no one is still using the | |
| 254 | + // QPDF object, but we'll explicitly clear the xref table anyway | |
| 255 | + // just to prevent any possibility of resolve() succeeding. Note | |
| 256 | + // that we can't break references like this at any time when the | |
| 257 | + // QPDF object is active. This also causes all QPDFObjectHandle | |
| 258 | + // objects that are reachable from this object to become nulls and | |
| 259 | + // release their association with this QPDF. | |
| 258 | 260 | this->m->xref_table.clear(); |
| 259 | 261 | auto null_obj = QPDF_Null::create(); |
| 260 | 262 | for (auto const& iter: this->m->obj_cache) { |
| 263 | + iter.second.object->reset(); | |
| 264 | + // If the issue discussed in QPDFValueProxy::reset were | |
| 265 | + // resolved, then this assignment to null_obj could be | |
| 266 | + // removed. | |
| 261 | 267 | iter.second.object->assign(null_obj); |
| 262 | - iter.second.object->resetObjGen(); | |
| 263 | 268 | } |
| 264 | 269 | } |
| 265 | 270 | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -248,6 +248,17 @@ QPDFObjectHandle::operator!=(QPDFObjectHandle const& rhs) const |
| 248 | 248 | return this->obj != rhs.obj; |
| 249 | 249 | } |
| 250 | 250 | |
| 251 | +void | |
| 252 | +QPDFObjectHandle::reset() | |
| 253 | +{ | |
| 254 | + // Recursively remove association with any QPDF object. This | |
| 255 | + // method may only be called during final destruction. See | |
| 256 | + // comments in QPDF::~QPDF(). | |
| 257 | + if (!isIndirect()) { | |
| 258 | + this->obj->reset(); | |
| 259 | + } | |
| 260 | +} | |
| 261 | + | |
| 251 | 262 | qpdf_object_type_e |
| 252 | 263 | QPDFObjectHandle::getTypeCode() |
| 253 | 264 | { | ... | ... |
libqpdf/QPDFWriter.cc
| ... | ... | @@ -1198,6 +1198,14 @@ void |
| 1198 | 1198 | QPDFWriter::enqueueObject(QPDFObjectHandle object) |
| 1199 | 1199 | { |
| 1200 | 1200 | if (object.isIndirect()) { |
| 1201 | + // This owner check should really be done for all objects, not | |
| 1202 | + // just indirect objects. As of the time of the release of | |
| 1203 | + // qpdf 11, it is known that there are cases of direct objects | |
| 1204 | + // from other files getting copied into multiple QPDF objects. | |
| 1205 | + // This definitely happens in the page splitting code. If we | |
| 1206 | + // were to implement strong checks to prevent objects from | |
| 1207 | + // having multiple owners, once that was completely phased in, | |
| 1208 | + // this check could be moved outside the if statement. | |
| 1201 | 1209 | if (object.getOwningQPDF() != &(this->m->pdf)) { |
| 1202 | 1210 | QTC::TC("qpdf", "QPDFWriter foreign object"); |
| 1203 | 1211 | throw std::logic_error( | ... | ... |
libqpdf/QPDF_Array.cc
libqpdf/QPDF_Dictionary.cc
| ... | ... | @@ -21,6 +21,14 @@ QPDF_Dictionary::shallowCopy() |
| 21 | 21 | return create(items); |
| 22 | 22 | } |
| 23 | 23 | |
| 24 | +void | |
| 25 | +QPDF_Dictionary::reset() | |
| 26 | +{ | |
| 27 | + for (auto& iter: this->items) { | |
| 28 | + QPDFObjectHandle::Resetter::reset(iter.second); | |
| 29 | + } | |
| 30 | +} | |
| 31 | + | |
| 24 | 32 | std::string |
| 25 | 33 | QPDF_Dictionary::unparse() |
| 26 | 34 | { | ... | ... |
libqpdf/QPDF_Stream.cc
| ... | ... | @@ -168,6 +168,13 @@ QPDF_Stream::getFilterOnWrite() const |
| 168 | 168 | } |
| 169 | 169 | |
| 170 | 170 | void |
| 171 | +QPDF_Stream::reset() | |
| 172 | +{ | |
| 173 | + this->stream_provider = nullptr; | |
| 174 | + QPDFObjectHandle::Resetter::reset(this->stream_dict); | |
| 175 | +} | |
| 176 | + | |
| 177 | +void | |
| 171 | 178 | QPDF_Stream::setObjGen(QPDFObjGen const& og) |
| 172 | 179 | { |
| 173 | 180 | if (this->og.isIndirect()) { | ... | ... |
libqpdf/SparseOHArray.cc
| ... | ... | @@ -49,6 +49,14 @@ SparseOHArray::remove_last() |
| 49 | 49 | } |
| 50 | 50 | |
| 51 | 51 | void |
| 52 | +SparseOHArray::reset() | |
| 53 | +{ | |
| 54 | + for (auto& iter: this->elements) { | |
| 55 | + QPDFObjectHandle::Resetter::reset(iter.second); | |
| 56 | + } | |
| 57 | +} | |
| 58 | + | |
| 59 | +void | |
| 52 | 60 | SparseOHArray::setAt(size_t idx, QPDFObjectHandle oh) |
| 53 | 61 | { |
| 54 | 62 | if (idx >= this->n_elements) { | ... | ... |
libqpdf/qpdf/QPDFValue.hh
libqpdf/qpdf/QPDFValueProxy.hh
| ... | ... | @@ -110,8 +110,21 @@ class QPDFValueProxy |
| 110 | 110 | value->og = og; |
| 111 | 111 | } |
| 112 | 112 | void |
| 113 | - resetObjGen() | |
| 114 | - { | |
| 113 | + reset() | |
| 114 | + { | |
| 115 | + value->reset(); | |
| 116 | + // It would be better if, rather than clearing value->qpdf and | |
| 117 | + // value->og, we completely replaced value with a null object. | |
| 118 | + // However, at the time of the release of qpdf 11, this causes | |
| 119 | + // test failures and would likely break a lot of code since it is | |
| 120 | + // possible for a direct object that recursively contains no | |
| 121 | + // indirect objects to be copied into multiple QPDF objects. | |
| 122 | + // For that reason, we have to break the association with the | |
| 123 | + // owning QPDF but not otherwise mutate the object. For | |
| 124 | + // indirect objects, QPDF::~QPDF replaces the object with | |
| 125 | + // null, which clears circular references. If this code were | |
| 126 | + // able to do the null replacement, that code would not have | |
| 127 | + // to. | |
| 115 | 128 | value->qpdf = nullptr; |
| 116 | 129 | value->og = QPDFObjGen(); |
| 117 | 130 | } | ... | ... |
libqpdf/qpdf/QPDF_Array.hh
| ... | ... | @@ -17,6 +17,7 @@ class QPDF_Array: public QPDFValue |
| 17 | 17 | virtual std::shared_ptr<QPDFValueProxy> shallowCopy(); |
| 18 | 18 | virtual std::string unparse(); |
| 19 | 19 | virtual JSON getJSON(int json_version); |
| 20 | + virtual void reset(); | |
| 20 | 21 | |
| 21 | 22 | int getNItems() const; |
| 22 | 23 | QPDFObjectHandle getItem(int n) const; | ... | ... |
libqpdf/qpdf/QPDF_Dictionary.hh
| ... | ... | @@ -17,6 +17,7 @@ class QPDF_Dictionary: public QPDFValue |
| 17 | 17 | virtual std::shared_ptr<QPDFValueProxy> shallowCopy(); |
| 18 | 18 | virtual std::string unparse(); |
| 19 | 19 | virtual JSON getJSON(int json_version); |
| 20 | + virtual void reset(); | |
| 20 | 21 | |
| 21 | 22 | // hasKey() and getKeys() treat keys with null values as if they |
| 22 | 23 | // aren't there. getKey() returns null for the value of a | ... | ... |
libqpdf/qpdf/QPDF_Stream.hh
| ... | ... | @@ -27,6 +27,7 @@ class QPDF_Stream: public QPDFValue |
| 27 | 27 | virtual std::string unparse(); |
| 28 | 28 | virtual JSON getJSON(int json_version); |
| 29 | 29 | virtual void setDescription(QPDF*, std::string const&); |
| 30 | + virtual void reset(); | |
| 30 | 31 | QPDFObjectHandle getDict() const; |
| 31 | 32 | bool isDataModified() const; |
| 32 | 33 | void setFilterOnWrite(bool); | ... | ... |
libqpdf/qpdf/SparseOHArray.hh
| ... | ... | @@ -15,6 +15,7 @@ class SparseOHArray |
| 15 | 15 | void setAt(size_t idx, QPDFObjectHandle oh); |
| 16 | 16 | void erase(size_t idx); |
| 17 | 17 | void insert(size_t idx, QPDFObjectHandle oh); |
| 18 | + void reset(); | |
| 18 | 19 | |
| 19 | 20 | typedef std::unordered_map<size_t, QPDFObjectHandle>::const_iterator |
| 20 | 21 | const_iterator; | ... | ... |
qpdf/test_driver.cc
| ... | ... | @@ -3274,13 +3274,40 @@ test_92(QPDF& pdf, char const* arg2) |
| 3274 | 3274 | { |
| 3275 | 3275 | // Exercise indirect objects owned by destroyed QPDF object. |
| 3276 | 3276 | auto qpdf = QPDF::create(); |
| 3277 | - qpdf->emptyPDF(); | |
| 3277 | + qpdf->processFile("minimal.pdf"); | |
| 3278 | 3278 | auto root = qpdf->getRoot(); |
| 3279 | - assert(root.getOwningQPDF() != nullptr); | |
| 3279 | + assert(root.getOwningQPDF() == qpdf.get()); | |
| 3280 | 3280 | assert(root.isIndirect()); |
| 3281 | + assert(root.isDictionary()); | |
| 3282 | + auto page1 = root.getKey("/Pages").getKey("/Kids").getArrayItem(0); | |
| 3283 | + assert(page1.getOwningQPDF() == qpdf.get()); | |
| 3284 | + assert(page1.isIndirect()); | |
| 3285 | + assert(page1.isDictionary()); | |
| 3286 | + auto resources = page1.getKey("/Resources"); | |
| 3287 | + assert(resources.getOwningQPDF() == qpdf.get()); | |
| 3288 | + assert(resources.isDictionary()); | |
| 3289 | + assert(!resources.isIndirect()); | |
| 3290 | + auto contents = page1.getKey("/Contents"); | |
| 3291 | + auto contents_dict = contents.getDict(); | |
| 3281 | 3292 | qpdf = nullptr; |
| 3282 | - assert(root.getOwningQPDF() == nullptr); | |
| 3283 | - assert(!root.isIndirect()); | |
| 3293 | + auto check = [](QPDFObjectHandle& oh) { | |
| 3294 | + assert(oh.getOwningQPDF() == nullptr); | |
| 3295 | + assert(!oh.isIndirect()); | |
| 3296 | + }; | |
| 3297 | + // All objects should no longer have an owning QPDF or be indirect. | |
| 3298 | + check(root); | |
| 3299 | + check(page1); | |
| 3300 | + check(resources); | |
| 3301 | + check(contents); | |
| 3302 | + check(contents_dict); | |
| 3303 | + // Objects that were originally indirect should be null. | |
| 3304 | + // Otherwise, they should have retained their old values. See | |
| 3305 | + // comments in QPDFValueProxy::reset for why this is the case. | |
| 3306 | + assert(root.isNull()); | |
| 3307 | + assert(page1.isNull()); | |
| 3308 | + assert(contents.isNull()); | |
| 3309 | + assert(!resources.isNull()); | |
| 3310 | + assert(!contents_dict.isNull()); | |
| 3284 | 3311 | } |
| 3285 | 3312 | |
| 3286 | 3313 | static void | ... | ... |