Commit 264e25f391f83bcbeb60590f18ff96719b086454

Authored by Jay Berkenbilt
1 parent a6159858

Clear owning QPDF information for all objects, not just indirect

... ... @@ -811,3 +811,10 @@ Rejected Ideas
811 811 Note that arrays and dictionaries still need to contain
812 812 QPDFObjectHandle because of indirect objects. This only pertains to
813 813 direct objects, which are always "resolved" in QPDFObjectHandle.
  814 +
  815 + If this is addressed, read comments in QPDFWriter.cc::enqueueObject
  816 + near the call to getOwningQPDF, comments in QPDFValueProxy::reset,
  817 + and comments in QPDF::~QPDF() near the line that assigns to null.
  818 + This will also affect test 92 in test_driver.cc. All these
  819 + references were from the release of qpdf 11 (in case they have moved
  820 + by such time as this might be resurrected).
... ...
include/qpdf/QPDFObjectHandle.hh
... ... @@ -1534,6 +1534,23 @@ class QPDFObjectHandle
1534 1534 };
1535 1535 friend class ObjAccessor;
1536 1536  
  1537 + // Provide access to specific classes for recursive
  1538 + // reset().
  1539 + class Resetter
  1540 + {
  1541 + friend class QPDF_Dictionary;
  1542 + friend class QPDF_Stream;
  1543 + friend class SparseOHArray;
  1544 +
  1545 + private:
  1546 + static void
  1547 + reset(QPDFObjectHandle& o)
  1548 + {
  1549 + o.reset();
  1550 + }
  1551 + };
  1552 + friend class Resetter;
  1553 +
1537 1554 // Convenience routine: Throws if the assumption is violated. Your
1538 1555 // code will be better if you call one of the isType methods and
1539 1556 // handle the case of the type being wrong, but these can be
... ... @@ -1631,6 +1648,7 @@ class QPDFObjectHandle
1631 1648 bool first_level_only,
1632 1649 bool stop_at_streams);
1633 1650 void shallowCopyInternal(QPDFObjectHandle& oh, bool first_level_only);
  1651 + void reset();
1634 1652 void setParsedOffset(qpdf_offset_t offset);
1635 1653 void parseContentStream_internal(
1636 1654 std::string const& description, ParserCallbacks* callbacks);
... ...
libqpdf/QPDF.cc
... ... @@ -247,19 +247,24 @@ QPDF::~QPDF()
247 247 // having an array or dictionary that contains an indirect
248 248 // reference to the other), the circular references in the
249 249 // std::shared_ptr objects will prevent the objects from being
250   - // deleted. Walk through all objects in the object cache, which
251   - // is those objects that we read from the file, and break all
252   - // resolved indirect references by replacing them with direct
253   - // null objects. At this point, obviously no one is still
254   - // using the QPDF object, but we'll explicitly clear the xref
255   - // table anyway just to prevent any possibility of resolve()
256   - // succeeding. Note that we can't break references like this at
257   - // any time when the QPDF object is active.
  250 + // deleted. Walk through all objects in the object cache, which is
  251 + // those objects that we read from the file, and break all
  252 + // resolved indirect references by replacing them with direct null
  253 + // objects. At this point, obviously no one is still using the
  254 + // QPDF object, but we'll explicitly clear the xref table anyway
  255 + // just to prevent any possibility of resolve() succeeding. Note
  256 + // that we can't break references like this at any time when the
  257 + // QPDF object is active. This also causes all QPDFObjectHandle
  258 + // objects that are reachable from this object to become nulls and
  259 + // release their association with this QPDF.
258 260 this->m->xref_table.clear();
259 261 auto null_obj = QPDF_Null::create();
260 262 for (auto const& iter: this->m->obj_cache) {
  263 + iter.second.object->reset();
  264 + // If the issue discussed in QPDFValueProxy::reset were
  265 + // resolved, then this assignment to null_obj could be
  266 + // removed.
261 267 iter.second.object->assign(null_obj);
262   - iter.second.object->resetObjGen();
263 268 }
264 269 }
265 270  
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -248,6 +248,17 @@ QPDFObjectHandle::operator!=(QPDFObjectHandle const& rhs) const
248 248 return this->obj != rhs.obj;
249 249 }
250 250  
  251 +void
  252 +QPDFObjectHandle::reset()
  253 +{
  254 + // Recursively remove association with any QPDF object. This
  255 + // method may only be called during final destruction. See
  256 + // comments in QPDF::~QPDF().
  257 + if (!isIndirect()) {
  258 + this->obj->reset();
  259 + }
  260 +}
  261 +
251 262 qpdf_object_type_e
252 263 QPDFObjectHandle::getTypeCode()
253 264 {
... ...
libqpdf/QPDFWriter.cc
... ... @@ -1198,6 +1198,14 @@ void
1198 1198 QPDFWriter::enqueueObject(QPDFObjectHandle object)
1199 1199 {
1200 1200 if (object.isIndirect()) {
  1201 + // This owner check should really be done for all objects, not
  1202 + // just indirect objects. As of the time of the release of
  1203 + // qpdf 11, it is known that there are cases of direct objects
  1204 + // from other files getting copied into multiple QPDF objects.
  1205 + // This definitely happens in the page splitting code. If we
  1206 + // were to implement strong checks to prevent objects from
  1207 + // having multiple owners, once that was complete phased in,
  1208 + // this check could be moved outside the if statement.
1201 1209 if (object.getOwningQPDF() != &(this->m->pdf)) {
1202 1210 QTC::TC("qpdf", "QPDFWriter foreign object");
1203 1211 throw std::logic_error(
... ...
libqpdf/QPDF_Array.cc
... ... @@ -34,6 +34,12 @@ QPDF_Array::shallowCopy()
34 34 return create(elements);
35 35 }
36 36  
  37 +void
  38 +QPDF_Array::reset()
  39 +{
  40 + elements.reset();
  41 +}
  42 +
37 43 std::string
38 44 QPDF_Array::unparse()
39 45 {
... ...
libqpdf/QPDF_Dictionary.cc
... ... @@ -21,6 +21,14 @@ QPDF_Dictionary::shallowCopy()
21 21 return create(items);
22 22 }
23 23  
  24 +void
  25 +QPDF_Dictionary::reset()
  26 +{
  27 + for (auto& iter: this->items) {
  28 + QPDFObjectHandle::Resetter::reset(iter.second);
  29 + }
  30 +}
  31 +
24 32 std::string
25 33 QPDF_Dictionary::unparse()
26 34 {
... ...
libqpdf/QPDF_Stream.cc
... ... @@ -168,6 +168,13 @@ QPDF_Stream::getFilterOnWrite() const
168 168 }
169 169  
170 170 void
  171 +QPDF_Stream::reset()
  172 +{
  173 + this->stream_provider = nullptr;
  174 + QPDFObjectHandle::Resetter::reset(this->stream_dict);
  175 +}
  176 +
  177 +void
171 178 QPDF_Stream::setObjGen(QPDFObjGen const& og)
172 179 {
173 180 if (this->og.isIndirect()) {
... ...
libqpdf/SparseOHArray.cc
... ... @@ -49,6 +49,14 @@ SparseOHArray::remove_last()
49 49 }
50 50  
51 51 void
  52 +SparseOHArray::reset()
  53 +{
  54 + for (auto& iter: this->elements) {
  55 + QPDFObjectHandle::Resetter::reset(iter.second);
  56 + }
  57 +}
  58 +
  59 +void
52 60 SparseOHArray::setAt(size_t idx, QPDFObjectHandle oh)
53 61 {
54 62 if (idx >= this->n_elements) {
... ...
libqpdf/qpdf/QPDFValue.hh
... ... @@ -63,6 +63,10 @@ class QPDFValue
63 63 {
64 64 return og;
65 65 }
  66 + virtual void
  67 + reset()
  68 + {
  69 + }
66 70  
67 71 protected:
68 72 QPDFValue() :
... ...
libqpdf/qpdf/QPDFValueProxy.hh
... ... @@ -110,8 +110,21 @@ class QPDFValueProxy
110 110 value->og = og;
111 111 }
112 112 void
113   - resetObjGen()
114   - {
  113 + reset()
  114 + {
  115 + value->reset();
  116 + // It would be better if, rather than clearing value->qpdf and
  117 + // value->og, we completely replaced value with a null object.
  118 + // However, at the time of the release of qpdf 11, this causes
  119 + // test failures and would likely break a lot of code since it
  120 + // possible for a direct object that recursively contains no
  121 + // indirect objects to be copied into multiple QPDF objects.
  122 + // For that reason, we have to break the association with the
  123 + // owning QPDF but not otherwise mutate the object. For
  124 + // indirect objects, QPDF::~QPDF replaces the object with
  125 + // null, which clears circular references. If this code were
  126 + // able to do the null replacement, that code would not have
  127 + // to.
115 128 value->qpdf = nullptr;
116 129 value->og = QPDFObjGen();
117 130 }
... ...
libqpdf/qpdf/QPDF_Array.hh
... ... @@ -17,6 +17,7 @@ class QPDF_Array: public QPDFValue
17 17 virtual std::shared_ptr<QPDFValueProxy> shallowCopy();
18 18 virtual std::string unparse();
19 19 virtual JSON getJSON(int json_version);
  20 + virtual void reset();
20 21  
21 22 int getNItems() const;
22 23 QPDFObjectHandle getItem(int n) const;
... ...
libqpdf/qpdf/QPDF_Dictionary.hh
... ... @@ -17,6 +17,7 @@ class QPDF_Dictionary: public QPDFValue
17 17 virtual std::shared_ptr<QPDFValueProxy> shallowCopy();
18 18 virtual std::string unparse();
19 19 virtual JSON getJSON(int json_version);
  20 + virtual void reset();
20 21  
21 22 // hasKey() and getKeys() treat keys with null values as if they
22 23 // aren't there. getKey() returns null for the value of a
... ...
libqpdf/qpdf/QPDF_Stream.hh
... ... @@ -27,6 +27,7 @@ class QPDF_Stream: public QPDFValue
27 27 virtual std::string unparse();
28 28 virtual JSON getJSON(int json_version);
29 29 virtual void setDescription(QPDF*, std::string const&);
  30 + virtual void reset();
30 31 QPDFObjectHandle getDict() const;
31 32 bool isDataModified() const;
32 33 void setFilterOnWrite(bool);
... ...
libqpdf/qpdf/SparseOHArray.hh
... ... @@ -15,6 +15,7 @@ class SparseOHArray
15 15 void setAt(size_t idx, QPDFObjectHandle oh);
16 16 void erase(size_t idx);
17 17 void insert(size_t idx, QPDFObjectHandle oh);
  18 + void reset();
18 19  
19 20 typedef std::unordered_map<size_t, QPDFObjectHandle>::const_iterator
20 21 const_iterator;
... ...
qpdf/test_driver.cc
... ... @@ -3274,13 +3274,40 @@ test_92(QPDF&amp; pdf, char const* arg2)
3274 3274 {
3275 3275 // Exercise indirect objects owned by destroyed QPDF object.
3276 3276 auto qpdf = QPDF::create();
3277   - qpdf->emptyPDF();
  3277 + qpdf->processFile("minimal.pdf");
3278 3278 auto root = qpdf->getRoot();
3279   - assert(root.getOwningQPDF() != nullptr);
  3279 + assert(root.getOwningQPDF() == qpdf.get());
3280 3280 assert(root.isIndirect());
  3281 + assert(root.isDictionary());
  3282 + auto page1 = root.getKey("/Pages").getKey("/Kids").getArrayItem(0);
  3283 + assert(page1.getOwningQPDF() == qpdf.get());
  3284 + assert(page1.isIndirect());
  3285 + assert(page1.isDictionary());
  3286 + auto resources = page1.getKey("/Resources");
  3287 + assert(resources.getOwningQPDF() == qpdf.get());
  3288 + assert(resources.isDictionary());
  3289 + assert(!resources.isIndirect());
  3290 + auto contents = page1.getKey("/Contents");
  3291 + auto contents_dict = contents.getDict();
3281 3292 qpdf = nullptr;
3282   - assert(root.getOwningQPDF() == nullptr);
3283   - assert(!root.isIndirect());
  3293 + auto check = [](QPDFObjectHandle& oh) {
  3294 + assert(oh.getOwningQPDF() == nullptr);
  3295 + assert(!oh.isIndirect());
  3296 + };
  3297 + // All objects should no longer have an owning QPDF or be indirect.
  3298 + check(root);
  3299 + check(page1);
  3300 + check(resources);
  3301 + check(contents);
  3302 + check(contents_dict);
  3303 + // Objects that were originally indirect should be null.
  3304 + // Otherwise, they should have retained their old values. See
  3305 + // comments in QPDFValueProxy::reset for why this is the case.
  3306 + assert(root.isNull());
  3307 + assert(page1.isNull());
  3308 + assert(contents.isNull());
  3309 + assert(!resources.isNull());
  3310 + assert(!contents_dict.isNull());
3284 3311 }
3285 3312  
3286 3313 static void
... ...