Commit 264e25f391f83bcbeb60590f18ff96719b086454

Authored by Jay Berkenbilt
1 parent a6159858

Clear owning QPDF information for all objects, not just indirect

@@ -811,3 +811,10 @@ Rejected Ideas @@ -811,3 +811,10 @@ Rejected Ideas
811 Note that arrays and dictionaries still need to contain 811 Note that arrays and dictionaries still need to contain
812 QPDFObjectHandle because of indirect objects. This only pertains to 812 QPDFObjectHandle because of indirect objects. This only pertains to
813 direct objects, which are always "resolved" in QPDFObjectHandle. 813 direct objects, which are always "resolved" in QPDFObjectHandle.
  814 +
  815 + If this is addressed, read comments in QPDFWriter.cc::enqueueObject
  816 + near the call to getOwningQPDF, comments in QPDFValueProxy::reset,
  817 + and comments in QPDF::~QPDF() near the line that assigns to null.
  818 + This will also affect test 92 in test_driver.cc. All these
  819 + references were from the release of qpdf 11 (in case they have moved
  820 + by such time as this might be resurrected).
include/qpdf/QPDFObjectHandle.hh
@@ -1534,6 +1534,23 @@ class QPDFObjectHandle @@ -1534,6 +1534,23 @@ class QPDFObjectHandle
1534 }; 1534 };
1535 friend class ObjAccessor; 1535 friend class ObjAccessor;
1536 1536
  1537 + // Provide access to specific classes for recursive
  1538 + // reset().
  1539 + class Resetter
  1540 + {
  1541 + friend class QPDF_Dictionary;
  1542 + friend class QPDF_Stream;
  1543 + friend class SparseOHArray;
  1544 +
  1545 + private:
  1546 + static void
  1547 + reset(QPDFObjectHandle& o)
  1548 + {
  1549 + o.reset();
  1550 + }
  1551 + };
  1552 + friend class Resetter;
  1553 +
1537 // Convenience routine: Throws if the assumption is violated. Your 1554 // Convenience routine: Throws if the assumption is violated. Your
1538 // code will be better if you call one of the isType methods and 1555 // code will be better if you call one of the isType methods and
1539 // handle the case of the type being wrong, but these can be 1556 // handle the case of the type being wrong, but these can be
@@ -1631,6 +1648,7 @@ class QPDFObjectHandle @@ -1631,6 +1648,7 @@ class QPDFObjectHandle
1631 bool first_level_only, 1648 bool first_level_only,
1632 bool stop_at_streams); 1649 bool stop_at_streams);
1633 void shallowCopyInternal(QPDFObjectHandle& oh, bool first_level_only); 1650 void shallowCopyInternal(QPDFObjectHandle& oh, bool first_level_only);
  1651 + void reset();
1634 void setParsedOffset(qpdf_offset_t offset); 1652 void setParsedOffset(qpdf_offset_t offset);
1635 void parseContentStream_internal( 1653 void parseContentStream_internal(
1636 std::string const& description, ParserCallbacks* callbacks); 1654 std::string const& description, ParserCallbacks* callbacks);
libqpdf/QPDF.cc
@@ -247,19 +247,24 @@ QPDF::~QPDF() @@ -247,19 +247,24 @@ QPDF::~QPDF()
247 // having an array or dictionary that contains an indirect 247 // having an array or dictionary that contains an indirect
248 // reference to the other), the circular references in the 248 // reference to the other), the circular references in the
249 // std::shared_ptr objects will prevent the objects from being 249 // std::shared_ptr objects will prevent the objects from being
250 - // deleted. Walk through all objects in the object cache, which  
251 - // is those objects that we read from the file, and break all  
252 - // resolved indirect references by replacing them with direct  
253 - // null objects. At this point, obviously no one is still  
254 - // using the QPDF object, but we'll explicitly clear the xref  
255 - // table anyway just to prevent any possibility of resolve()  
256 - // succeeding. Note that we can't break references like this at  
257 - // any time when the QPDF object is active. 250 + // deleted. Walk through all objects in the object cache, which is
  251 + // those objects that we read from the file, and break all
  252 + // resolved indirect references by replacing them with direct null
  253 + // objects. At this point, obviously no one is still using the
  254 + // QPDF object, but we'll explicitly clear the xref table anyway
  255 + // just to prevent any possibility of resolve() succeeding. Note
  256 + // that we can't break references like this at any time when the
  257 + // QPDF object is active. This also causes all QPDFObjectHandle
  258 + // objects that are reachable from this object to become nulls and
  259 + // release their association with this QPDF.
258 this->m->xref_table.clear(); 260 this->m->xref_table.clear();
259 auto null_obj = QPDF_Null::create(); 261 auto null_obj = QPDF_Null::create();
260 for (auto const& iter: this->m->obj_cache) { 262 for (auto const& iter: this->m->obj_cache) {
  263 + iter.second.object->reset();
  264 + // If the issue discussed in QPDFValueProxy::reset were
  265 + // resolved, then this assignment to null_obj could be
  266 + // removed.
261 iter.second.object->assign(null_obj); 267 iter.second.object->assign(null_obj);
262 - iter.second.object->resetObjGen();  
263 } 268 }
264 } 269 }
265 270
libqpdf/QPDFObjectHandle.cc
@@ -248,6 +248,17 @@ QPDFObjectHandle::operator!=(QPDFObjectHandle const& rhs) const @@ -248,6 +248,17 @@ QPDFObjectHandle::operator!=(QPDFObjectHandle const& rhs) const
248 return this->obj != rhs.obj; 248 return this->obj != rhs.obj;
249 } 249 }
250 250
  251 +void
  252 +QPDFObjectHandle::reset()
  253 +{
  254 + // Recursively remove association with any QPDF object. This
  255 + // method may only be called during final destruction. See
  256 + // comments in QPDF::~QPDF().
  257 + if (!isIndirect()) {
  258 + this->obj->reset();
  259 + }
  260 +}
  261 +
251 qpdf_object_type_e 262 qpdf_object_type_e
252 QPDFObjectHandle::getTypeCode() 263 QPDFObjectHandle::getTypeCode()
253 { 264 {
libqpdf/QPDFWriter.cc
@@ -1198,6 +1198,14 @@ void @@ -1198,6 +1198,14 @@ void
1198 QPDFWriter::enqueueObject(QPDFObjectHandle object) 1198 QPDFWriter::enqueueObject(QPDFObjectHandle object)
1199 { 1199 {
1200 if (object.isIndirect()) { 1200 if (object.isIndirect()) {
  1201 + // This owner check should really be done for all objects, not
  1202 + // just indirect objects. As of the time of the release of
  1203 + // qpdf 11, it is known that there are cases of direct objects
  1204 + // from other files getting copied into multiple QPDF objects.
  1205 + // This definitely happens in the page splitting code. If we
  1206 + // were to implement strong checks to prevent objects from
  1207 + // having multiple owners, once that was complete phased in,
  1208 + // this check could be moved outside the if statement.
1201 if (object.getOwningQPDF() != &(this->m->pdf)) { 1209 if (object.getOwningQPDF() != &(this->m->pdf)) {
1202 QTC::TC("qpdf", "QPDFWriter foreign object"); 1210 QTC::TC("qpdf", "QPDFWriter foreign object");
1203 throw std::logic_error( 1211 throw std::logic_error(
libqpdf/QPDF_Array.cc
@@ -34,6 +34,12 @@ QPDF_Array::shallowCopy() @@ -34,6 +34,12 @@ QPDF_Array::shallowCopy()
34 return create(elements); 34 return create(elements);
35 } 35 }
36 36
  37 +void
  38 +QPDF_Array::reset()
  39 +{
  40 + elements.reset();
  41 +}
  42 +
37 std::string 43 std::string
38 QPDF_Array::unparse() 44 QPDF_Array::unparse()
39 { 45 {
libqpdf/QPDF_Dictionary.cc
@@ -21,6 +21,14 @@ QPDF_Dictionary::shallowCopy() @@ -21,6 +21,14 @@ QPDF_Dictionary::shallowCopy()
21 return create(items); 21 return create(items);
22 } 22 }
23 23
  24 +void
  25 +QPDF_Dictionary::reset()
  26 +{
  27 + for (auto& iter: this->items) {
  28 + QPDFObjectHandle::Resetter::reset(iter.second);
  29 + }
  30 +}
  31 +
24 std::string 32 std::string
25 QPDF_Dictionary::unparse() 33 QPDF_Dictionary::unparse()
26 { 34 {
libqpdf/QPDF_Stream.cc
@@ -168,6 +168,13 @@ QPDF_Stream::getFilterOnWrite() const @@ -168,6 +168,13 @@ QPDF_Stream::getFilterOnWrite() const
168 } 168 }
169 169
170 void 170 void
  171 +QPDF_Stream::reset()
  172 +{
  173 + this->stream_provider = nullptr;
  174 + QPDFObjectHandle::Resetter::reset(this->stream_dict);
  175 +}
  176 +
  177 +void
171 QPDF_Stream::setObjGen(QPDFObjGen const& og) 178 QPDF_Stream::setObjGen(QPDFObjGen const& og)
172 { 179 {
173 if (this->og.isIndirect()) { 180 if (this->og.isIndirect()) {
libqpdf/SparseOHArray.cc
@@ -49,6 +49,14 @@ SparseOHArray::remove_last() @@ -49,6 +49,14 @@ SparseOHArray::remove_last()
49 } 49 }
50 50
51 void 51 void
  52 +SparseOHArray::reset()
  53 +{
  54 + for (auto& iter: this->elements) {
  55 + QPDFObjectHandle::Resetter::reset(iter.second);
  56 + }
  57 +}
  58 +
  59 +void
52 SparseOHArray::setAt(size_t idx, QPDFObjectHandle oh) 60 SparseOHArray::setAt(size_t idx, QPDFObjectHandle oh)
53 { 61 {
54 if (idx >= this->n_elements) { 62 if (idx >= this->n_elements) {
libqpdf/qpdf/QPDFValue.hh
@@ -63,6 +63,10 @@ class QPDFValue @@ -63,6 +63,10 @@ class QPDFValue
63 { 63 {
64 return og; 64 return og;
65 } 65 }
  66 + virtual void
  67 + reset()
  68 + {
  69 + }
66 70
67 protected: 71 protected:
68 QPDFValue() : 72 QPDFValue() :
libqpdf/qpdf/QPDFValueProxy.hh
@@ -110,8 +110,21 @@ class QPDFValueProxy @@ -110,8 +110,21 @@ class QPDFValueProxy
110 value->og = og; 110 value->og = og;
111 } 111 }
112 void 112 void
113 - resetObjGen()  
114 - { 113 + reset()
  114 + {
  115 + value->reset();
  116 + // It would be better if, rather than clearing value->qpdf and
  117 + // value->og, we completely replaced value with a null object.
  118 + // However, at the time of the release of qpdf 11, this causes
  119 + // test failures and would likely break a lot of code since it
  120 + // possible for a direct object that recursively contains no
  121 + // indirect objects to be copied into multiple QPDF objects.
  122 + // For that reason, we have to break the association with the
  123 + // owning QPDF but not otherwise mutate the object. For
  124 + // indirect objects, QPDF::~QPDF replaces the object with
  125 + // null, which clears circular references. If this code were
  126 + // able to do the null replacement, that code would not have
  127 + // to.
115 value->qpdf = nullptr; 128 value->qpdf = nullptr;
116 value->og = QPDFObjGen(); 129 value->og = QPDFObjGen();
117 } 130 }
libqpdf/qpdf/QPDF_Array.hh
@@ -17,6 +17,7 @@ class QPDF_Array: public QPDFValue @@ -17,6 +17,7 @@ class QPDF_Array: public QPDFValue
17 virtual std::shared_ptr<QPDFValueProxy> shallowCopy(); 17 virtual std::shared_ptr<QPDFValueProxy> shallowCopy();
18 virtual std::string unparse(); 18 virtual std::string unparse();
19 virtual JSON getJSON(int json_version); 19 virtual JSON getJSON(int json_version);
  20 + virtual void reset();
20 21
21 int getNItems() const; 22 int getNItems() const;
22 QPDFObjectHandle getItem(int n) const; 23 QPDFObjectHandle getItem(int n) const;
libqpdf/qpdf/QPDF_Dictionary.hh
@@ -17,6 +17,7 @@ class QPDF_Dictionary: public QPDFValue @@ -17,6 +17,7 @@ class QPDF_Dictionary: public QPDFValue
17 virtual std::shared_ptr<QPDFValueProxy> shallowCopy(); 17 virtual std::shared_ptr<QPDFValueProxy> shallowCopy();
18 virtual std::string unparse(); 18 virtual std::string unparse();
19 virtual JSON getJSON(int json_version); 19 virtual JSON getJSON(int json_version);
  20 + virtual void reset();
20 21
21 // hasKey() and getKeys() treat keys with null values as if they 22 // hasKey() and getKeys() treat keys with null values as if they
22 // aren't there. getKey() returns null for the value of a 23 // aren't there. getKey() returns null for the value of a
libqpdf/qpdf/QPDF_Stream.hh
@@ -27,6 +27,7 @@ class QPDF_Stream: public QPDFValue @@ -27,6 +27,7 @@ class QPDF_Stream: public QPDFValue
27 virtual std::string unparse(); 27 virtual std::string unparse();
28 virtual JSON getJSON(int json_version); 28 virtual JSON getJSON(int json_version);
29 virtual void setDescription(QPDF*, std::string const&); 29 virtual void setDescription(QPDF*, std::string const&);
  30 + virtual void reset();
30 QPDFObjectHandle getDict() const; 31 QPDFObjectHandle getDict() const;
31 bool isDataModified() const; 32 bool isDataModified() const;
32 void setFilterOnWrite(bool); 33 void setFilterOnWrite(bool);
libqpdf/qpdf/SparseOHArray.hh
@@ -15,6 +15,7 @@ class SparseOHArray @@ -15,6 +15,7 @@ class SparseOHArray
15 void setAt(size_t idx, QPDFObjectHandle oh); 15 void setAt(size_t idx, QPDFObjectHandle oh);
16 void erase(size_t idx); 16 void erase(size_t idx);
17 void insert(size_t idx, QPDFObjectHandle oh); 17 void insert(size_t idx, QPDFObjectHandle oh);
  18 + void reset();
18 19
19 typedef std::unordered_map<size_t, QPDFObjectHandle>::const_iterator 20 typedef std::unordered_map<size_t, QPDFObjectHandle>::const_iterator
20 const_iterator; 21 const_iterator;
qpdf/test_driver.cc
@@ -3274,13 +3274,40 @@ test_92(QPDF&amp; pdf, char const* arg2) @@ -3274,13 +3274,40 @@ test_92(QPDF&amp; pdf, char const* arg2)
3274 { 3274 {
3275 // Exercise indirect objects owned by destroyed QPDF object. 3275 // Exercise indirect objects owned by destroyed QPDF object.
3276 auto qpdf = QPDF::create(); 3276 auto qpdf = QPDF::create();
3277 - qpdf->emptyPDF(); 3277 + qpdf->processFile("minimal.pdf");
3278 auto root = qpdf->getRoot(); 3278 auto root = qpdf->getRoot();
3279 - assert(root.getOwningQPDF() != nullptr); 3279 + assert(root.getOwningQPDF() == qpdf.get());
3280 assert(root.isIndirect()); 3280 assert(root.isIndirect());
  3281 + assert(root.isDictionary());
  3282 + auto page1 = root.getKey("/Pages").getKey("/Kids").getArrayItem(0);
  3283 + assert(page1.getOwningQPDF() == qpdf.get());
  3284 + assert(page1.isIndirect());
  3285 + assert(page1.isDictionary());
  3286 + auto resources = page1.getKey("/Resources");
  3287 + assert(resources.getOwningQPDF() == qpdf.get());
  3288 + assert(resources.isDictionary());
  3289 + assert(!resources.isIndirect());
  3290 + auto contents = page1.getKey("/Contents");
  3291 + auto contents_dict = contents.getDict();
3281 qpdf = nullptr; 3292 qpdf = nullptr;
3282 - assert(root.getOwningQPDF() == nullptr);  
3283 - assert(!root.isIndirect()); 3293 + auto check = [](QPDFObjectHandle& oh) {
  3294 + assert(oh.getOwningQPDF() == nullptr);
  3295 + assert(!oh.isIndirect());
  3296 + };
  3297 + // All objects should no longer have an owning QPDF or be indirect.
  3298 + check(root);
  3299 + check(page1);
  3300 + check(resources);
  3301 + check(contents);
  3302 + check(contents_dict);
  3303 + // Objects that were originally indirect should be null.
  3304 + // Otherwise, they should have retained their old values. See
  3305 + // comments in QPDFValueProxy::reset for why this is the case.
  3306 + assert(root.isNull());
  3307 + assert(page1.isNull());
  3308 + assert(contents.isNull());
  3309 + assert(!resources.isNull());
  3310 + assert(!contents_dict.isNull());
3284 } 3311 }
3285 3312
3286 static void 3313 static void