Commit dba61da1bfb7e4d74c723f369d1c017df9707a14

Authored by Jay Berkenbilt
1 parent 264e25f3

Create a special "destroyed" type rather than using null

When a QPDF is destroyed, changing indirect objects to direct nulls
makes them effectively disappear silently when they sneak into other
places. Instead, we should treat this as an error. Adding a destroyed
object type makes this possible.
ChangeLog
  1 +2022-09-08 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Add QPDFObjectHandle::isDestroyed() to test whether an indirect
  4 + object was from a QPDF that has been destroyed.
  5 +
1 6 2022-09-07 Jay Berkenbilt <ejb@ql.org>
2 7  
3 8 * Add QPDFObjectHandle::getQPDF(), which returns a reference, as
... ...
... ... @@ -812,9 +812,11 @@ Rejected Ideas
812 812 QPDFObjectHandle because of indirect objects. This only pertains to
813 813 direct objects, which are always "resolved" in QPDFObjectHandle.
814 814  
815   - If this is addressed, read comments in QPDFWriter.cc::enqueueObject
816   - near the call to getOwningQPDF, comments in QPDFValueProxy::reset,
817   - and comments in QPDF::~QPDF() near the line that assigns to null.
818   - This will also affect test 92 in test_driver.cc. All these
819   - references were from the release of qpdf 11 (in case they have moved
820   - by such time as this might be resurrected).
  815 + If this is addressed, read comments in the following places:
  816 + * QPDFWriter.cc::enqueueObject near the call to getOwningQPDF
  817 + * QPDFValueProxy::reset and QPDFValueProxy::destroy
  818 + * QPDF::~QPDF()
  819 + * test 92 in test_driver.cc
  820 + * QPDFObjectHandle.hh near isDestroyed
  821 + All these references were from the release of qpdf 11 (in case they
  822 + have moved by such time as this might be resurrected).
... ...
include/qpdf/Constants.h
... ... @@ -123,6 +123,7 @@ enum qpdf_object_type_e {
123 123 ot_inlineimage,
124 124 /* Object types internal to qpdf */
125 125 ot_unresolved,
  126 + ot_destroyed,
126 127 };
127 128  
128 129 /* Write Parameters. See QPDFWriter.hh for details. */
... ...
include/qpdf/QPDFObjectHandle.hh
... ... @@ -391,6 +391,11 @@ class QPDFObjectHandle
391 391 QPDF_DLL
392 392 inline bool isIndirect() const;
393 393  
  394 + // This returns true for indirect objects from a QPDF that has
  395 + // been destroyed.
  396 + QPDF_DLL
  397 + bool isDestroyed();
  398 +
394 399 // True for everything except array, dictionary, stream, word, and
395 400 // inline image.
396 401 QPDF_DLL
... ...
libqpdf/CMakeLists.txt
... ... @@ -90,6 +90,7 @@ set(libqpdf_SOURCES
90 90 QPDFXRefEntry.cc
91 91 QPDF_Array.cc
92 92 QPDF_Bool.cc
  93 + QPDF_Destroyed.cc
93 94 QPDF_Dictionary.cc
94 95 QPDF_InlineImage.cc
95 96 QPDF_Integer.cc
... ...
libqpdf/QPDF.cc
... ... @@ -249,22 +249,23 @@ QPDF::~QPDF()
249 249 // std::shared_ptr objects will prevent the objects from being
250 250 // deleted. Walk through all objects in the object cache, which is
251 251 // those objects that we read from the file, and break all
252   - // resolved indirect references by replacing them with direct null
253   - // objects. At this point, obviously no one is still using the
254   - // QPDF object, but we'll explicitly clear the xref table anyway
255   - // just to prevent any possibility of resolve() succeeding. Note
  252 + // resolved indirect references by replacing them with an internal
  253 + // object type representing that they have been destroyed. Note
256 254 // that we can't break references like this at any time when the
257   - // QPDF object is active. This also causes all QPDFObjectHandle
258   - // objects that are reachable from this object to become nulls and
  255 + // QPDF object is active. The call to reset also causes all
  256 + // QPDFObjectHandle objects that are reachable from this object to
259 257 // release their association with this QPDF.
  258 +
  259 + // At this point, obviously no one is still using the QPDF object,
  260 + // but we'll explicitly clear the xref table anyway just to
  261 + // prevent any possibility of resolve() succeeding.
260 262 this->m->xref_table.clear();
261 263 auto null_obj = QPDF_Null::create();
262 264 for (auto const& iter: this->m->obj_cache) {
263 265 iter.second.object->reset();
264   - // If the issue discussed in QPDFValueProxy::reset were
265   - // resolved, then this assignment to null_obj could be
266   - // removed.
267   - iter.second.object->assign(null_obj);
  266 + // It would be better if reset() could call destroy(), but it
  267 + // can't -- see comments in QPDFValueProxy::reset().
  268 + iter.second.object->destroy();
268 269 }
269 270 }
270 271  
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -252,8 +252,10 @@ void
252 252 QPDFObjectHandle::reset()
253 253 {
254 254 // Recursively remove association with any QPDF object. This
255   - // method may only be called during final destruction. See
256   - // comments in QPDF::~QPDF().
  255 + // method may only be called during final destruction.
  256 + // QPDF::~QPDF() calls it for indirect objects using the object
  257 + // pointer itself, so we don't do that here. Other objects call it
  258 + // through this method.
257 259 if (!isIndirect()) {
258 260 this->obj->reset();
259 261 }
... ... @@ -352,6 +354,12 @@ QPDFObjectHandle::asString()
352 354 }
353 355  
354 356 bool
  357 +QPDFObjectHandle::isDestroyed()
  358 +{
  359 + return dereference() && (obj->getTypeCode() == ::ot_destroyed);
  360 +}
  361 +
  362 +bool
355 363 QPDFObjectHandle::isBool()
356 364 {
357 365 return dereference() && (obj->getTypeCode() == ::ot_boolean);
... ...
libqpdf/QPDFValueProxy.cc
1 1 #include <qpdf/QPDFValueProxy.hh>
2 2  
3 3 #include <qpdf/QPDF.hh>
  4 +#include <qpdf/QPDF_Destroyed.hh>
4 5  
5 6 void
6 7 QPDFValueProxy::doResolve()
... ... @@ -8,3 +9,10 @@ QPDFValueProxy::doResolve()
8 9 auto og = value->og;
9 10 QPDF::Resolver::resolve(value->qpdf, og);
10 11 }
  12 +
  13 +void
  14 +QPDFValueProxy::destroy()
  15 +{
  16 + // See comments in reset() for why this isn't part of reset.
  17 + value = QPDF_Destroyed::getInstance();
  18 +}
... ...
libqpdf/QPDF_Destroyed.cc 0 โ†’ 100644
  1 +#include <qpdf/QPDF_Destroyed.hh>
  2 +
  3 +#include <stdexcept>
  4 +
  5 +QPDF_Destroyed::QPDF_Destroyed() :
  6 + QPDFValue(::ot_destroyed, "destroyed")
  7 +{
  8 +}
  9 +
  10 +std::shared_ptr<QPDFValue>
  11 +QPDF_Destroyed::getInstance()
  12 +{
  13 + static std::shared_ptr<QPDFValue> instance(new QPDF_Destroyed());
  14 + return instance;
  15 +}
  16 +
  17 +std::shared_ptr<QPDFValueProxy>
  18 +QPDF_Destroyed::shallowCopy()
  19 +{
  20 + throw std::logic_error(
  21 + "attempted to shallow copy QPDFObjectHandle from destroyed QPDF");
  22 + return nullptr;
  23 +}
  24 +
  25 +std::string
  26 +QPDF_Destroyed::unparse()
  27 +{
  28 + throw std::logic_error(
  29 + "attempted to unparse a QPDFObjectHandle from a destroyed QPDF");
  30 + return "";
  31 +}
  32 +
  33 +JSON
  34 +QPDF_Destroyed::getJSON(int json_version)
  35 +{
  36 + throw std::logic_error(
  37 + "attempted to get JSON from a QPDFObjectHandle from a destroyed QPDF");
  38 + return JSON::makeNull();
  39 +}
... ...
libqpdf/QPDF_Reserved.cc
... ... @@ -31,6 +31,6 @@ JSON
31 31 QPDF_Reserved::getJSON(int json_version)
32 32 {
33 33 throw std::logic_error(
34   - "QPDFObjectHandle: attempting to unparse a reserved object");
  34 + "QPDFObjectHandle: attempting to get JSON from a reserved object");
35 35 return JSON::makeNull();
36 36 }
... ...
libqpdf/QPDF_Unresolved.cc
... ... @@ -17,7 +17,7 @@ std::shared_ptr&lt;QPDFValueProxy&gt;
17 17 QPDF_Unresolved::shallowCopy()
18 18 {
19 19 throw std::logic_error(
20   - "attempted to shallow copy unresolved QPDFObjectHandle");
  20 + "attempted to shallow copy an unresolved QPDFObjectHandle");
21 21 return nullptr;
22 22 }
23 23  
... ... @@ -32,5 +32,7 @@ QPDF_Unresolved::unparse()
32 32 JSON
33 33 QPDF_Unresolved::getJSON(int json_version)
34 34 {
  35 + throw std::logic_error(
  36 + "attempted to get JSON from an unresolved QPDFObjectHandle");
35 37 return JSON::makeNull();
36 38 }
... ...
libqpdf/qpdf/QPDFValueProxy.hh
... ... @@ -114,21 +114,23 @@ class QPDFValueProxy
114 114 {
115 115 value->reset();
116 116 // It would be better if, rather than clearing value->qpdf and
117   - // value->og, we completely replaced value with a null object.
118   - // However, at the time of the release of qpdf 11, this causes
119   - // test failures and would likely break a lot of code since it
120   - // possible for a direct object that recursively contains no
121   - // indirect objects to be copied into multiple QPDF objects.
122   - // For that reason, we have to break the association with the
123   - // owning QPDF but not otherwise mutate the object. For
124   - // indirect objects, QPDF::~QPDF replaces the object with
125   - // null, which clears circular references. If this code were
126   - // able to do the null replacement, that code would not have
127   - // to.
  117 + // value->og, we completely replaced value with
  118 + // QPDF_Destroyed. However, at the time of the release of qpdf
  119 + // 11, this causes test failures and would likely break a lot
  120 + // of code since it possible for a direct object that
  121 + // recursively contains no indirect objects to be copied into
  122 + // multiple QPDF objects. For that reason, we have to break
  123 + // the association with the owning QPDF but not otherwise
  124 + // mutate the object. For indirect objects, QPDF::~QPDF
  125 + // replaces indirect objects with QPDF_Destroyed, which clears
  126 + // circular references. If this code were able to do that,
  127 + // that code would not have to.
128 128 value->qpdf = nullptr;
129 129 value->og = QPDFObjGen();
130 130 }
131 131  
  132 + void destroy();
  133 +
132 134 bool
133 135 isUnresolved() const
134 136 {
... ...
libqpdf/qpdf/QPDF_Destroyed.hh 0 โ†’ 100644
  1 +#ifndef QPDF_DESTROYED_HH
  2 +#define QPDF_DESTROYED_HH
  3 +
  4 +#include <qpdf/QPDFValue.hh>
  5 +
  6 +class QPDF_Destroyed: public QPDFValue
  7 +{
  8 + public:
  9 + virtual ~QPDF_Destroyed() = default;
  10 + virtual std::shared_ptr<QPDFValueProxy> shallowCopy();
  11 + virtual std::string unparse();
  12 + virtual JSON getJSON(int json_version);
  13 + static std::shared_ptr<QPDFValue> getInstance();
  14 +
  15 + private:
  16 + QPDF_Destroyed();
  17 +};
  18 +
  19 +#endif // QPDF_DESTROYED_HH
... ...
manual/release-notes.rst
... ... @@ -202,11 +202,13 @@ For a detailed list of changes, please see the file
202 202 ``replaceKeyAndGetOld``, a ``null`` object if the object was not
203 203 previously there.
204 204  
205   - - The method ``QPDFObjectHandle::getOwningQPDF`` now returns a
206   - null pointer (rather than an invalid pointer) if the owning
207   - ``QPDF`` object has been destroyed. This situation should
208   - generally not happen for correct code, but at least the
209   - situation is detectible now.
  205 + - It is now possible to detect when a ``QPDFObjectHandle`` is an
  206 + indirect object that belongs to a ``QPDF`` that has been
  207 + destroyed. Any attempt to unparse this type of
  208 + ``QPDFObjectHandle`` will throw a logic error. You can detect
  209 + this by calling the new ``QPDFObjectHandle::isDestroyed``
  210 + method. Also the ``QPDFObjectHandle::getOwningQPDF`` method now
  211 + returns a null pointer rather than an invalid pointer.
210 212  
211 213 - The method ``QPDFObjectHandle::getQPDF`` returns a ``QPDF&``
212 214 (rather than a ``QPDF*``) and is an alternative to
... ...
qpdf/qtest/qpdf/foreign-in-write.out
1 1 logic error: QPDFObjectHandle from different QPDF found while writing. Use QPDF::copyForeignObject to add objects from another file.
  2 +logic error: attempted to unparse a QPDFObjectHandle from a destroyed QPDF
2 3 logic error: Attempting to add an object from a different QPDF. Use QPDF::copyForeignObject to add objects from another file.
3 4 test 29 done
... ...
qpdf/test_driver.cc
... ... @@ -1135,8 +1135,8 @@ test_29(QPDF&amp; pdf, char const* arg2)
1135 1135 {
1136 1136 // Detect mixed objects in QPDFWriter
1137 1137 assert(arg2 != 0);
1138   - QPDF other;
1139   - other.processFile(arg2);
  1138 + auto other = QPDF::create();
  1139 + other->processFile(arg2);
1140 1140 // We need to create a QPDF with mixed ownership to exercise
1141 1141 // QPDFWriter's ownership check. To do this, we have to sneak the
1142 1142 // foreign object inside an ownerless direct object to avoid
... ... @@ -1146,10 +1146,25 @@ test_29(QPDF&amp; pdf, char const* arg2)
1146 1146 // explicitly change the ownership to the wrong value.
1147 1147 auto dict = QPDFObjectHandle::newDictionary();
1148 1148 dict.replaceKey("/QTest", pdf.getTrailer().getKey("/QTest"));
1149   - other.getTrailer().replaceKey("/QTest", dict);
  1149 + other->getTrailer().replaceKey("/QTest", dict);
1150 1150  
1151 1151 try {
1152   - QPDFWriter w(other, "a.pdf");
  1152 + QPDFWriter w(*other, "a.pdf");
  1153 + w.write();
  1154 + std::cout << "oops -- didn't throw" << std::endl;
  1155 + } catch (std::logic_error const& e) {
  1156 + std::cout << "logic error: " << e.what() << std::endl;
  1157 + }
  1158 +
  1159 + // Make sure deleting the other source doesn't prevent detection.
  1160 + auto other2 = QPDF::create();
  1161 + other2->emptyPDF();
  1162 + dict = QPDFObjectHandle::newDictionary();
  1163 + dict.replaceKey("/QTest", other2->getRoot());
  1164 + other->getTrailer().replaceKey("/QTest", dict);
  1165 + other2 = nullptr;
  1166 + try {
  1167 + QPDFWriter w(*other, "a.pdf");
1153 1168 w.write();
1154 1169 std::cout << "oops -- didn't throw" << std::endl;
1155 1170 } catch (std::logic_error const& e) {
... ... @@ -1158,7 +1173,7 @@ test_29(QPDF&amp; pdf, char const* arg2)
1158 1173  
1159 1174 // Detect adding a foreign object
1160 1175 auto root1 = pdf.getRoot();
1161   - auto root2 = other.getRoot();
  1176 + auto root2 = other->getRoot();
1162 1177 try {
1163 1178 root1.replaceKey("/Oops", root2);
1164 1179 } catch (std::logic_error const& e) {
... ... @@ -3300,14 +3315,20 @@ test_92(QPDF&amp; pdf, char const* arg2)
3300 3315 check(resources);
3301 3316 check(contents);
3302 3317 check(contents_dict);
3303   - // Objects that were originally indirect should be null.
  3318 + // Objects that were originally indirect should be destroyed.
3304 3319 // Otherwise, they should have retained their old values. See
3305 3320 // comments in QPDFValueProxy::reset for why this is the case.
3306   - assert(root.isNull());
3307   - assert(page1.isNull());
3308   - assert(contents.isNull());
3309   - assert(!resources.isNull());
3310   - assert(!contents_dict.isNull());
  3321 + assert(root.isDestroyed());
  3322 + assert(page1.isDestroyed());
  3323 + assert(contents.isDestroyed());
  3324 + assert(resources.isDictionary());
  3325 + assert(contents_dict.isDictionary());
  3326 + try {
  3327 + root.unparse();
  3328 + assert(false);
  3329 + } catch (std::logic_error&) {
  3330 + // Expected
  3331 + }
3311 3332 }
3312 3333  
3313 3334 static void
... ...