Commit a4d6589ff26007c966db8912e4dae1aa937a5968

Authored by Jay Berkenbilt
1 parent ec6719fd

Have QPDFObjectHandle notice when replaceObject was called

This results in a performance penalty of 1% to 2% when replaceObject
and swapObjects are never called and a somewhat larger penalty if they
are called, but it's worth it to avoid very confusing behavior as
discussed in depth in qpdf#507.
ChangeLog
  1 +2021-02-25 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Bug fix/behavior change: when QPDF::replaceObject or
  4 + QPDF::swapObjects is called, existing QPDFObjectHandle instances
  5 + will now notice the change. This removes a long-standing source of
  6 + bugs and confusing behavior.
  7 +
1 8 2021-02-23 Jay Berkenbilt <ejb@ql.org>
2 9  
3 10 * The test for the input and output files being the same wasn't
... ...
include/qpdf/QPDF.hh
... ... @@ -278,34 +278,26 @@ class QPDF
278 278 QPDFObjectHandle getObjectByID(int objid, int generation);
279 279  
280 280 // Replace the object with the given object id with the given
281   - // object. The object handle passed in must be a direct object,
  281 + // object. The object handle passed in must be a direct object,
282 282 // though it may contain references to other indirect objects
283   - // within it. Calling this method can have somewhat confusing
284   - // results. Any existing QPDFObjectHandle instances that point to
285   - // the old object and that have been resolved (which happens
286   - // automatically if you access them in any way) will continue to
287   - // point to the old object even though that object will no longer
288   - // be associated with the PDF file. Note that replacing an object
289   - // with QPDFObjectHandle::newNull() effectively removes the object
290   - // from the file since a non-existent object is treated as a null
291   - // object. To replace a reserved object, call replaceReserved
  283 + // within it. Prior to qpdf 10.2.1, after calling this method,
  284 + // existing QPDFObjectHandle instances that pointed to the
  285 + // original object still pointed to the original object, resulting
  286 + // in confusing and incorrect behavior. This was fixed in 10.2.1,
  287 + // so existing QPDFObjectHandle objects will start pointing to the
  288 + // newly replaced object. Note that replacing an object with
  289 + // QPDFObjectHandle::newNull() effectively removes the object from
  290 + // the file since a non-existent object is treated as a null
  291 + // object. To replace a reserved object, call replaceReserved
292 292 // instead.
293 293 QPDF_DLL
294 294 void replaceObject(QPDFObjGen const& og, QPDFObjectHandle);
295 295 QPDF_DLL
296 296 void replaceObject(int objid, int generation, QPDFObjectHandle);
297 297  
298   - // Swap two objects given by ID. Calling this method can have
299   - // confusing results. After swapping two objects, existing
300   - // QPDFObjectHandle instances that reference them will still
301   - // reference the same underlying objects, at which point those
302   - // existing QPDFObjectHandle instances will have incorrect
303   - // information about the object and generation number of those
304   - // objects. While this does not necessarily cause a problem, it
305   - // can certainly be confusing. It is therefore recommended that
306   - // you replace any existing QPDFObjectHandle instances that point
307   - // to the swapped objects with new ones, possibly by calling
308   - // getObjectByID.
  298 + // Swap two objects given by ID. Prior to qpdf 10.2.1, existing
  299 + // QPDFObjectHandle instances that reference them objects not
  300 + // notice the swap, but this was fixed in 10.2.1.
309 301 QPDF_DLL
310 302 void swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2);
311 303 QPDF_DLL
... ... @@ -697,6 +689,11 @@ class QPDF
697 689 {
698 690 return qpdf->resolve(objid, generation);
699 691 }
  692 + static bool objectChanged(
  693 + QPDF* qpdf, QPDFObjGen const& og, PointerHolder<QPDFObject>& oph)
  694 + {
  695 + return qpdf->objectChanged(og, oph);
  696 + }
700 697 };
701 698 friend class Resolver;
702 699  
... ... @@ -930,6 +927,7 @@ class QPDF
930 927 qpdf_offset_t offset, std::string const& description,
931 928 int exp_objid, int exp_generation,
932 929 int& act_objid, int& act_generation);
  930 + bool objectChanged(QPDFObjGen const& og, PointerHolder<QPDFObject>& oph);
933 931 PointerHolder<QPDFObject> resolve(int objid, int generation);
934 932 void resolveObjectsInStream(int obj_stream_number);
935 933 void stopOnError(std::string const& message);
... ... @@ -1441,6 +1439,7 @@ class QPDF
1441 1439 bool in_parse;
1442 1440 bool parsed;
1443 1441 std::set<int> resolved_object_streams;
  1442 + bool ever_replaced_objects;
1444 1443  
1445 1444 // Linearization data
1446 1445 qpdf_offset_t first_xref_item_offset; // actual value from file
... ...
libqpdf/QPDF.cc
... ... @@ -210,6 +210,7 @@ QPDF::Members::Members() :
210 210 immediate_copy_from(false),
211 211 in_parse(false),
212 212 parsed(false),
  213 + ever_replaced_objects(false),
213 214 first_xref_item_offset(0),
214 215 uncompressed_after_compressed(false)
215 216 {
... ... @@ -2063,6 +2064,30 @@ QPDF::readObjectAtOffset(bool try_recovery,
2063 2064 return oh;
2064 2065 }
2065 2066  
  2067 +bool
  2068 +QPDF::objectChanged(QPDFObjGen const& og, PointerHolder<QPDFObject>& oph)
  2069 +{
  2070 + // See if the object cached at og, if any, is the one passed in.
  2071 + // QPDFObjectHandle uses this to detect outdated handles to
  2072 + // replaced or swapped objects. This is a somewhat expensive check
  2073 + // because it happens with every dereference of a
  2074 + // QPDFObjectHandle. To reduce the hit somewhat, short-circuit the
  2075 + // check if we never called a function that replaces an object
  2076 + // already in cache. It is important for functions that do this to
  2077 + // set ever_replaced_objects = true.
  2078 +
  2079 + if (! this->m->ever_replaced_objects)
  2080 + {
  2081 + return false;
  2082 + }
  2083 + auto c = this->m->obj_cache.find(og);
  2084 + if (c == this->m->obj_cache.end())
  2085 + {
  2086 + return true;
  2087 + }
  2088 + return (c->second.object.getPointer() != oph.getPointer());
  2089 +}
  2090 +
2066 2091 PointerHolder<QPDFObject>
2067 2092 QPDF::resolve(int objid, int generation)
2068 2093 {
... ... @@ -2308,6 +2333,7 @@ QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh)
2308 2333  
2309 2334 // Replace the object in the object cache
2310 2335 QPDFObjGen og(objid, generation);
  2336 + this->m->ever_replaced_objects = true;
2311 2337 this->m->obj_cache[og] =
2312 2338 ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1);
2313 2339 }
... ... @@ -2695,6 +2721,7 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)
2695 2721 QPDFObjGen og1(objid1, generation1);
2696 2722 QPDFObjGen og2(objid2, generation2);
2697 2723 ObjCache t = this->m->obj_cache[og1];
  2724 + this->m->ever_replaced_objects = true;
2698 2725 this->m->obj_cache[og1] = this->m->obj_cache[og2];
2699 2726 this->m->obj_cache[og2] = t;
2700 2727 }
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -3194,6 +3194,12 @@ QPDFObjectHandle::dereference()
3194 3194 throw std::logic_error(
3195 3195 "attempted to dereference an uninitialized QPDFObjectHandle");
3196 3196 }
  3197 + if (this->obj.getPointer() && this->objid &&
  3198 + QPDF::Resolver::objectChanged(
  3199 + this->qpdf, QPDFObjGen(this->objid, this->generation), this->obj))
  3200 + {
  3201 + this->obj = nullptr;
  3202 + }
3197 3203 if (this->obj.getPointer() == 0)
3198 3204 {
3199 3205 PointerHolder<QPDFObject> obj = QPDF::Resolver::resolve(
... ...
manual/qpdf-manual.xml
... ... @@ -5061,6 +5061,32 @@ print &quot;\n&quot;;
5061 5061 </varlistentry>
5062 5062 -->
5063 5063 <varlistentry>
  5064 + <term>XXX 10.2.1: Month dd, YYYY</term>
  5065 + <listitem>
  5066 + <itemizedlist>
  5067 + <listitem>
  5068 + <para>
  5069 + Bug Fixes
  5070 + </para>
  5071 + <itemizedlist>
  5072 + <listitem>
  5073 + <para>
  5074 + When <function>QPDF::replaceObject</function> or
  5075 + <function>QPDF::swapObjects</function> is called, existing
  5076 + <classname>QPDFObjectHandle</classname> instances no longer
  5077 + point to the old objects. The next time they are
  5078 + accessed, they automatically notice the change to the
  5079 + underlying object and update themselves. This resolves a
  5080 + very longstanding source of confusion, albeit in a very
  5081 + rarely used method call.
  5082 + </para>
  5083 + </listitem>
  5084 + </itemizedlist>
  5085 + </listitem>
  5086 + </itemizedlist>
  5087 + </listitem>
  5088 + </varlistentry>
  5089 + <varlistentry>
5064 5090 <term>10.2.0: February 23, 2021</term>
5065 5091 <listitem>
5066 5092 <itemizedlist>
... ...
qpdf/qtest/qpdf/test14-in.pdf
... ... @@ -65,6 +65,7 @@ endobj
65 65 612
66 66 792
67 67 ]
  68 + /OrigPage 2
68 69 /Parent 4 0 R
69 70 /Resources <<
70 71 /Font <<
... ... @@ -86,6 +87,7 @@ endobj
86 87 612
87 88 792
88 89 ]
  90 + /OrigPage 3
89 91 /Parent 4 0 R
90 92 /Resources <<
91 93 /Font <<
... ... @@ -237,21 +239,21 @@ xref
237 239 0000000140 00000 n
238 240 0000000252 00000 n
239 241 0000000456 00000 n
240   -0000000661 00000 n
241   -0000000866 00000 n
242   -0000001084 00000 n
243   -0000001186 00000 n
244   -0000001206 00000 n
245   -0000001325 00000 n
246   -0000001384 00000 n
247   -0000001487 00000 n
248   -0000001507 00000 n
249   -0000001566 00000 n
250   -0000001669 00000 n
251   -0000001689 00000 n
252   -0000001748 00000 n
253   -0000001851 00000 n
254   -0000001871 00000 n
  242 +0000000675 00000 n
  243 +0000000894 00000 n
  244 +0000001112 00000 n
  245 +0000001214 00000 n
  246 +0000001234 00000 n
  247 +0000001353 00000 n
  248 +0000001412 00000 n
  249 +0000001515 00000 n
  250 +0000001535 00000 n
  251 +0000001594 00000 n
  252 +0000001697 00000 n
  253 +0000001717 00000 n
  254 +0000001776 00000 n
  255 +0000001879 00000 n
  256 +0000001899 00000 n
255 257 trailer <<
256 258 /QArray 2 0 R
257 259 /QDict 3 0 R
... ... @@ -260,5 +262,5 @@ trailer &lt;&lt;
260 262 /ID [<20eb74876a3e8212c1b4fd43153860b0><1bb7a926da191c58f675435d77997d21>]
261 263 >>
262 264 startxref
263   -1907
  265 +1935
264 266 %%EOF
... ...
qpdf/qtest/qpdf/test14-out.pdf
... ... @@ -16,10 +16,10 @@ endobj
16 16 << /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 11 0 R >> /Type /Page >>
17 17 endobj
18 18 6 0 obj
19   -<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 13 0 R >> /Type /Page >>
  19 +<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /OrigPage 3 /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 13 0 R >> /Type /Page >>
20 20 endobj
21 21 7 0 obj
22   -<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 15 0 R >> /Type /Page >>
  22 +<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /OrigPage 2 /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 15 0 R >> /Type /Page >>
23 23 endobj
24 24 8 0 obj
25 25 << /Contents 16 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 17 0 R >> /Type /Page >>
... ... @@ -88,18 +88,18 @@ xref
88 88 0000000122 00000 n
89 89 0000000199 00000 n
90 90 0000000344 00000 n
91   -0000000490 00000 n
92   -0000000636 00000 n
93   -0000000782 00000 n
94   -0000000877 00000 n
95   -0000000985 00000 n
96   -0000001016 00000 n
97   -0000001112 00000 n
98   -0000001143 00000 n
99   -0000001239 00000 n
100   -0000001270 00000 n
101   -0000001366 00000 n
  91 +0000000502 00000 n
  92 +0000000660 00000 n
  93 +0000000806 00000 n
  94 +0000000901 00000 n
  95 +0000001009 00000 n
  96 +0000001040 00000 n
  97 +0000001136 00000 n
  98 +0000001167 00000 n
  99 +0000001263 00000 n
  100 +0000001294 00000 n
  101 +0000001390 00000 n
102 102 trailer << /QArray 2 0 R /QDict 3 0 R /Root 1 0 R /Size 18 /ID [<20eb74876a3e8212c1b4fd43153860b0><31415926535897932384626433832795>] >>
103 103 startxref
104   -1397
  104 +1421
105 105 %%EOF
... ...
qpdf/qtest/qpdf/test14.out
1 1 caught logic error as expected
2   -old dict: 1
3   -old dict: 1
  2 +old dict: 2
  3 +swapped array: /Array
4 4 new dict: 2
5 5 swapped array: /Array
6 6 array and dictionary contents are correct
... ...
qpdf/test_driver.cc
... ... @@ -740,7 +740,13 @@ void runtest(int n, char const* filename1, char const* arg2)
740 740 " not called 4-page file");
741 741 }
742 742 // Swap pages 2 and 3
743   - pdf.swapObjects(pages.at(1).getObjGen(), pages.at(2).getObjGen());
  743 + auto orig_page2 = pages.at(1);
  744 + auto orig_page3 = pages.at(2);
  745 + assert(orig_page2.getKey("/OrigPage").getIntValue() == 2);
  746 + assert(orig_page3.getKey("/OrigPage").getIntValue() == 3);
  747 + pdf.swapObjects(orig_page2.getObjGen(), orig_page3.getObjGen());
  748 + assert(orig_page2.getKey("/OrigPage").getIntValue() == 3);
  749 + assert(orig_page3.getKey("/OrigPage").getIntValue() == 2);
744 750 // Replace object and swap objects
745 751 QPDFObjectHandle trailer = pdf.getTrailer();
746 752 QPDFObjectHandle qdict = trailer.getKey("/QDict");
... ... @@ -759,18 +765,18 @@ void runtest(int n, char const* filename1, char const* arg2)
759 765 std::cout << "caught logic error as expected" << std::endl;
760 766 }
761 767 pdf.replaceObject(qdict.getObjGen(), new_dict);
762   - // Now qdict still points to the old dictionary
763   - std::cout << "old dict: " << qdict.getKey("/Dict").getIntValue()
  768 + // Now qdict points to the new dictionary
  769 + std::cout << "old dict: " << qdict.getKey("/NewDict").getIntValue()
764 770 << std::endl;
765 771 // Swap dict and array
766 772 pdf.swapObjects(qdict.getObjGen(), qarray.getObjGen());
767   - // Now qarray will resolve to new object but qdict is still
768   - // the old object
769   - std::cout << "old dict: " << qdict.getKey("/Dict").getIntValue()
  773 + // Now qarray will resolve to new object and qdict resolves to
  774 + // the array
  775 + std::cout << "swapped array: " << qdict.getArrayItem(0).getName()
770 776 << std::endl;
771 777 std::cout << "new dict: " << qarray.getKey("/NewDict").getIntValue()
772 778 << std::endl;
773   - // Reread qdict, now pointing to an array
  779 + // Reread qdict, still pointing to an array
774 780 qdict = pdf.getObjectByObjGen(qdict.getObjGen());
775 781 std::cout << "swapped array: " << qdict.getArrayItem(0).getName()
776 782 << std::endl;
... ...