Commit a4d6589ff26007c966db8912e4dae1aa937a5968

Authored by Jay Berkenbilt
1 parent ec6719fd

Have QPDFObjectHandle notice when replaceObject was called

This results in a performance penalty of 1% to 2% when replaceObject
and swapObjects are never called and a somewhat larger penalty if they
are called, but it's worth it to avoid very confusing behavior as
discussed in depth in qpdf#507.
ChangeLog
  1 +2021-02-25 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Bug fix/behavior change: when QPDF::replaceObject or
  4 + QPDF::swapObjects is called, existing QPDFObjectHandle instances
  5 + will now notice the change. This removes a long-standing source of
  6 + bugs and confusing behavior.
  7 +
1 2021-02-23 Jay Berkenbilt <ejb@ql.org> 8 2021-02-23 Jay Berkenbilt <ejb@ql.org>
2 9
3 * The test for the input and output files being the same wasn't 10 * The test for the input and output files being the same wasn't
include/qpdf/QPDF.hh
@@ -278,34 +278,26 @@ class QPDF @@ -278,34 +278,26 @@ class QPDF
278 QPDFObjectHandle getObjectByID(int objid, int generation); 278 QPDFObjectHandle getObjectByID(int objid, int generation);
279 279
280 // Replace the object with the given object id with the given 280 // Replace the object with the given object id with the given
281 - // object. The object handle passed in must be a direct object, 281 + // object. The object handle passed in must be a direct object,
282 // though it may contain references to other indirect objects 282 // though it may contain references to other indirect objects
283 - // within it. Calling this method can have somewhat confusing  
284 - // results. Any existing QPDFObjectHandle instances that point to  
285 - // the old object and that have been resolved (which happens  
286 - // automatically if you access them in any way) will continue to  
287 - // point to the old object even though that object will no longer  
288 - // be associated with the PDF file. Note that replacing an object  
289 - // with QPDFObjectHandle::newNull() effectively removes the object  
290 - // from the file since a non-existent object is treated as a null  
291 - // object. To replace a reserved object, call replaceReserved 283 + // within it. Prior to qpdf 10.2.1, after calling this method,
  284 + // existing QPDFObjectHandle instances that pointed to the
  285 + // original object still pointed to the original object, resulting
  286 + // in confusing and incorrect behavior. This was fixed in 10.2.1,
  287 + // so existing QPDFObjectHandle objects will start pointing to the
  288 + // newly replaced object. Note that replacing an object with
  289 + // QPDFObjectHandle::newNull() effectively removes the object from
  290 + // the file since a non-existent object is treated as a null
  291 + // object. To replace a reserved object, call replaceReserved
292 // instead. 292 // instead.
293 QPDF_DLL 293 QPDF_DLL
294 void replaceObject(QPDFObjGen const& og, QPDFObjectHandle); 294 void replaceObject(QPDFObjGen const& og, QPDFObjectHandle);
295 QPDF_DLL 295 QPDF_DLL
296 void replaceObject(int objid, int generation, QPDFObjectHandle); 296 void replaceObject(int objid, int generation, QPDFObjectHandle);
297 297
298 - // Swap two objects given by ID. Calling this method can have  
299 - // confusing results. After swapping two objects, existing  
300 - // QPDFObjectHandle instances that reference them will still  
301 - // reference the same underlying objects, at which point those  
302 - // existing QPDFObjectHandle instances will have incorrect  
303 - // information about the object and generation number of those  
304 - // objects. While this does not necessarily cause a problem, it  
305 - // can certainly be confusing. It is therefore recommended that  
306 - // you replace any existing QPDFObjectHandle instances that point  
307 - // to the swapped objects with new ones, possibly by calling  
308 - // getObjectByID. 298 + // Swap two objects given by ID. Prior to qpdf 10.2.1, existing
  299 + // QPDFObjectHandle instances that reference them objects not
  300 + // notice the swap, but this was fixed in 10.2.1.
309 QPDF_DLL 301 QPDF_DLL
310 void swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2); 302 void swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2);
311 QPDF_DLL 303 QPDF_DLL
@@ -697,6 +689,11 @@ class QPDF @@ -697,6 +689,11 @@ class QPDF
697 { 689 {
698 return qpdf->resolve(objid, generation); 690 return qpdf->resolve(objid, generation);
699 } 691 }
  692 + static bool objectChanged(
  693 + QPDF* qpdf, QPDFObjGen const& og, PointerHolder<QPDFObject>& oph)
  694 + {
  695 + return qpdf->objectChanged(og, oph);
  696 + }
700 }; 697 };
701 friend class Resolver; 698 friend class Resolver;
702 699
@@ -930,6 +927,7 @@ class QPDF @@ -930,6 +927,7 @@ class QPDF
930 qpdf_offset_t offset, std::string const& description, 927 qpdf_offset_t offset, std::string const& description,
931 int exp_objid, int exp_generation, 928 int exp_objid, int exp_generation,
932 int& act_objid, int& act_generation); 929 int& act_objid, int& act_generation);
  930 + bool objectChanged(QPDFObjGen const& og, PointerHolder<QPDFObject>& oph);
933 PointerHolder<QPDFObject> resolve(int objid, int generation); 931 PointerHolder<QPDFObject> resolve(int objid, int generation);
934 void resolveObjectsInStream(int obj_stream_number); 932 void resolveObjectsInStream(int obj_stream_number);
935 void stopOnError(std::string const& message); 933 void stopOnError(std::string const& message);
@@ -1441,6 +1439,7 @@ class QPDF @@ -1441,6 +1439,7 @@ class QPDF
1441 bool in_parse; 1439 bool in_parse;
1442 bool parsed; 1440 bool parsed;
1443 std::set<int> resolved_object_streams; 1441 std::set<int> resolved_object_streams;
  1442 + bool ever_replaced_objects;
1444 1443
1445 // Linearization data 1444 // Linearization data
1446 qpdf_offset_t first_xref_item_offset; // actual value from file 1445 qpdf_offset_t first_xref_item_offset; // actual value from file
libqpdf/QPDF.cc
@@ -210,6 +210,7 @@ QPDF::Members::Members() : @@ -210,6 +210,7 @@ QPDF::Members::Members() :
210 immediate_copy_from(false), 210 immediate_copy_from(false),
211 in_parse(false), 211 in_parse(false),
212 parsed(false), 212 parsed(false),
  213 + ever_replaced_objects(false),
213 first_xref_item_offset(0), 214 first_xref_item_offset(0),
214 uncompressed_after_compressed(false) 215 uncompressed_after_compressed(false)
215 { 216 {
@@ -2063,6 +2064,30 @@ QPDF::readObjectAtOffset(bool try_recovery, @@ -2063,6 +2064,30 @@ QPDF::readObjectAtOffset(bool try_recovery,
2063 return oh; 2064 return oh;
2064 } 2065 }
2065 2066
  2067 +bool
  2068 +QPDF::objectChanged(QPDFObjGen const& og, PointerHolder<QPDFObject>& oph)
  2069 +{
  2070 + // See if the object cached at og, if any, is the one passed in.
  2071 + // QPDFObjectHandle uses this to detect outdated handles to
  2072 + // replaced or swapped objects. This is a somewhat expensive check
  2073 + // because it happens with every dereference of a
  2074 + // QPDFObjectHandle. To reduce the hit somewhat, short-circuit the
  2075 + // check if we never called a function that replaces an object
  2076 + // already in cache. It is important for functions that do this to
  2077 + // set ever_replaced_objects = true.
  2078 +
  2079 + if (! this->m->ever_replaced_objects)
  2080 + {
  2081 + return false;
  2082 + }
  2083 + auto c = this->m->obj_cache.find(og);
  2084 + if (c == this->m->obj_cache.end())
  2085 + {
  2086 + return true;
  2087 + }
  2088 + return (c->second.object.getPointer() != oph.getPointer());
  2089 +}
  2090 +
2066 PointerHolder<QPDFObject> 2091 PointerHolder<QPDFObject>
2067 QPDF::resolve(int objid, int generation) 2092 QPDF::resolve(int objid, int generation)
2068 { 2093 {
@@ -2308,6 +2333,7 @@ QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh) @@ -2308,6 +2333,7 @@ QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh)
2308 2333
2309 // Replace the object in the object cache 2334 // Replace the object in the object cache
2310 QPDFObjGen og(objid, generation); 2335 QPDFObjGen og(objid, generation);
  2336 + this->m->ever_replaced_objects = true;
2311 this->m->obj_cache[og] = 2337 this->m->obj_cache[og] =
2312 ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1); 2338 ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1);
2313 } 2339 }
@@ -2695,6 +2721,7 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2) @@ -2695,6 +2721,7 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)
2695 QPDFObjGen og1(objid1, generation1); 2721 QPDFObjGen og1(objid1, generation1);
2696 QPDFObjGen og2(objid2, generation2); 2722 QPDFObjGen og2(objid2, generation2);
2697 ObjCache t = this->m->obj_cache[og1]; 2723 ObjCache t = this->m->obj_cache[og1];
  2724 + this->m->ever_replaced_objects = true;
2698 this->m->obj_cache[og1] = this->m->obj_cache[og2]; 2725 this->m->obj_cache[og1] = this->m->obj_cache[og2];
2699 this->m->obj_cache[og2] = t; 2726 this->m->obj_cache[og2] = t;
2700 } 2727 }
libqpdf/QPDFObjectHandle.cc
@@ -3194,6 +3194,12 @@ QPDFObjectHandle::dereference() @@ -3194,6 +3194,12 @@ QPDFObjectHandle::dereference()
3194 throw std::logic_error( 3194 throw std::logic_error(
3195 "attempted to dereference an uninitialized QPDFObjectHandle"); 3195 "attempted to dereference an uninitialized QPDFObjectHandle");
3196 } 3196 }
  3197 + if (this->obj.getPointer() && this->objid &&
  3198 + QPDF::Resolver::objectChanged(
  3199 + this->qpdf, QPDFObjGen(this->objid, this->generation), this->obj))
  3200 + {
  3201 + this->obj = nullptr;
  3202 + }
3197 if (this->obj.getPointer() == 0) 3203 if (this->obj.getPointer() == 0)
3198 { 3204 {
3199 PointerHolder<QPDFObject> obj = QPDF::Resolver::resolve( 3205 PointerHolder<QPDFObject> obj = QPDF::Resolver::resolve(
manual/qpdf-manual.xml
@@ -5061,6 +5061,32 @@ print &quot;\n&quot;; @@ -5061,6 +5061,32 @@ print &quot;\n&quot;;
5061 </varlistentry> 5061 </varlistentry>
5062 --> 5062 -->
5063 <varlistentry> 5063 <varlistentry>
  5064 + <term>XXX 10.2.1: Month dd, YYYY</term>
  5065 + <listitem>
  5066 + <itemizedlist>
  5067 + <listitem>
  5068 + <para>
  5069 + Bug Fixes
  5070 + </para>
  5071 + <itemizedlist>
  5072 + <listitem>
  5073 + <para>
  5074 + When <function>QPDF::replaceObject</function> or
  5075 + <function>QPDF::swapObjects</function> is called, existing
  5076 + <classname>QPDFObjectHandle</classname> instances no longer
  5077 + point to the old objects. The next time they are
  5078 + accessed, they automatically notice the change to the
  5079 + underlying object and update themselves. This resolves a
  5080 + very longstanding source of confusion, albeit in a very
  5081 + rarely used method call.
  5082 + </para>
  5083 + </listitem>
  5084 + </itemizedlist>
  5085 + </listitem>
  5086 + </itemizedlist>
  5087 + </listitem>
  5088 + </varlistentry>
  5089 + <varlistentry>
5064 <term>10.2.0: February 23, 2021</term> 5090 <term>10.2.0: February 23, 2021</term>
5065 <listitem> 5091 <listitem>
5066 <itemizedlist> 5092 <itemizedlist>
qpdf/qtest/qpdf/test14-in.pdf
@@ -65,6 +65,7 @@ endobj @@ -65,6 +65,7 @@ endobj
65 612 65 612
66 792 66 792
67 ] 67 ]
  68 + /OrigPage 2
68 /Parent 4 0 R 69 /Parent 4 0 R
69 /Resources << 70 /Resources <<
70 /Font << 71 /Font <<
@@ -86,6 +87,7 @@ endobj @@ -86,6 +87,7 @@ endobj
86 612 87 612
87 792 88 792
88 ] 89 ]
  90 + /OrigPage 3
89 /Parent 4 0 R 91 /Parent 4 0 R
90 /Resources << 92 /Resources <<
91 /Font << 93 /Font <<
@@ -237,21 +239,21 @@ xref @@ -237,21 +239,21 @@ xref
237 0000000140 00000 n 239 0000000140 00000 n
238 0000000252 00000 n 240 0000000252 00000 n
239 0000000456 00000 n 241 0000000456 00000 n
240 -0000000661 00000 n  
241 -0000000866 00000 n  
242 -0000001084 00000 n  
243 -0000001186 00000 n  
244 -0000001206 00000 n  
245 -0000001325 00000 n  
246 -0000001384 00000 n  
247 -0000001487 00000 n  
248 -0000001507 00000 n  
249 -0000001566 00000 n  
250 -0000001669 00000 n  
251 -0000001689 00000 n  
252 -0000001748 00000 n  
253 -0000001851 00000 n  
254 -0000001871 00000 n 242 +0000000675 00000 n
  243 +0000000894 00000 n
  244 +0000001112 00000 n
  245 +0000001214 00000 n
  246 +0000001234 00000 n
  247 +0000001353 00000 n
  248 +0000001412 00000 n
  249 +0000001515 00000 n
  250 +0000001535 00000 n
  251 +0000001594 00000 n
  252 +0000001697 00000 n
  253 +0000001717 00000 n
  254 +0000001776 00000 n
  255 +0000001879 00000 n
  256 +0000001899 00000 n
255 trailer << 257 trailer <<
256 /QArray 2 0 R 258 /QArray 2 0 R
257 /QDict 3 0 R 259 /QDict 3 0 R
@@ -260,5 +262,5 @@ trailer &lt;&lt; @@ -260,5 +262,5 @@ trailer &lt;&lt;
260 /ID [<20eb74876a3e8212c1b4fd43153860b0><1bb7a926da191c58f675435d77997d21>] 262 /ID [<20eb74876a3e8212c1b4fd43153860b0><1bb7a926da191c58f675435d77997d21>]
261 >> 263 >>
262 startxref 264 startxref
263 -1907 265 +1935
264 %%EOF 266 %%EOF
qpdf/qtest/qpdf/test14-out.pdf
@@ -16,10 +16,10 @@ endobj @@ -16,10 +16,10 @@ endobj
16 << /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 11 0 R >> /Type /Page >> 16 << /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 11 0 R >> /Type /Page >>
17 endobj 17 endobj
18 6 0 obj 18 6 0 obj
19 -<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 13 0 R >> /Type /Page >> 19 +<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /OrigPage 3 /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 13 0 R >> /Type /Page >>
20 endobj 20 endobj
21 7 0 obj 21 7 0 obj
22 -<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 15 0 R >> /Type /Page >> 22 +<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /OrigPage 2 /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 15 0 R >> /Type /Page >>
23 endobj 23 endobj
24 8 0 obj 24 8 0 obj
25 << /Contents 16 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 17 0 R >> /Type /Page >> 25 << /Contents 16 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 17 0 R >> /Type /Page >>
@@ -88,18 +88,18 @@ xref @@ -88,18 +88,18 @@ xref
88 0000000122 00000 n 88 0000000122 00000 n
89 0000000199 00000 n 89 0000000199 00000 n
90 0000000344 00000 n 90 0000000344 00000 n
91 -0000000490 00000 n  
92 -0000000636 00000 n  
93 -0000000782 00000 n  
94 -0000000877 00000 n  
95 -0000000985 00000 n  
96 -0000001016 00000 n  
97 -0000001112 00000 n  
98 -0000001143 00000 n  
99 -0000001239 00000 n  
100 -0000001270 00000 n  
101 -0000001366 00000 n 91 +0000000502 00000 n
  92 +0000000660 00000 n
  93 +0000000806 00000 n
  94 +0000000901 00000 n
  95 +0000001009 00000 n
  96 +0000001040 00000 n
  97 +0000001136 00000 n
  98 +0000001167 00000 n
  99 +0000001263 00000 n
  100 +0000001294 00000 n
  101 +0000001390 00000 n
102 trailer << /QArray 2 0 R /QDict 3 0 R /Root 1 0 R /Size 18 /ID [<20eb74876a3e8212c1b4fd43153860b0><31415926535897932384626433832795>] >> 102 trailer << /QArray 2 0 R /QDict 3 0 R /Root 1 0 R /Size 18 /ID [<20eb74876a3e8212c1b4fd43153860b0><31415926535897932384626433832795>] >>
103 startxref 103 startxref
104 -1397 104 +1421
105 %%EOF 105 %%EOF
qpdf/qtest/qpdf/test14.out
1 caught logic error as expected 1 caught logic error as expected
2 -old dict: 1  
3 -old dict: 1 2 +old dict: 2
  3 +swapped array: /Array
4 new dict: 2 4 new dict: 2
5 swapped array: /Array 5 swapped array: /Array
6 array and dictionary contents are correct 6 array and dictionary contents are correct
qpdf/test_driver.cc
@@ -740,7 +740,13 @@ void runtest(int n, char const* filename1, char const* arg2) @@ -740,7 +740,13 @@ void runtest(int n, char const* filename1, char const* arg2)
740 " not called 4-page file"); 740 " not called 4-page file");
741 } 741 }
742 // Swap pages 2 and 3 742 // Swap pages 2 and 3
743 - pdf.swapObjects(pages.at(1).getObjGen(), pages.at(2).getObjGen()); 743 + auto orig_page2 = pages.at(1);
  744 + auto orig_page3 = pages.at(2);
  745 + assert(orig_page2.getKey("/OrigPage").getIntValue() == 2);
  746 + assert(orig_page3.getKey("/OrigPage").getIntValue() == 3);
  747 + pdf.swapObjects(orig_page2.getObjGen(), orig_page3.getObjGen());
  748 + assert(orig_page2.getKey("/OrigPage").getIntValue() == 3);
  749 + assert(orig_page3.getKey("/OrigPage").getIntValue() == 2);
744 // Replace object and swap objects 750 // Replace object and swap objects
745 QPDFObjectHandle trailer = pdf.getTrailer(); 751 QPDFObjectHandle trailer = pdf.getTrailer();
746 QPDFObjectHandle qdict = trailer.getKey("/QDict"); 752 QPDFObjectHandle qdict = trailer.getKey("/QDict");
@@ -759,18 +765,18 @@ void runtest(int n, char const* filename1, char const* arg2) @@ -759,18 +765,18 @@ void runtest(int n, char const* filename1, char const* arg2)
759 std::cout << "caught logic error as expected" << std::endl; 765 std::cout << "caught logic error as expected" << std::endl;
760 } 766 }
761 pdf.replaceObject(qdict.getObjGen(), new_dict); 767 pdf.replaceObject(qdict.getObjGen(), new_dict);
762 - // Now qdict still points to the old dictionary  
763 - std::cout << "old dict: " << qdict.getKey("/Dict").getIntValue() 768 + // Now qdict points to the new dictionary
  769 + std::cout << "old dict: " << qdict.getKey("/NewDict").getIntValue()
764 << std::endl; 770 << std::endl;
765 // Swap dict and array 771 // Swap dict and array
766 pdf.swapObjects(qdict.getObjGen(), qarray.getObjGen()); 772 pdf.swapObjects(qdict.getObjGen(), qarray.getObjGen());
767 - // Now qarray will resolve to new object but qdict is still  
768 - // the old object  
769 - std::cout << "old dict: " << qdict.getKey("/Dict").getIntValue() 773 + // Now qarray will resolve to new object and qdict resolves to
  774 + // the array
  775 + std::cout << "swapped array: " << qdict.getArrayItem(0).getName()
770 << std::endl; 776 << std::endl;
771 std::cout << "new dict: " << qarray.getKey("/NewDict").getIntValue() 777 std::cout << "new dict: " << qarray.getKey("/NewDict").getIntValue()
772 << std::endl; 778 << std::endl;
773 - // Reread qdict, now pointing to an array 779 + // Reread qdict, still pointing to an array
774 qdict = pdf.getObjectByObjGen(qdict.getObjGen()); 780 qdict = pdf.getObjectByObjGen(qdict.getObjGen());
775 std::cout << "swapped array: " << qdict.getArrayItem(0).getName() 781 std::cout << "swapped array: " << qdict.getArrayItem(0).getName()
776 << std::endl; 782 << std::endl;