Commit a4d6589ff26007c966db8912e4dae1aa937a5968
1 parent
ec6719fd
Have QPDFObjectHandle notice when replaceObject was called
This results in a performance penalty of 1% to 2% when replaceObject and swapObjects are never called and a somewhat larger penalty if they are called, but it's worth it to avoid very confusing behavior as discussed in depth in qpdf#507.
Showing
9 changed files
with
133 additions
and
60 deletions
ChangeLog
| 1 | +2021-02-25 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * Bug fix/behavior change: when QPDF::replaceObject or | |
| 4 | + QPDF::swapObjects is called, existing QPDFObjectHandle instances | |
| 5 | + will now notice the change. This removes a long-standing source of | |
| 6 | + bugs and confusing behavior. | |
| 7 | + | |
| 1 | 8 | 2021-02-23 Jay Berkenbilt <ejb@ql.org> |
| 2 | 9 | |
| 3 | 10 | * The test for the input and output files being the same wasn't | ... | ... |
include/qpdf/QPDF.hh
| ... | ... | @@ -278,34 +278,26 @@ class QPDF |
| 278 | 278 | QPDFObjectHandle getObjectByID(int objid, int generation); |
| 279 | 279 | |
| 280 | 280 | // Replace the object with the given object id with the given |
| 281 | - // object. The object handle passed in must be a direct object, | |
| 281 | + // object. The object handle passed in must be a direct object, | |
| 282 | 282 | // though it may contain references to other indirect objects |
| 283 | - // within it. Calling this method can have somewhat confusing | |
| 284 | - // results. Any existing QPDFObjectHandle instances that point to | |
| 285 | - // the old object and that have been resolved (which happens | |
| 286 | - // automatically if you access them in any way) will continue to | |
| 287 | - // point to the old object even though that object will no longer | |
| 288 | - // be associated with the PDF file. Note that replacing an object | |
| 289 | - // with QPDFObjectHandle::newNull() effectively removes the object | |
| 290 | - // from the file since a non-existent object is treated as a null | |
| 291 | - // object. To replace a reserved object, call replaceReserved | |
| 283 | + // within it. Prior to qpdf 10.2.1, after calling this method, | |
| 284 | + // existing QPDFObjectHandle instances that pointed to the | |
| 285 | + // original object still pointed to the original object, resulting | |
| 286 | + // in confusing and incorrect behavior. This was fixed in 10.2.1, | |
| 287 | + // so existing QPDFObjectHandle objects will start pointing to the | |
| 288 | + // newly replaced object. Note that replacing an object with | |
| 289 | + // QPDFObjectHandle::newNull() effectively removes the object from | |
| 290 | + // the file since a non-existent object is treated as a null | |
| 291 | + // object. To replace a reserved object, call replaceReserved | |
| 292 | 292 | // instead. |
| 293 | 293 | QPDF_DLL |
| 294 | 294 | void replaceObject(QPDFObjGen const& og, QPDFObjectHandle); |
| 295 | 295 | QPDF_DLL |
| 296 | 296 | void replaceObject(int objid, int generation, QPDFObjectHandle); |
| 297 | 297 | |
| 298 | - // Swap two objects given by ID. Calling this method can have | |
| 299 | - // confusing results. After swapping two objects, existing | |
| 300 | - // QPDFObjectHandle instances that reference them will still | |
| 301 | - // reference the same underlying objects, at which point those | |
| 302 | - // existing QPDFObjectHandle instances will have incorrect | |
| 303 | - // information about the object and generation number of those | |
| 304 | - // objects. While this does not necessarily cause a problem, it | |
| 305 | - // can certainly be confusing. It is therefore recommended that | |
| 306 | - // you replace any existing QPDFObjectHandle instances that point | |
| 307 | - // to the swapped objects with new ones, possibly by calling | |
| 308 | - // getObjectByID. | |
| 298 | + // Swap two objects given by ID. Prior to qpdf 10.2.1, existing | |
| 299 | + // QPDFObjectHandle instances that reference them objects not | |
| 300 | + // notice the swap, but this was fixed in 10.2.1. | |
| 309 | 301 | QPDF_DLL |
| 310 | 302 | void swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2); |
| 311 | 303 | QPDF_DLL |
| ... | ... | @@ -697,6 +689,11 @@ class QPDF |
| 697 | 689 | { |
| 698 | 690 | return qpdf->resolve(objid, generation); |
| 699 | 691 | } |
| 692 | + static bool objectChanged( | |
| 693 | + QPDF* qpdf, QPDFObjGen const& og, PointerHolder<QPDFObject>& oph) | |
| 694 | + { | |
| 695 | + return qpdf->objectChanged(og, oph); | |
| 696 | + } | |
| 700 | 697 | }; |
| 701 | 698 | friend class Resolver; |
| 702 | 699 | |
| ... | ... | @@ -930,6 +927,7 @@ class QPDF |
| 930 | 927 | qpdf_offset_t offset, std::string const& description, |
| 931 | 928 | int exp_objid, int exp_generation, |
| 932 | 929 | int& act_objid, int& act_generation); |
| 930 | + bool objectChanged(QPDFObjGen const& og, PointerHolder<QPDFObject>& oph); | |
| 933 | 931 | PointerHolder<QPDFObject> resolve(int objid, int generation); |
| 934 | 932 | void resolveObjectsInStream(int obj_stream_number); |
| 935 | 933 | void stopOnError(std::string const& message); |
| ... | ... | @@ -1441,6 +1439,7 @@ class QPDF |
| 1441 | 1439 | bool in_parse; |
| 1442 | 1440 | bool parsed; |
| 1443 | 1441 | std::set<int> resolved_object_streams; |
| 1442 | + bool ever_replaced_objects; | |
| 1444 | 1443 | |
| 1445 | 1444 | // Linearization data |
| 1446 | 1445 | qpdf_offset_t first_xref_item_offset; // actual value from file | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -210,6 +210,7 @@ QPDF::Members::Members() : |
| 210 | 210 | immediate_copy_from(false), |
| 211 | 211 | in_parse(false), |
| 212 | 212 | parsed(false), |
| 213 | + ever_replaced_objects(false), | |
| 213 | 214 | first_xref_item_offset(0), |
| 214 | 215 | uncompressed_after_compressed(false) |
| 215 | 216 | { |
| ... | ... | @@ -2063,6 +2064,30 @@ QPDF::readObjectAtOffset(bool try_recovery, |
| 2063 | 2064 | return oh; |
| 2064 | 2065 | } |
| 2065 | 2066 | |
| 2067 | +bool | |
| 2068 | +QPDF::objectChanged(QPDFObjGen const& og, PointerHolder<QPDFObject>& oph) | |
| 2069 | +{ | |
| 2070 | + // See if the object cached at og, if any, is the one passed in. | |
| 2071 | + // QPDFObjectHandle uses this to detect outdated handles to | |
| 2072 | + // replaced or swapped objects. This is a somewhat expensive check | |
| 2073 | + // because it happens with every dereference of a | |
| 2074 | + // QPDFObjectHandle. To reduce the hit somewhat, short-circuit the | |
| 2075 | + // check if we never called a function that replaces an object | |
| 2076 | + // already in cache. It is important for functions that do this to | |
| 2077 | + // set ever_replaced_objects = true. | |
| 2078 | + | |
| 2079 | + if (! this->m->ever_replaced_objects) | |
| 2080 | + { | |
| 2081 | + return false; | |
| 2082 | + } | |
| 2083 | + auto c = this->m->obj_cache.find(og); | |
| 2084 | + if (c == this->m->obj_cache.end()) | |
| 2085 | + { | |
| 2086 | + return true; | |
| 2087 | + } | |
| 2088 | + return (c->second.object.getPointer() != oph.getPointer()); | |
| 2089 | +} | |
| 2090 | + | |
| 2066 | 2091 | PointerHolder<QPDFObject> |
| 2067 | 2092 | QPDF::resolve(int objid, int generation) |
| 2068 | 2093 | { |
| ... | ... | @@ -2308,6 +2333,7 @@ QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh) |
| 2308 | 2333 | |
| 2309 | 2334 | // Replace the object in the object cache |
| 2310 | 2335 | QPDFObjGen og(objid, generation); |
| 2336 | + this->m->ever_replaced_objects = true; | |
| 2311 | 2337 | this->m->obj_cache[og] = |
| 2312 | 2338 | ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1); |
| 2313 | 2339 | } |
| ... | ... | @@ -2695,6 +2721,7 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2) |
| 2695 | 2721 | QPDFObjGen og1(objid1, generation1); |
| 2696 | 2722 | QPDFObjGen og2(objid2, generation2); |
| 2697 | 2723 | ObjCache t = this->m->obj_cache[og1]; |
| 2724 | + this->m->ever_replaced_objects = true; | |
| 2698 | 2725 | this->m->obj_cache[og1] = this->m->obj_cache[og2]; |
| 2699 | 2726 | this->m->obj_cache[og2] = t; |
| 2700 | 2727 | } | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -3194,6 +3194,12 @@ QPDFObjectHandle::dereference() |
| 3194 | 3194 | throw std::logic_error( |
| 3195 | 3195 | "attempted to dereference an uninitialized QPDFObjectHandle"); |
| 3196 | 3196 | } |
| 3197 | + if (this->obj.getPointer() && this->objid && | |
| 3198 | + QPDF::Resolver::objectChanged( | |
| 3199 | + this->qpdf, QPDFObjGen(this->objid, this->generation), this->obj)) | |
| 3200 | + { | |
| 3201 | + this->obj = nullptr; | |
| 3202 | + } | |
| 3197 | 3203 | if (this->obj.getPointer() == 0) |
| 3198 | 3204 | { |
| 3199 | 3205 | PointerHolder<QPDFObject> obj = QPDF::Resolver::resolve( | ... | ... |
manual/qpdf-manual.xml
| ... | ... | @@ -5061,6 +5061,32 @@ print "\n"; |
| 5061 | 5061 | </varlistentry> |
| 5062 | 5062 | --> |
| 5063 | 5063 | <varlistentry> |
| 5064 | + <term>XXX 10.2.1: Month dd, YYYY</term> | |
| 5065 | + <listitem> | |
| 5066 | + <itemizedlist> | |
| 5067 | + <listitem> | |
| 5068 | + <para> | |
| 5069 | + Bug Fixes | |
| 5070 | + </para> | |
| 5071 | + <itemizedlist> | |
| 5072 | + <listitem> | |
| 5073 | + <para> | |
| 5074 | + When <function>QPDF::replaceObject</function> or | |
| 5075 | + <function>QPDF::swapObjects</function> is called, existing | |
| 5076 | + <classname>QPDFObjectHandle</classname> instances no longer | |
| 5077 | + point to the old objects. The next time they are | |
| 5078 | + accessed, they automatically notice the change to the | |
| 5079 | + underlying object and update themselves. This resolves a | |
| 5080 | + very longstanding source of confusion, albeit in a very | |
| 5081 | + rarely used method call. | |
| 5082 | + </para> | |
| 5083 | + </listitem> | |
| 5084 | + </itemizedlist> | |
| 5085 | + </listitem> | |
| 5086 | + </itemizedlist> | |
| 5087 | + </listitem> | |
| 5088 | + </varlistentry> | |
| 5089 | + <varlistentry> | |
| 5064 | 5090 | <term>10.2.0: February 23, 2021</term> |
| 5065 | 5091 | <listitem> |
| 5066 | 5092 | <itemizedlist> | ... | ... |
qpdf/qtest/qpdf/test14-in.pdf
| ... | ... | @@ -65,6 +65,7 @@ endobj |
| 65 | 65 | 612 |
| 66 | 66 | 792 |
| 67 | 67 | ] |
| 68 | + /OrigPage 2 | |
| 68 | 69 | /Parent 4 0 R |
| 69 | 70 | /Resources << |
| 70 | 71 | /Font << |
| ... | ... | @@ -86,6 +87,7 @@ endobj |
| 86 | 87 | 612 |
| 87 | 88 | 792 |
| 88 | 89 | ] |
| 90 | + /OrigPage 3 | |
| 89 | 91 | /Parent 4 0 R |
| 90 | 92 | /Resources << |
| 91 | 93 | /Font << |
| ... | ... | @@ -237,21 +239,21 @@ xref |
| 237 | 239 | 0000000140 00000 n |
| 238 | 240 | 0000000252 00000 n |
| 239 | 241 | 0000000456 00000 n |
| 240 | -0000000661 00000 n | |
| 241 | -0000000866 00000 n | |
| 242 | -0000001084 00000 n | |
| 243 | -0000001186 00000 n | |
| 244 | -0000001206 00000 n | |
| 245 | -0000001325 00000 n | |
| 246 | -0000001384 00000 n | |
| 247 | -0000001487 00000 n | |
| 248 | -0000001507 00000 n | |
| 249 | -0000001566 00000 n | |
| 250 | -0000001669 00000 n | |
| 251 | -0000001689 00000 n | |
| 252 | -0000001748 00000 n | |
| 253 | -0000001851 00000 n | |
| 254 | -0000001871 00000 n | |
| 242 | +0000000675 00000 n | |
| 243 | +0000000894 00000 n | |
| 244 | +0000001112 00000 n | |
| 245 | +0000001214 00000 n | |
| 246 | +0000001234 00000 n | |
| 247 | +0000001353 00000 n | |
| 248 | +0000001412 00000 n | |
| 249 | +0000001515 00000 n | |
| 250 | +0000001535 00000 n | |
| 251 | +0000001594 00000 n | |
| 252 | +0000001697 00000 n | |
| 253 | +0000001717 00000 n | |
| 254 | +0000001776 00000 n | |
| 255 | +0000001879 00000 n | |
| 256 | +0000001899 00000 n | |
| 255 | 257 | trailer << |
| 256 | 258 | /QArray 2 0 R |
| 257 | 259 | /QDict 3 0 R |
| ... | ... | @@ -260,5 +262,5 @@ trailer << |
| 260 | 262 | /ID [<20eb74876a3e8212c1b4fd43153860b0><1bb7a926da191c58f675435d77997d21>] |
| 261 | 263 | >> |
| 262 | 264 | startxref |
| 263 | -1907 | |
| 265 | +1935 | |
| 264 | 266 | %%EOF | ... | ... |
qpdf/qtest/qpdf/test14-out.pdf
| ... | ... | @@ -16,10 +16,10 @@ endobj |
| 16 | 16 | << /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 11 0 R >> /Type /Page >> |
| 17 | 17 | endobj |
| 18 | 18 | 6 0 obj |
| 19 | -<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 13 0 R >> /Type /Page >> | |
| 19 | +<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /OrigPage 3 /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 13 0 R >> /Type /Page >> | |
| 20 | 20 | endobj |
| 21 | 21 | 7 0 obj |
| 22 | -<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 15 0 R >> /Type /Page >> | |
| 22 | +<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /OrigPage 2 /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 15 0 R >> /Type /Page >> | |
| 23 | 23 | endobj |
| 24 | 24 | 8 0 obj |
| 25 | 25 | << /Contents 16 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 17 0 R >> /Type /Page >> |
| ... | ... | @@ -88,18 +88,18 @@ xref |
| 88 | 88 | 0000000122 00000 n |
| 89 | 89 | 0000000199 00000 n |
| 90 | 90 | 0000000344 00000 n |
| 91 | -0000000490 00000 n | |
| 92 | -0000000636 00000 n | |
| 93 | -0000000782 00000 n | |
| 94 | -0000000877 00000 n | |
| 95 | -0000000985 00000 n | |
| 96 | -0000001016 00000 n | |
| 97 | -0000001112 00000 n | |
| 98 | -0000001143 00000 n | |
| 99 | -0000001239 00000 n | |
| 100 | -0000001270 00000 n | |
| 101 | -0000001366 00000 n | |
| 91 | +0000000502 00000 n | |
| 92 | +0000000660 00000 n | |
| 93 | +0000000806 00000 n | |
| 94 | +0000000901 00000 n | |
| 95 | +0000001009 00000 n | |
| 96 | +0000001040 00000 n | |
| 97 | +0000001136 00000 n | |
| 98 | +0000001167 00000 n | |
| 99 | +0000001263 00000 n | |
| 100 | +0000001294 00000 n | |
| 101 | +0000001390 00000 n | |
| 102 | 102 | trailer << /QArray 2 0 R /QDict 3 0 R /Root 1 0 R /Size 18 /ID [<20eb74876a3e8212c1b4fd43153860b0><31415926535897932384626433832795>] >> |
| 103 | 103 | startxref |
| 104 | -1397 | |
| 104 | +1421 | |
| 105 | 105 | %%EOF | ... | ... |
qpdf/qtest/qpdf/test14.out
qpdf/test_driver.cc
| ... | ... | @@ -740,7 +740,13 @@ void runtest(int n, char const* filename1, char const* arg2) |
| 740 | 740 | " not called 4-page file"); |
| 741 | 741 | } |
| 742 | 742 | // Swap pages 2 and 3 |
| 743 | - pdf.swapObjects(pages.at(1).getObjGen(), pages.at(2).getObjGen()); | |
| 743 | + auto orig_page2 = pages.at(1); | |
| 744 | + auto orig_page3 = pages.at(2); | |
| 745 | + assert(orig_page2.getKey("/OrigPage").getIntValue() == 2); | |
| 746 | + assert(orig_page3.getKey("/OrigPage").getIntValue() == 3); | |
| 747 | + pdf.swapObjects(orig_page2.getObjGen(), orig_page3.getObjGen()); | |
| 748 | + assert(orig_page2.getKey("/OrigPage").getIntValue() == 3); | |
| 749 | + assert(orig_page3.getKey("/OrigPage").getIntValue() == 2); | |
| 744 | 750 | // Replace object and swap objects |
| 745 | 751 | QPDFObjectHandle trailer = pdf.getTrailer(); |
| 746 | 752 | QPDFObjectHandle qdict = trailer.getKey("/QDict"); |
| ... | ... | @@ -759,18 +765,18 @@ void runtest(int n, char const* filename1, char const* arg2) |
| 759 | 765 | std::cout << "caught logic error as expected" << std::endl; |
| 760 | 766 | } |
| 761 | 767 | pdf.replaceObject(qdict.getObjGen(), new_dict); |
| 762 | - // Now qdict still points to the old dictionary | |
| 763 | - std::cout << "old dict: " << qdict.getKey("/Dict").getIntValue() | |
| 768 | + // Now qdict points to the new dictionary | |
| 769 | + std::cout << "old dict: " << qdict.getKey("/NewDict").getIntValue() | |
| 764 | 770 | << std::endl; |
| 765 | 771 | // Swap dict and array |
| 766 | 772 | pdf.swapObjects(qdict.getObjGen(), qarray.getObjGen()); |
| 767 | - // Now qarray will resolve to new object but qdict is still | |
| 768 | - // the old object | |
| 769 | - std::cout << "old dict: " << qdict.getKey("/Dict").getIntValue() | |
| 773 | + // Now qarray will resolve to new object and qdict resolves to | |
| 774 | + // the array | |
| 775 | + std::cout << "swapped array: " << qdict.getArrayItem(0).getName() | |
| 770 | 776 | << std::endl; |
| 771 | 777 | std::cout << "new dict: " << qarray.getKey("/NewDict").getIntValue() |
| 772 | 778 | << std::endl; |
| 773 | - // Reread qdict, now pointing to an array | |
| 779 | + // Reread qdict, still pointing to an array | |
| 774 | 780 | qdict = pdf.getObjectByObjGen(qdict.getObjGen()); |
| 775 | 781 | std::cout << "swapped array: " << qdict.getArrayItem(0).getName() |
| 776 | 782 | << std::endl; | ... | ... |