Commit d24a120c7ffb4cbfd2dcebe63577d8704442f7bd
1 parent
6b15579a
Add QPDF::setImmediateCopyFrom
Showing
6 changed files
with
154 additions
and
43 deletions
ChangeLog
| 1 | +2019-01-10 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * Add new method QPDF::setImmediateCopyFrom. When called on a | |
| 4 | + source QPDF object, streams can be copied FROM that object to | |
| 5 | + other ones without having to keep the source QPDF or its input | |
| 6 | + source around. The cost is copying the streams into RAM. See | |
| 7 | + comments in QPDF.hh for setImmediateCopyFrom for a detailed | |
| 8 | + explanation. | |
| 9 | + | |
| 1 | 10 | 2019-01-07 Jay Berkenbilt <ejb@ql.org> |
| 2 | 11 | |
| 3 | 12 | * 8.3.0: release | ... | ... |
include/qpdf/QPDF.hh
| ... | ... | @@ -160,6 +160,39 @@ class QPDF |
| 160 | 160 | QPDF_DLL |
| 161 | 161 | void setAttemptRecovery(bool); |
| 162 | 162 | |
| 163 | + // Tell other QPDF objects that streams copied from this QPDF need | |
| 164 | + // to be fully copied when copyForeignObject is called on them. | |
| 165 | + // Calling setImmediateCopyFrom(true) on a QPDF object makes it | |
| 166 | + // possible for the object and its input source to disappear | |
| 167 | + // before streams copied from it are written with the destination | |
| 168 | + // QPDF object. Confused? Ordinarily, if you are going to copy | |
| 169 | + // objects from a source QPDF object to a destination QPDF object | |
| 170 | + // using copyForeignObject or addPage, the source object's input | |
| 171 | + // source must stick around until after the destination PDF is | |
| 172 | + // written. If you call this method on the source QPDF object, it | |
| 173 | + // sends a signal to the destination object that it must fully | |
| 174 | + // copy the stream data when copyForeignObject is called. It will do this by | |
| 175 | + // making a copy in RAM. Ordinarily the stream data is copied | |
| 176 | + // lazily to avoid unnecessary duplication of the stream data. | |
| 177 | + // Note that the stream data is copied into RAM only once | |
| 178 | + // regardless of how many objects the stream is copied into. The | |
| 179 | + // result is that, if you called setImmediateCopyFrom(true) on a | |
| 180 | + // given QPDF object prior to copying any of its streams, you do | |
| 181 | + // not need to keep it or its input source around after copying | |
| 182 | + // its objects to another QPDF. This is true even if the source | |
| 183 | + // streams use StreamDataProvider. Note that this method is called | |
| 184 | + // on the QPDF object you are copying FROM, not the one you are | |
| 185 | + // copying to. The reasoning for this is that there's no reason a | |
| 186 | + // given QPDF may not get objects copied to it from a variety of | |
| 187 | + // other objects, some transient and some not. Since what's | |
| 188 | + // relevant is whether the source QPDF is transient, the method | |
| 189 | + // must be called on the source QPDF, not the destination one. | |
| 190 | + // This method will make a copy of the stream in RAM, so be | |
| 191 | + // sure you have enough memory to simultaneously hold all the | |
| 192 | + // streams you're copying. | |
| 193 | + QPDF_DLL | |
| 194 | + void setImmediateCopyFrom(bool); | |
| 195 | + | |
| 163 | 196 | // Other public methods |
| 164 | 197 | |
| 165 | 198 | // Return the list of warnings that have been issued so far and |
| ... | ... | @@ -248,6 +281,13 @@ class QPDF |
| 248 | 281 | // original stream's QPDF object must stick around because the |
| 249 | 282 | // QPDF object is itself the source of the original stream data. |
| 250 | 283 | // For a more in-depth discussion, please see the TODO file. |
| 284 | + // Starting in 8.3.1, you can call setImmediateCopyFrom(true) on | |
| 285 | + // the SOURCE QPDF object (the one you're copying FROM). If you do | |
| 286 | + // this prior to copying any of its objects, then neither the | |
| 287 | + // source QPDF object nor its input source needs to stick around | |
| 288 | + // at all regardless of the source. The cost is that the stream | |
| 289 | + // data is copied into RAM at the time copyForeignObject is | |
| 290 | + // called. See setImmediateCopyFrom for more information. | |
| 251 | 291 | // |
| 252 | 292 | // The return value of this method is an indirect reference to the |
| 253 | 293 | // copied object in this file. This method is intended to be used |
| ... | ... | @@ -1283,6 +1323,7 @@ class QPDF |
| 1283 | 1323 | std::set<QPDFObjGen> attachment_streams; |
| 1284 | 1324 | bool reconstructed_xref; |
| 1285 | 1325 | bool fixed_dangling_refs; |
| 1326 | + bool immediate_copy_from; | |
| 1286 | 1327 | |
| 1287 | 1328 | // Linearization data |
| 1288 | 1329 | qpdf_offset_t first_xref_item_offset; // actual value from file | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -147,6 +147,7 @@ QPDF::Members::Members() : |
| 147 | 147 | copied_stream_data_provider(0), |
| 148 | 148 | reconstructed_xref(false), |
| 149 | 149 | fixed_dangling_refs(false), |
| 150 | + immediate_copy_from(false), | |
| 150 | 151 | first_xref_item_offset(0), |
| 151 | 152 | uncompressed_after_compressed(false) |
| 152 | 153 | { |
| ... | ... | @@ -269,6 +270,12 @@ QPDF::setAttemptRecovery(bool val) |
| 269 | 270 | this->m->attempt_recovery = val; |
| 270 | 271 | } |
| 271 | 272 | |
| 273 | +void | |
| 274 | +QPDF::setImmediateCopyFrom(bool val) | |
| 275 | +{ | |
| 276 | + this->m->immediate_copy_from = val; | |
| 277 | +} | |
| 278 | + | |
| 272 | 279 | std::vector<QPDFExc> |
| 273 | 280 | QPDF::getWarnings() |
| 274 | 281 | { |
| ... | ... | @@ -2376,6 +2383,19 @@ QPDF::replaceForeignIndirectObjects( |
| 2376 | 2383 | } |
| 2377 | 2384 | PointerHolder<Buffer> stream_buffer = |
| 2378 | 2385 | stream->getStreamDataBuffer(); |
| 2386 | + if ((foreign_stream_qpdf->m->immediate_copy_from) && | |
| 2387 | + (stream_buffer.getPointer() == 0)) | |
| 2388 | + { | |
| 2389 | + // Pull the stream data into a buffer before attempting | |
| 2390 | + // the copy operation. Do it on the source stream so that | |
| 2391 | + // if the source stream is copied multiple times, we don't | |
| 2392 | + // have to keep duplicating the memory. | |
| 2393 | + QTC::TC("qpdf", "QPDF immediate copy stream data"); | |
| 2394 | + foreign.replaceStreamData(foreign.getRawStreamData(), | |
| 2395 | + dict.getKey("/Filter"), | |
| 2396 | + dict.getKey("/DecodeParms")); | |
| 2397 | + stream_buffer = stream->getStreamDataBuffer(); | |
| 2398 | + } | |
| 2379 | 2399 | PointerHolder<QPDFObjectHandle::StreamDataProvider> stream_provider = |
| 2380 | 2400 | stream->getStreamDataProvider(); |
| 2381 | 2401 | if (stream_buffer.getPointer()) | ... | ... |
qpdf/qpdf.testcov
qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf
| 1 | 1 | %PDF-1.3 |
| 2 | 2 | %¿÷¢þ |
| 3 | 3 | 1 0 obj |
| 4 | -<< /Pages 5 0 R /Type /Catalog >> | |
| 4 | +<< /Pages 6 0 R /Type /Catalog >> | |
| 5 | 5 | endobj |
| 6 | 6 | 2 0 obj |
| 7 | -<< /O1 6 0 R /O2 7 0 R /O3 8 0 R /This-is-QTest true >> | |
| 7 | +<< /O1 7 0 R /O2 8 0 R /O3 9 0 R /This-is-QTest true >> | |
| 8 | 8 | endobj |
| 9 | 9 | 3 0 obj |
| 10 | 10 | << /Length 20 >> |
| ... | ... | @@ -19,39 +19,45 @@ potato |
| 19 | 19 | endstream |
| 20 | 20 | endobj |
| 21 | 21 | 5 0 obj |
| 22 | -<< /Count 3 /Kids [ 9 0 R 10 0 R 8 0 R ] /Type /Pages >> | |
| 22 | +<< /Length 21 >> | |
| 23 | +stream | |
| 24 | +more data for stream | |
| 25 | +endstream | |
| 23 | 26 | endobj |
| 24 | 27 | 6 0 obj |
| 25 | -[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 7 0 R >> 2.17828 ] >> /salad /O2 7 0 R /Stream1 11 0 R ] | |
| 28 | +<< /Count 3 /Kids [ 10 0 R 11 0 R 9 0 R ] /Type /Pages >> | |
| 26 | 29 | endobj |
| 27 | 30 | 7 0 obj |
| 28 | -<< /K1 [ 2.236 /O1 6 0 R 1.732 ] /O1 6 0 R /This-is-O2 true >> | |
| 31 | +[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 8 0 R >> 2.17828 ] >> /salad /O2 8 0 R /Stream1 12 0 R ] | |
| 29 | 32 | endobj |
| 30 | 33 | 8 0 obj |
| 31 | -<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 10 0 R /Parent 5 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >> | |
| 34 | +<< /K1 [ 2.236 /O1 7 0 R 1.732 ] /O1 7 0 R /This-is-O2 true >> | |
| 32 | 35 | endobj |
| 33 | 36 | 9 0 obj |
| 34 | -<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /Parent 5 0 R /Resources << /Font << /F1 15 0 R >> /ProcSet 16 0 R >> /Type /Page >> | |
| 37 | +<< /Contents 13 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 11 0 R /Parent 6 0 R /Resources << /Font << /F1 14 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >> | |
| 35 | 38 | endobj |
| 36 | 39 | 10 0 obj |
| 37 | -<< /Contents 17 0 R /MediaBox [ 0 0 612 792 ] /Parent 5 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >> | |
| 40 | +<< /Contents 15 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 16 0 R >> /ProcSet 17 0 R >> /Type /Page >> | |
| 38 | 41 | endobj |
| 39 | 42 | 11 0 obj |
| 40 | -<< /Stream2 18 0 R /This-is-Stream1 true /Length 18 >> | |
| 43 | +<< /Contents 18 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 14 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >> | |
| 44 | +endobj | |
| 45 | +12 0 obj | |
| 46 | +<< /Stream2 19 0 R /This-is-Stream1 true /Length 18 >> | |
| 41 | 47 | stream |
| 42 | 48 | This is stream 1. |
| 43 | 49 | endstream |
| 44 | 50 | endobj |
| 45 | -12 0 obj | |
| 51 | +13 0 obj | |
| 46 | 52 | << /Length 47 >> |
| 47 | 53 | stream |
| 48 | 54 | BT /F1 15 Tf 72 720 Td (Original page 2) Tj ET |
| 49 | 55 | endstream |
| 50 | 56 | endobj |
| 51 | -13 0 obj | |
| 57 | +14 0 obj | |
| 52 | 58 | << /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >> |
| 53 | 59 | endobj |
| 54 | -14 0 obj | |
| 60 | +15 0 obj | |
| 55 | 61 | << /Length 44 >> |
| 56 | 62 | stream |
| 57 | 63 | BT |
| ... | ... | @@ -61,46 +67,47 @@ BT |
| 61 | 67 | ET |
| 62 | 68 | endstream |
| 63 | 69 | endobj |
| 64 | -15 0 obj | |
| 70 | +16 0 obj | |
| 65 | 71 | << /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> |
| 66 | 72 | endobj |
| 67 | -16 0 obj | |
| 73 | +17 0 obj | |
| 68 | 74 | [ /PDF /Text ] |
| 69 | 75 | endobj |
| 70 | -17 0 obj | |
| 76 | +18 0 obj | |
| 71 | 77 | << /Length 47 >> |
| 72 | 78 | stream |
| 73 | 79 | BT /F1 15 Tf 72 720 Td (Original page 3) Tj ET |
| 74 | 80 | endstream |
| 75 | 81 | endobj |
| 76 | -18 0 obj | |
| 77 | -<< /Stream1 11 0 R /This-is-Stream2 true /Length 18 >> | |
| 82 | +19 0 obj | |
| 83 | +<< /Stream1 12 0 R /This-is-Stream2 true /Length 18 >> | |
| 78 | 84 | stream |
| 79 | 85 | This is stream 2. |
| 80 | 86 | endstream |
| 81 | 87 | endobj |
| 82 | 88 | xref |
| 83 | -0 19 | |
| 89 | +0 20 | |
| 84 | 90 | 0000000000 65535 f |
| 85 | 91 | 0000000015 00000 n |
| 86 | 92 | 0000000064 00000 n |
| 87 | 93 | 0000000135 00000 n |
| 88 | 94 | 0000000204 00000 n |
| 89 | 95 | 0000000259 00000 n |
| 90 | -0000000331 00000 n | |
| 91 | -0000000449 00000 n | |
| 92 | -0000000527 00000 n | |
| 93 | -0000000728 00000 n | |
| 94 | -0000000874 00000 n | |
| 95 | -0000001069 00000 n | |
| 96 | -0000001175 00000 n | |
| 97 | -0000001272 00000 n | |
| 98 | -0000001372 00000 n | |
| 99 | -0000001466 00000 n | |
| 100 | -0000001574 00000 n | |
| 101 | -0000001605 00000 n | |
| 102 | -0000001702 00000 n | |
| 103 | -trailer << /QTest 2 0 R /QTest2 [ 3 0 R 4 0 R ] /Root 1 0 R /Size 19 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> | |
| 96 | +0000000329 00000 n | |
| 97 | +0000000402 00000 n | |
| 98 | +0000000520 00000 n | |
| 99 | +0000000598 00000 n | |
| 100 | +0000000799 00000 n | |
| 101 | +0000000946 00000 n | |
| 102 | +0000001141 00000 n | |
| 103 | +0000001247 00000 n | |
| 104 | +0000001344 00000 n | |
| 105 | +0000001444 00000 n | |
| 106 | +0000001538 00000 n | |
| 107 | +0000001646 00000 n | |
| 108 | +0000001677 00000 n | |
| 109 | +0000001774 00000 n | |
| 110 | +trailer << /QTest 2 0 R /QTest2 [ 3 0 R 4 0 R 5 0 R ] /Root 1 0 R /Size 20 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> | |
| 104 | 111 | startxref |
| 105 | -1808 | |
| 112 | +1880 | |
| 106 | 113 | %%EOF | ... | ... |
qpdf/test_driver.cc
| ... | ... | @@ -1130,25 +1130,56 @@ void runtest(int n, char const* filename1, char const* arg2) |
| 1130 | 1130 | // Should get qtest plus only the O3 page and the page that O3 |
| 1131 | 1131 | // points to. Inherited objects should be preserved. This test |
| 1132 | 1132 | // also exercises copying from a stream that has a buffer and |
| 1133 | - // a provider, including copying a provider multiple times. | |
| 1133 | + // a provider, including copying a provider multiple times. We | |
| 1134 | + // also exercise setImmediateCopyFrom. | |
| 1134 | 1135 | |
| 1135 | - Pl_Buffer p1("buffer"); | |
| 1136 | - p1.write(QUtil::unsigned_char_pointer("new data for stream\n"), | |
| 1137 | - 20); // no null! | |
| 1138 | - p1.finish(); | |
| 1139 | - PointerHolder<Buffer> b = p1.getBuffer(); | |
| 1140 | - Provider* provider = new Provider(b); | |
| 1141 | - PointerHolder<QPDFObjectHandle::StreamDataProvider> p = provider; | |
| 1136 | + // Create a provider. The provider stays in scope. | |
| 1137 | + PointerHolder<QPDFObjectHandle::StreamDataProvider> p1; | |
| 1138 | + { | |
| 1139 | + // Local scope | |
| 1140 | + Pl_Buffer pl("buffer"); | |
| 1141 | + pl.write(QUtil::unsigned_char_pointer("new data for stream\n"), | |
| 1142 | + 20); // no null! | |
| 1143 | + pl.finish(); | |
| 1144 | + PointerHolder<Buffer> b = pl.getBuffer(); | |
| 1145 | + Provider* provider = new Provider(b); | |
| 1146 | + p1 = provider; | |
| 1147 | + } | |
| 1148 | + // Create a stream that uses a provider in empty1 and copy it | |
| 1149 | + // to empty2. It is copied from empty2 to the final pdf. | |
| 1142 | 1150 | QPDF empty1; |
| 1143 | 1151 | empty1.emptyPDF(); |
| 1144 | 1152 | QPDFObjectHandle s1 = QPDFObjectHandle::newStream(&empty1); |
| 1145 | 1153 | s1.replaceStreamData( |
| 1146 | - p, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 1154 | + p1, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 1147 | 1155 | QPDF empty2; |
| 1148 | 1156 | empty2.emptyPDF(); |
| 1149 | 1157 | s1 = empty2.copyForeignObject(s1); |
| 1150 | 1158 | { |
| 1151 | - // Make sure original PDF is out of scope when we write. | |
| 1159 | + // Make sure some source PDFs are out of scope when we | |
| 1160 | + // write. | |
| 1161 | + | |
| 1162 | + PointerHolder<QPDFObjectHandle::StreamDataProvider> p2; | |
| 1163 | + // Create another provider. This one will go out of scope | |
| 1164 | + // along with its containing qpdf, which has | |
| 1165 | + // setImmediateCopyFrom(true). | |
| 1166 | + { | |
| 1167 | + // Local scope | |
| 1168 | + Pl_Buffer pl("buffer"); | |
| 1169 | + pl.write(QUtil::unsigned_char_pointer( | |
| 1170 | + "more data for stream\n"), | |
| 1171 | + 21); // no null! | |
| 1172 | + pl.finish(); | |
| 1173 | + PointerHolder<Buffer> b = pl.getBuffer(); | |
| 1174 | + Provider* provider = new Provider(b); | |
| 1175 | + p2 = provider; | |
| 1176 | + } | |
| 1177 | + QPDF empty3; | |
| 1178 | + empty3.emptyPDF(); | |
| 1179 | + empty3.setImmediateCopyFrom(true); | |
| 1180 | + QPDFObjectHandle s3 = QPDFObjectHandle::newStream(&empty3); | |
| 1181 | + s3.replaceStreamData( | |
| 1182 | + p2, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 1152 | 1183 | assert(arg2 != 0); |
| 1153 | 1184 | QPDF oldpdf; |
| 1154 | 1185 | oldpdf.processFile(arg2); |
| ... | ... | @@ -1167,6 +1198,8 @@ void runtest(int n, char const* filename1, char const* arg2) |
| 1167 | 1198 | pdf.copyForeignObject(s1)); |
| 1168 | 1199 | pdf.getTrailer().getKey("/QTest2").appendItem( |
| 1169 | 1200 | pdf.copyForeignObject(s2)); |
| 1201 | + pdf.getTrailer().getKey("/QTest2").appendItem( | |
| 1202 | + pdf.copyForeignObject(s3)); | |
| 1170 | 1203 | } |
| 1171 | 1204 | |
| 1172 | 1205 | QPDFWriter w(pdf, "a.pdf"); | ... | ... |