Commit d24a120c7ffb4cbfd2dcebe63577d8704442f7bd

Authored by Jay Berkenbilt
1 parent 6b15579a

Add QPDF::setImmediateCopyFrom

ChangeLog
  1 +2019-01-10 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Add new method QPDF::setImmediateCopyFrom. When called on a
  4 + source QPDF object, streams can be copied FROM that object to
  5 + other ones without having to keep the source QPDF or its input
  6 + source around. The cost is copying the streams into RAM. See
  7 + comments in QPDF.hh for setImmediateCopyFrom for a detailed
  8 + explanation.
  9 +
1 2019-01-07 Jay Berkenbilt <ejb@ql.org> 10 2019-01-07 Jay Berkenbilt <ejb@ql.org>
2 11
3 * 8.3.0: release 12 * 8.3.0: release
include/qpdf/QPDF.hh
@@ -160,6 +160,39 @@ class QPDF @@ -160,6 +160,39 @@ class QPDF
160 QPDF_DLL 160 QPDF_DLL
161 void setAttemptRecovery(bool); 161 void setAttemptRecovery(bool);
162 162
  163 + // Tell other QPDF objects that streams copied from this QPDF need
  164 + // to be fully copied when copyForeignObject is called on them.
  165 + // Calling setImmediateCopyFrom(true) on a QPDF object makes it
  166 + // possible for the object and its input source to disappear
  167 + // before streams copied from it are written with the destination
  168 + // QPDF object. Confused? Ordinarily, if you are going to copy
  169 + // objects from a source QPDF object to a destination QPDF object
  170 + // using copyForeignObject or addPage, the source object's input
  171 + // source must stick around until after the destination PDF is
  172 + // written. If you call this method on the source QPDF object, it
  173 + // sends a signal to the destination object that it must fully
  174 + // copy the stream data when copyForeignObject is called. It will do this by
  175 + // making a copy in RAM. Ordinarily the stream data is copied
  176 + // lazily to avoid unnecessary duplication of the stream data.
  177 + // Note that the stream data is copied into RAM only once
  178 + // regardless of how many objects the stream is copied into. The
  179 + // result is that, if you called setImmediateCopyFrom(true) on a
  180 + // given QPDF object prior to copying any of its streams, you do
  181 + // not need to keep it or its input source around after copying
  182 + // its objects to another QPDF. This is true even if the source
  183 + // streams use StreamDataProvider. Note that this method is called
  184 + // on the QPDF object you are copying FROM, not the one you are
  185 + // copying to. The reasoning for this is that there's no reason a
  186 + // given QPDF may not get objects copied to it from a variety of
  187 + // other objects, some transient and some not. Since what's
  188 + // relevant is whether the source QPDF is transient, the method
  189 + // must be called on the source QPDF, not the destination one.
  190 + // This method will make a copy of the stream in RAM, so be
  191 + // sure you have enough memory to simultaneously hold all the
  192 + // streams you're copying.
  193 + QPDF_DLL
  194 + void setImmediateCopyFrom(bool);
  195 +
163 // Other public methods 196 // Other public methods
164 197
165 // Return the list of warnings that have been issued so far and 198 // Return the list of warnings that have been issued so far and
@@ -248,6 +281,13 @@ class QPDF @@ -248,6 +281,13 @@ class QPDF
248 // original stream's QPDF object must stick around because the 281 // original stream's QPDF object must stick around because the
249 // QPDF object is itself the source of the original stream data. 282 // QPDF object is itself the source of the original stream data.
250 // For a more in-depth discussion, please see the TODO file. 283 // For a more in-depth discussion, please see the TODO file.
  284 + // Starting in 8.3.1, you can call setImmediateCopyFrom(true) on
  285 + // the SOURCE QPDF object (the one you're copying FROM). If you do
  286 + // this prior to copying any of its objects, then neither the
  287 + // source QPDF object nor its input source needs to stick around
  288 + // at all regardless of the source. The cost is that the stream
  289 + // data is copied into RAM at the time copyForeignObject is
  290 + // called. See setImmediateCopyFrom for more information.
251 // 291 //
252 // The return value of this method is an indirect reference to the 292 // The return value of this method is an indirect reference to the
253 // copied object in this file. This method is intended to be used 293 // copied object in this file. This method is intended to be used
@@ -1283,6 +1323,7 @@ class QPDF @@ -1283,6 +1323,7 @@ class QPDF
1283 std::set<QPDFObjGen> attachment_streams; 1323 std::set<QPDFObjGen> attachment_streams;
1284 bool reconstructed_xref; 1324 bool reconstructed_xref;
1285 bool fixed_dangling_refs; 1325 bool fixed_dangling_refs;
  1326 + bool immediate_copy_from;
1286 1327
1287 // Linearization data 1328 // Linearization data
1288 qpdf_offset_t first_xref_item_offset; // actual value from file 1329 qpdf_offset_t first_xref_item_offset; // actual value from file
libqpdf/QPDF.cc
@@ -147,6 +147,7 @@ QPDF::Members::Members() : @@ -147,6 +147,7 @@ QPDF::Members::Members() :
147 copied_stream_data_provider(0), 147 copied_stream_data_provider(0),
148 reconstructed_xref(false), 148 reconstructed_xref(false),
149 fixed_dangling_refs(false), 149 fixed_dangling_refs(false),
  150 + immediate_copy_from(false),
150 first_xref_item_offset(0), 151 first_xref_item_offset(0),
151 uncompressed_after_compressed(false) 152 uncompressed_after_compressed(false)
152 { 153 {
@@ -269,6 +270,12 @@ QPDF::setAttemptRecovery(bool val) @@ -269,6 +270,12 @@ QPDF::setAttemptRecovery(bool val)
269 this->m->attempt_recovery = val; 270 this->m->attempt_recovery = val;
270 } 271 }
271 272
  273 +void
  274 +QPDF::setImmediateCopyFrom(bool val)
  275 +{
  276 + this->m->immediate_copy_from = val;
  277 +}
  278 +
272 std::vector<QPDFExc> 279 std::vector<QPDFExc>
273 QPDF::getWarnings() 280 QPDF::getWarnings()
274 { 281 {
@@ -2376,6 +2383,19 @@ QPDF::replaceForeignIndirectObjects( @@ -2376,6 +2383,19 @@ QPDF::replaceForeignIndirectObjects(
2376 } 2383 }
2377 PointerHolder<Buffer> stream_buffer = 2384 PointerHolder<Buffer> stream_buffer =
2378 stream->getStreamDataBuffer(); 2385 stream->getStreamDataBuffer();
  2386 + if ((foreign_stream_qpdf->m->immediate_copy_from) &&
  2387 + (stream_buffer.getPointer() == 0))
  2388 + {
  2389 + // Pull the stream data into a buffer before attempting
  2390 + // the copy operation. Do it on the source stream so that
  2391 + // if the source stream is copied multiple times, we don't
  2392 + // have to keep duplicating the memory.
  2393 + QTC::TC("qpdf", "QPDF immediate copy stream data");
  2394 + foreign.replaceStreamData(foreign.getRawStreamData(),
  2395 + dict.getKey("/Filter"),
  2396 + dict.getKey("/DecodeParms"));
  2397 + stream_buffer = stream->getStreamDataBuffer();
  2398 + }
2379 PointerHolder<QPDFObjectHandle::StreamDataProvider> stream_provider = 2399 PointerHolder<QPDFObjectHandle::StreamDataProvider> stream_provider =
2380 stream->getStreamDataProvider(); 2400 stream->getStreamDataProvider();
2381 if (stream_buffer.getPointer()) 2401 if (stream_buffer.getPointer())
qpdf/qpdf.testcov
@@ -410,3 +410,4 @@ QPDF_encryption attachment stream 0 @@ -410,3 +410,4 @@ QPDF_encryption attachment stream 0
410 QPDF pipe foreign encrypted stream 0 410 QPDF pipe foreign encrypted stream 0
411 QPDF copy foreign stream with provider 0 411 QPDF copy foreign stream with provider 0
412 QPDF copy foreign stream with buffer 0 412 QPDF copy foreign stream with buffer 0
  413 +QPDF immediate copy stream data 0
qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf
1 %PDF-1.3 1 %PDF-1.3
2 %¿÷¢þ 2 %¿÷¢þ
3 1 0 obj 3 1 0 obj
4 -<< /Pages 5 0 R /Type /Catalog >> 4 +<< /Pages 6 0 R /Type /Catalog >>
5 endobj 5 endobj
6 2 0 obj 6 2 0 obj
7 -<< /O1 6 0 R /O2 7 0 R /O3 8 0 R /This-is-QTest true >> 7 +<< /O1 7 0 R /O2 8 0 R /O3 9 0 R /This-is-QTest true >>
8 endobj 8 endobj
9 3 0 obj 9 3 0 obj
10 << /Length 20 >> 10 << /Length 20 >>
@@ -19,39 +19,45 @@ potato @@ -19,39 +19,45 @@ potato
19 endstream 19 endstream
20 endobj 20 endobj
21 5 0 obj 21 5 0 obj
22 -<< /Count 3 /Kids [ 9 0 R 10 0 R 8 0 R ] /Type /Pages >> 22 +<< /Length 21 >>
  23 +stream
  24 +more data for stream
  25 +endstream
23 endobj 26 endobj
24 6 0 obj 27 6 0 obj
25 -[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 7 0 R >> 2.17828 ] >> /salad /O2 7 0 R /Stream1 11 0 R ] 28 +<< /Count 3 /Kids [ 10 0 R 11 0 R 9 0 R ] /Type /Pages >>
26 endobj 29 endobj
27 7 0 obj 30 7 0 obj
28 -<< /K1 [ 2.236 /O1 6 0 R 1.732 ] /O1 6 0 R /This-is-O2 true >> 31 +[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 8 0 R >> 2.17828 ] >> /salad /O2 8 0 R /Stream1 12 0 R ]
29 endobj 32 endobj
30 8 0 obj 33 8 0 obj
31 -<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 10 0 R /Parent 5 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >> 34 +<< /K1 [ 2.236 /O1 7 0 R 1.732 ] /O1 7 0 R /This-is-O2 true >>
32 endobj 35 endobj
33 9 0 obj 36 9 0 obj
34 -<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /Parent 5 0 R /Resources << /Font << /F1 15 0 R >> /ProcSet 16 0 R >> /Type /Page >> 37 +<< /Contents 13 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 11 0 R /Parent 6 0 R /Resources << /Font << /F1 14 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >>
35 endobj 38 endobj
36 10 0 obj 39 10 0 obj
37 -<< /Contents 17 0 R /MediaBox [ 0 0 612 792 ] /Parent 5 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >> 40 +<< /Contents 15 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 16 0 R >> /ProcSet 17 0 R >> /Type /Page >>
38 endobj 41 endobj
39 11 0 obj 42 11 0 obj
40 -<< /Stream2 18 0 R /This-is-Stream1 true /Length 18 >> 43 +<< /Contents 18 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 14 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >>
  44 +endobj
  45 +12 0 obj
  46 +<< /Stream2 19 0 R /This-is-Stream1 true /Length 18 >>
41 stream 47 stream
42 This is stream 1. 48 This is stream 1.
43 endstream 49 endstream
44 endobj 50 endobj
45 -12 0 obj 51 +13 0 obj
46 << /Length 47 >> 52 << /Length 47 >>
47 stream 53 stream
48 BT /F1 15 Tf 72 720 Td (Original page 2) Tj ET 54 BT /F1 15 Tf 72 720 Td (Original page 2) Tj ET
49 endstream 55 endstream
50 endobj 56 endobj
51 -13 0 obj 57 +14 0 obj
52 << /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >> 58 << /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >>
53 endobj 59 endobj
54 -14 0 obj 60 +15 0 obj
55 << /Length 44 >> 61 << /Length 44 >>
56 stream 62 stream
57 BT 63 BT
@@ -61,46 +67,47 @@ BT @@ -61,46 +67,47 @@ BT
61 ET 67 ET
62 endstream 68 endstream
63 endobj 69 endobj
64 -15 0 obj 70 +16 0 obj
65 << /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> 71 << /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
66 endobj 72 endobj
67 -16 0 obj 73 +17 0 obj
68 [ /PDF /Text ] 74 [ /PDF /Text ]
69 endobj 75 endobj
70 -17 0 obj 76 +18 0 obj
71 << /Length 47 >> 77 << /Length 47 >>
72 stream 78 stream
73 BT /F1 15 Tf 72 720 Td (Original page 3) Tj ET 79 BT /F1 15 Tf 72 720 Td (Original page 3) Tj ET
74 endstream 80 endstream
75 endobj 81 endobj
76 -18 0 obj  
77 -<< /Stream1 11 0 R /This-is-Stream2 true /Length 18 >> 82 +19 0 obj
  83 +<< /Stream1 12 0 R /This-is-Stream2 true /Length 18 >>
78 stream 84 stream
79 This is stream 2. 85 This is stream 2.
80 endstream 86 endstream
81 endobj 87 endobj
82 xref 88 xref
83 -0 19 89 +0 20
84 0000000000 65535 f 90 0000000000 65535 f
85 0000000015 00000 n 91 0000000015 00000 n
86 0000000064 00000 n 92 0000000064 00000 n
87 0000000135 00000 n 93 0000000135 00000 n
88 0000000204 00000 n 94 0000000204 00000 n
89 0000000259 00000 n 95 0000000259 00000 n
90 -0000000331 00000 n  
91 -0000000449 00000 n  
92 -0000000527 00000 n  
93 -0000000728 00000 n  
94 -0000000874 00000 n  
95 -0000001069 00000 n  
96 -0000001175 00000 n  
97 -0000001272 00000 n  
98 -0000001372 00000 n  
99 -0000001466 00000 n  
100 -0000001574 00000 n  
101 -0000001605 00000 n  
102 -0000001702 00000 n  
103 -trailer << /QTest 2 0 R /QTest2 [ 3 0 R 4 0 R ] /Root 1 0 R /Size 19 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> 96 +0000000329 00000 n
  97 +0000000402 00000 n
  98 +0000000520 00000 n
  99 +0000000598 00000 n
  100 +0000000799 00000 n
  101 +0000000946 00000 n
  102 +0000001141 00000 n
  103 +0000001247 00000 n
  104 +0000001344 00000 n
  105 +0000001444 00000 n
  106 +0000001538 00000 n
  107 +0000001646 00000 n
  108 +0000001677 00000 n
  109 +0000001774 00000 n
  110 +trailer << /QTest 2 0 R /QTest2 [ 3 0 R 4 0 R 5 0 R ] /Root 1 0 R /Size 20 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
104 startxref 111 startxref
105 -1808 112 +1880
106 %%EOF 113 %%EOF
qpdf/test_driver.cc
@@ -1130,25 +1130,56 @@ void runtest(int n, char const* filename1, char const* arg2) @@ -1130,25 +1130,56 @@ void runtest(int n, char const* filename1, char const* arg2)
1130 // Should get qtest plus only the O3 page and the page that O3 1130 // Should get qtest plus only the O3 page and the page that O3
1131 // points to. Inherited objects should be preserved. This test 1131 // points to. Inherited objects should be preserved. This test
1132 // also exercises copying from a stream that has a buffer and 1132 // also exercises copying from a stream that has a buffer and
1133 - // a provider, including copying a provider multiple times. 1133 + // a provider, including copying a provider multiple times. We
  1134 + // also exercise setImmediateCopyFrom.
1134 1135
1135 - Pl_Buffer p1("buffer");  
1136 - p1.write(QUtil::unsigned_char_pointer("new data for stream\n"),  
1137 - 20); // no null!  
1138 - p1.finish();  
1139 - PointerHolder<Buffer> b = p1.getBuffer();  
1140 - Provider* provider = new Provider(b);  
1141 - PointerHolder<QPDFObjectHandle::StreamDataProvider> p = provider; 1136 + // Create a provider. The provider stays in scope.
  1137 + PointerHolder<QPDFObjectHandle::StreamDataProvider> p1;
  1138 + {
  1139 + // Local scope
  1140 + Pl_Buffer pl("buffer");
  1141 + pl.write(QUtil::unsigned_char_pointer("new data for stream\n"),
  1142 + 20); // no null!
  1143 + pl.finish();
  1144 + PointerHolder<Buffer> b = pl.getBuffer();
  1145 + Provider* provider = new Provider(b);
  1146 + p1 = provider;
  1147 + }
  1148 + // Create a stream that uses a provider in empty1 and copy it
  1149 + // to empty2. It is copied from empty2 to the final pdf.
1142 QPDF empty1; 1150 QPDF empty1;
1143 empty1.emptyPDF(); 1151 empty1.emptyPDF();
1144 QPDFObjectHandle s1 = QPDFObjectHandle::newStream(&empty1); 1152 QPDFObjectHandle s1 = QPDFObjectHandle::newStream(&empty1);
1145 s1.replaceStreamData( 1153 s1.replaceStreamData(
1146 - p, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); 1154 + p1, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
1147 QPDF empty2; 1155 QPDF empty2;
1148 empty2.emptyPDF(); 1156 empty2.emptyPDF();
1149 s1 = empty2.copyForeignObject(s1); 1157 s1 = empty2.copyForeignObject(s1);
1150 { 1158 {
1151 - // Make sure original PDF is out of scope when we write. 1159 + // Make sure some source PDFs are out of scope when we
  1160 + // write.
  1161 +
  1162 + PointerHolder<QPDFObjectHandle::StreamDataProvider> p2;
  1163 + // Create another provider. This one will go out of scope
  1164 + // along with its containing qpdf, which has
  1165 + // setImmediateCopyFrom(true).
  1166 + {
  1167 + // Local scope
  1168 + Pl_Buffer pl("buffer");
  1169 + pl.write(QUtil::unsigned_char_pointer(
  1170 + "more data for stream\n"),
  1171 + 21); // no null!
  1172 + pl.finish();
  1173 + PointerHolder<Buffer> b = pl.getBuffer();
  1174 + Provider* provider = new Provider(b);
  1175 + p2 = provider;
  1176 + }
  1177 + QPDF empty3;
  1178 + empty3.emptyPDF();
  1179 + empty3.setImmediateCopyFrom(true);
  1180 + QPDFObjectHandle s3 = QPDFObjectHandle::newStream(&empty3);
  1181 + s3.replaceStreamData(
  1182 + p2, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
1152 assert(arg2 != 0); 1183 assert(arg2 != 0);
1153 QPDF oldpdf; 1184 QPDF oldpdf;
1154 oldpdf.processFile(arg2); 1185 oldpdf.processFile(arg2);
@@ -1167,6 +1198,8 @@ void runtest(int n, char const* filename1, char const* arg2) @@ -1167,6 +1198,8 @@ void runtest(int n, char const* filename1, char const* arg2)
1167 pdf.copyForeignObject(s1)); 1198 pdf.copyForeignObject(s1));
1168 pdf.getTrailer().getKey("/QTest2").appendItem( 1199 pdf.getTrailer().getKey("/QTest2").appendItem(
1169 pdf.copyForeignObject(s2)); 1200 pdf.copyForeignObject(s2));
  1201 + pdf.getTrailer().getKey("/QTest2").appendItem(
  1202 + pdf.copyForeignObject(s3));
1170 } 1203 }
1171 1204
1172 QPDFWriter w(pdf, "a.pdf"); 1205 QPDFWriter w(pdf, "a.pdf");