Commit d24a120c7ffb4cbfd2dcebe63577d8704442f7bd

Authored by Jay Berkenbilt
1 parent 6b15579a

Add QPDF::setImmediateCopyFrom

ChangeLog
  1 +2019-01-10 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Add new method QPDF::setImmediateCopyFrom. When called on a
  4 + source QPDF object, streams can be copied FROM that object to
  5 + other ones without having to keep the source QPDF or its input
  6 + source around. The cost is copying the streams into RAM. See
  7 + comments in QPDF.hh for setImmediateCopyFrom for a detailed
  8 + explanation.
  9 +
1 10 2019-01-07 Jay Berkenbilt <ejb@ql.org>
2 11  
3 12 * 8.3.0: release
... ...
include/qpdf/QPDF.hh
... ... @@ -160,6 +160,39 @@ class QPDF
160 160 QPDF_DLL
161 161 void setAttemptRecovery(bool);
162 162  
  163 + // Tell other QPDF objects that streams copied from this QPDF need
  164 + // to be fully copied when copyForeignObject is called on them.
  165 + // Calling setImmediateCopyFrom(true) on a QPDF object makes it
  166 + // possible for the object and its input source to disappear
  167 + // before streams copied from it are written with the destination
  168 + // QPDF object. Confused? Ordinarily, if you are going to copy
  169 + // objects from a source QPDF object to a destination QPDF object
  170 + // using copyForeignObject or addPage, the source object's input
  171 + // source must stick around until after the destination PDF is
  172 + // written. If you call this method on the source QPDF object, it
  173 + // sends a signal to the destination object that it must fully
  174 + // copy the stream data when copyForeignObject is called. It does
  175 + // this by making a copy in RAM. Ordinarily the stream data is
  176 + // copied lazily to avoid unnecessary duplication of the stream data.
  177 + // Note that the stream data is copied into RAM only once
  178 + // regardless of how many objects the stream is copied into. The
  179 + // result is that, if you call setImmediateCopyFrom(true) on a
  180 + // given QPDF object prior to copying any of its streams, you do
  181 + // not need to keep it or its input source around after copying
  182 + // its objects to another QPDF. This is true even if the source
  183 + // streams use StreamDataProvider. Note that this method is called
  184 + // on the QPDF object you are copying FROM, not the one you are
  185 + // copying to. The reasoning for this is that there's no reason a
  186 + // given QPDF may not get objects copied to it from a variety of
  187 + // other objects, some transient and some not. Since what's
  188 + // relevant is whether the source QPDF is transient, the method
  189 + // must be called on the source QPDF, not the destination one.
  190 + // This method will make a copy of the stream in RAM, so be
  191 + // sure you have enough memory to simultaneously hold all the
  192 + // streams you're copying.
  193 + QPDF_DLL
  194 + void setImmediateCopyFrom(bool);
  195 +
163 196 // Other public methods
164 197  
165 198 // Return the list of warnings that have been issued so far and
... ... @@ -248,6 +281,13 @@ class QPDF
248 281 // original stream's QPDF object must stick around because the
249 282 // QPDF object is itself the source of the original stream data.
250 283 // For a more in-depth discussion, please see the TODO file.
  284 + // Starting in 8.3.1, you can call setImmediateCopyFrom(true) on
  285 + // the SOURCE QPDF object (the one you're copying FROM). If you do
  286 + // this prior to copying any of its objects, then neither the
  287 + // source QPDF object nor its input source needs to stick around
  288 + // at all regardless of the source. The cost is that the stream
  289 + // data is copied into RAM at the time copyForeignObject is
  290 + // called. See setImmediateCopyFrom for more information.
251 291 //
252 292 // The return value of this method is an indirect reference to the
253 293 // copied object in this file. This method is intended to be used
... ... @@ -1283,6 +1323,7 @@ class QPDF
1283 1323 std::set<QPDFObjGen> attachment_streams;
1284 1324 bool reconstructed_xref;
1285 1325 bool fixed_dangling_refs;
  1326 + bool immediate_copy_from;
1286 1327  
1287 1328 // Linearization data
1288 1329 qpdf_offset_t first_xref_item_offset; // actual value from file
... ...
libqpdf/QPDF.cc
... ... @@ -147,6 +147,7 @@ QPDF::Members::Members() :
147 147 copied_stream_data_provider(0),
148 148 reconstructed_xref(false),
149 149 fixed_dangling_refs(false),
  150 + immediate_copy_from(false),
150 151 first_xref_item_offset(0),
151 152 uncompressed_after_compressed(false)
152 153 {
... ... @@ -269,6 +270,12 @@ QPDF::setAttemptRecovery(bool val)
269 270 this->m->attempt_recovery = val;
270 271 }
271 272  
  273 +void
  274 +QPDF::setImmediateCopyFrom(bool val)
  275 +{
  276 + this->m->immediate_copy_from = val;
  277 +}
  278 +
272 279 std::vector<QPDFExc>
273 280 QPDF::getWarnings()
274 281 {
... ... @@ -2376,6 +2383,19 @@ QPDF::replaceForeignIndirectObjects(
2376 2383 }
2377 2384 PointerHolder<Buffer> stream_buffer =
2378 2385 stream->getStreamDataBuffer();
  2386 + if ((foreign_stream_qpdf->m->immediate_copy_from) &&
  2387 + (stream_buffer.getPointer() == 0))
  2388 + {
  2389 + // Pull the stream data into a buffer before attempting
  2390 + // the copy operation. Do it on the source stream so that
  2391 + // if the source stream is copied multiple times, we don't
  2392 + // have to keep duplicating the memory.
  2393 + QTC::TC("qpdf", "QPDF immediate copy stream data");
  2394 + foreign.replaceStreamData(foreign.getRawStreamData(),
  2395 + dict.getKey("/Filter"),
  2396 + dict.getKey("/DecodeParms"));
  2397 + stream_buffer = stream->getStreamDataBuffer();
  2398 + }
2379 2399 PointerHolder<QPDFObjectHandle::StreamDataProvider> stream_provider =
2380 2400 stream->getStreamDataProvider();
2381 2401 if (stream_buffer.getPointer())
... ...
qpdf/qpdf.testcov
... ... @@ -410,3 +410,4 @@ QPDF_encryption attachment stream 0
410 410 QPDF pipe foreign encrypted stream 0
411 411 QPDF copy foreign stream with provider 0
412 412 QPDF copy foreign stream with buffer 0
  413 +QPDF immediate copy stream data 0
... ...
qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf
1 1 %PDF-1.3
2 2 %¿÷¢þ
3 3 1 0 obj
4   -<< /Pages 5 0 R /Type /Catalog >>
  4 +<< /Pages 6 0 R /Type /Catalog >>
5 5 endobj
6 6 2 0 obj
7   -<< /O1 6 0 R /O2 7 0 R /O3 8 0 R /This-is-QTest true >>
  7 +<< /O1 7 0 R /O2 8 0 R /O3 9 0 R /This-is-QTest true >>
8 8 endobj
9 9 3 0 obj
10 10 << /Length 20 >>
... ... @@ -19,39 +19,45 @@ potato
19 19 endstream
20 20 endobj
21 21 5 0 obj
22   -<< /Count 3 /Kids [ 9 0 R 10 0 R 8 0 R ] /Type /Pages >>
  22 +<< /Length 21 >>
  23 +stream
  24 +more data for stream
  25 +endstream
23 26 endobj
24 27 6 0 obj
25   -[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 7 0 R >> 2.17828 ] >> /salad /O2 7 0 R /Stream1 11 0 R ]
  28 +<< /Count 3 /Kids [ 10 0 R 11 0 R 9 0 R ] /Type /Pages >>
26 29 endobj
27 30 7 0 obj
28   -<< /K1 [ 2.236 /O1 6 0 R 1.732 ] /O1 6 0 R /This-is-O2 true >>
  31 +[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 8 0 R >> 2.17828 ] >> /salad /O2 8 0 R /Stream1 12 0 R ]
29 32 endobj
30 33 8 0 obj
31   -<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 10 0 R /Parent 5 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >>
  34 +<< /K1 [ 2.236 /O1 7 0 R 1.732 ] /O1 7 0 R /This-is-O2 true >>
32 35 endobj
33 36 9 0 obj
34   -<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /Parent 5 0 R /Resources << /Font << /F1 15 0 R >> /ProcSet 16 0 R >> /Type /Page >>
  37 +<< /Contents 13 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 11 0 R /Parent 6 0 R /Resources << /Font << /F1 14 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >>
35 38 endobj
36 39 10 0 obj
37   -<< /Contents 17 0 R /MediaBox [ 0 0 612 792 ] /Parent 5 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >>
  40 +<< /Contents 15 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 16 0 R >> /ProcSet 17 0 R >> /Type /Page >>
38 41 endobj
39 42 11 0 obj
40   -<< /Stream2 18 0 R /This-is-Stream1 true /Length 18 >>
  43 +<< /Contents 18 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 14 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >>
  44 +endobj
  45 +12 0 obj
  46 +<< /Stream2 19 0 R /This-is-Stream1 true /Length 18 >>
41 47 stream
42 48 This is stream 1.
43 49 endstream
44 50 endobj
45   -12 0 obj
  51 +13 0 obj
46 52 << /Length 47 >>
47 53 stream
48 54 BT /F1 15 Tf 72 720 Td (Original page 2) Tj ET
49 55 endstream
50 56 endobj
51   -13 0 obj
  57 +14 0 obj
52 58 << /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >>
53 59 endobj
54   -14 0 obj
  60 +15 0 obj
55 61 << /Length 44 >>
56 62 stream
57 63 BT
... ... @@ -61,46 +67,47 @@ BT
61 67 ET
62 68 endstream
63 69 endobj
64   -15 0 obj
  70 +16 0 obj
65 71 << /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
66 72 endobj
67   -16 0 obj
  73 +17 0 obj
68 74 [ /PDF /Text ]
69 75 endobj
70   -17 0 obj
  76 +18 0 obj
71 77 << /Length 47 >>
72 78 stream
73 79 BT /F1 15 Tf 72 720 Td (Original page 3) Tj ET
74 80 endstream
75 81 endobj
76   -18 0 obj
77   -<< /Stream1 11 0 R /This-is-Stream2 true /Length 18 >>
  82 +19 0 obj
  83 +<< /Stream1 12 0 R /This-is-Stream2 true /Length 18 >>
78 84 stream
79 85 This is stream 2.
80 86 endstream
81 87 endobj
82 88 xref
83   -0 19
  89 +0 20
84 90 0000000000 65535 f
85 91 0000000015 00000 n
86 92 0000000064 00000 n
87 93 0000000135 00000 n
88 94 0000000204 00000 n
89 95 0000000259 00000 n
90   -0000000331 00000 n
91   -0000000449 00000 n
92   -0000000527 00000 n
93   -0000000728 00000 n
94   -0000000874 00000 n
95   -0000001069 00000 n
96   -0000001175 00000 n
97   -0000001272 00000 n
98   -0000001372 00000 n
99   -0000001466 00000 n
100   -0000001574 00000 n
101   -0000001605 00000 n
102   -0000001702 00000 n
103   -trailer << /QTest 2 0 R /QTest2 [ 3 0 R 4 0 R ] /Root 1 0 R /Size 19 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
  96 +0000000329 00000 n
  97 +0000000402 00000 n
  98 +0000000520 00000 n
  99 +0000000598 00000 n
  100 +0000000799 00000 n
  101 +0000000946 00000 n
  102 +0000001141 00000 n
  103 +0000001247 00000 n
  104 +0000001344 00000 n
  105 +0000001444 00000 n
  106 +0000001538 00000 n
  107 +0000001646 00000 n
  108 +0000001677 00000 n
  109 +0000001774 00000 n
  110 +trailer << /QTest 2 0 R /QTest2 [ 3 0 R 4 0 R 5 0 R ] /Root 1 0 R /Size 20 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
104 111 startxref
105   -1808
  112 +1880
106 113 %%EOF
... ...
qpdf/test_driver.cc
... ... @@ -1130,25 +1130,56 @@ void runtest(int n, char const* filename1, char const* arg2)
1130 1130 // Should get qtest plus only the O3 page and the page that O3
1131 1131 // points to. Inherited objects should be preserved. This test
1132 1132 // also exercises copying from a stream that has a buffer and
1133   - // a provider, including copying a provider multiple times.
  1133 + // a provider, including copying a provider multiple times. We
  1134 + // also exercise setImmediateCopyFrom.
1134 1135  
1135   - Pl_Buffer p1("buffer");
1136   - p1.write(QUtil::unsigned_char_pointer("new data for stream\n"),
1137   - 20); // no null!
1138   - p1.finish();
1139   - PointerHolder<Buffer> b = p1.getBuffer();
1140   - Provider* provider = new Provider(b);
1141   - PointerHolder<QPDFObjectHandle::StreamDataProvider> p = provider;
  1136 + // Create a provider. The provider stays in scope.
  1137 + PointerHolder<QPDFObjectHandle::StreamDataProvider> p1;
  1138 + {
  1139 + // Local scope
  1140 + Pl_Buffer pl("buffer");
  1141 + pl.write(QUtil::unsigned_char_pointer("new data for stream\n"),
  1142 + 20); // no null!
  1143 + pl.finish();
  1144 + PointerHolder<Buffer> b = pl.getBuffer();
  1145 + Provider* provider = new Provider(b);
  1146 + p1 = provider;
  1147 + }
  1148 + // Create a stream that uses a provider in empty1 and copy it
  1149 + // to empty2. It is copied from empty2 to the final pdf.
1142 1150 QPDF empty1;
1143 1151 empty1.emptyPDF();
1144 1152 QPDFObjectHandle s1 = QPDFObjectHandle::newStream(&empty1);
1145 1153 s1.replaceStreamData(
1146   - p, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
  1154 + p1, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
1147 1155 QPDF empty2;
1148 1156 empty2.emptyPDF();
1149 1157 s1 = empty2.copyForeignObject(s1);
1150 1158 {
1151   - // Make sure original PDF is out of scope when we write.
  1159 + // Make sure some source PDFs are out of scope when we
  1160 + // write.
  1161 +
  1162 + PointerHolder<QPDFObjectHandle::StreamDataProvider> p2;
  1163 + // Create another provider. This one will go out of scope
  1164 + // along with its containing qpdf, which has
  1165 + // setImmediateCopyFrom(true).
  1166 + {
  1167 + // Local scope
  1168 + Pl_Buffer pl("buffer");
  1169 + pl.write(QUtil::unsigned_char_pointer(
  1170 + "more data for stream\n"),
  1171 + 21); // no null!
  1172 + pl.finish();
  1173 + PointerHolder<Buffer> b = pl.getBuffer();
  1174 + Provider* provider = new Provider(b);
  1175 + p2 = provider;
  1176 + }
  1177 + QPDF empty3;
  1178 + empty3.emptyPDF();
  1179 + empty3.setImmediateCopyFrom(true);
  1180 + QPDFObjectHandle s3 = QPDFObjectHandle::newStream(&empty3);
  1181 + s3.replaceStreamData(
  1182 + p2, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
1152 1183 assert(arg2 != 0);
1153 1184 QPDF oldpdf;
1154 1185 oldpdf.processFile(arg2);
... ... @@ -1167,6 +1198,8 @@ void runtest(int n, char const* filename1, char const* arg2)
1167 1198 pdf.copyForeignObject(s1));
1168 1199 pdf.getTrailer().getKey("/QTest2").appendItem(
1169 1200 pdf.copyForeignObject(s2));
  1201 + pdf.getTrailer().getKey("/QTest2").appendItem(
  1202 + pdf.copyForeignObject(s3));
1170 1203 }
1171 1204  
1172 1205 QPDFWriter w(pdf, "a.pdf");
... ...