Commit b0b6d9f21fe77fb7b46469529bf252f0dbcaa551

Authored by Jay Berkenbilt
Committed by GitHub
2 parents d7a364b8 36866689

Merge pull request #1067 from m-holger/pl_buff

Reduce copying of Buffer contents
include/qpdf/Buffer.hh
... ... @@ -24,6 +24,7 @@
24 24  
25 25 #include <cstddef>
26 26 #include <memory>
  27 +#include <string>
27 28  
28 29 class Buffer
29 30 {
... ... @@ -35,11 +36,15 @@ class Buffer
35 36 // object is destroyed.
36 37 QPDF_DLL
37 38 Buffer(size_t size);
  39 + QPDF_DLL
  40 + Buffer(std::string&& content);
38 41  
39 42 // Create a Buffer object whose memory is owned by the caller and will not be freed when the
40 43 // Buffer is destroyed.
41 44 QPDF_DLL
42 45 Buffer(unsigned char* buf, size_t size);
  46 + QPDF_DLL
  47 + Buffer(std::string& content);
43 48  
44 49 [[deprecated("Move Buffer or use Buffer::copy instead")]] QPDF_DLL Buffer(Buffer const&);
45 50 [[deprecated("Move Buffer or use Buffer::copy instead")]] QPDF_DLL Buffer&
... ... @@ -75,8 +80,10 @@ class Buffer
75 80  
76 81 private:
77 82 Members(size_t size, unsigned char* buf, bool own_memory);
  83 + Members(std::string&& content);
78 84 Members(Members const&) = delete;
79 85  
  86 + std::string str;
80 87 bool own_memory;
81 88 size_t size;
82 89 unsigned char* buf;
... ...
include/qpdf/Pl_Buffer.hh
... ... @@ -33,7 +33,7 @@
33 33 #include <qpdf/PointerHolder.hh> // unused -- remove in qpdf 12 (see #785)
34 34  
35 35 #include <memory>
36   -#include <vector>
  36 +#include <string>
37 37  
38 38 class QPDF_DLL_CLASS Pl_Buffer: public Pipeline
39 39 {
... ... @@ -64,6 +64,10 @@ class QPDF_DLL_CLASS Pl_Buffer: public Pipeline
64 64 QPDF_DLL
65 65 void getMallocBuffer(unsigned char** buf, size_t* len);
66 66  
  67 + // Same as getBuffer but returns the result as a string.
  68 + QPDF_DLL
  69 + std::string getString();
  70 +
67 71 private:
68 72 class QPDF_DLL_PRIVATE Members
69 73 {
... ... @@ -78,7 +82,7 @@ class QPDF_DLL_CLASS Pl_Buffer: public Pipeline
78 82 Members(Members const&) = delete;
79 83  
80 84 bool ready{true};
81   - std::vector<unsigned char> data;
  85 + std::string data;
82 86 };
83 87  
84 88 std::shared_ptr<Members> m;
... ...
include/qpdf/QPDF.hh
... ... @@ -1133,7 +1133,7 @@ class QPDF
1133 1133 Pipeline*& pipeline,
1134 1134 QPDFObjGen const& og,
1135 1135 QPDFObjectHandle& stream_dict,
1136   - std::vector<std::shared_ptr<Pipeline>>& heap);
  1136 + std::unique_ptr<Pipeline>& heap);
1137 1137  
1138 1138 // Methods to support object copying
1139 1139 void reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top);
... ...
libqpdf/Buffer.cc
... ... @@ -27,6 +27,14 @@ Buffer::Members::Members(size_t size, unsigned char* buf, bool own_memory) :
27 27 }
28 28 }
29 29  
  30 +Buffer::Members::Members(std::string&& content) :
  31 + str(std::move(content)),
  32 + own_memory(false),
  33 + size(str.size()),
  34 + buf(reinterpret_cast<unsigned char*>(str.data()))
  35 +{
  36 +}
  37 +
30 38 Buffer::Members::~Members()
31 39 {
32 40 if (this->own_memory) {
... ... @@ -44,11 +52,21 @@ Buffer::Buffer(size_t size) :
44 52 {
45 53 }
46 54  
  55 +Buffer::Buffer(std::string&& content) :
  56 + m(new Members(std::move(content)))
  57 +{
  58 +}
  59 +
47 60 Buffer::Buffer(unsigned char* buf, size_t size) :
48 61 m(new Members(size, buf, false))
49 62 {
50 63 }
51 64  
  65 +Buffer::Buffer(std::string& content) :
  66 + m(new Members(content.size(), reinterpret_cast<unsigned char*>(content.data()), false))
  67 +{
  68 +}
  69 +
52 70 Buffer::Buffer(Buffer const& rhs)
53 71 {
54 72 assert(test_mode);
... ...
libqpdf/Pl_Buffer.cc
... ... @@ -19,7 +19,7 @@ Pl_Buffer::~Pl_Buffer() // NOLINT (modernize-use-equals-default)
19 19 void
20 20 Pl_Buffer::write(unsigned char const* buf, size_t len)
21 21 {
22   - m->data.insert(m->data.end(), buf, buf + len);
  22 + m->data.append(reinterpret_cast<char const*>(buf), len);
23 23 m->ready = false;
24 24  
25 25 if (getNext(true)) {
... ... @@ -42,15 +42,20 @@ Pl_Buffer::getBuffer()
42 42 if (!m->ready) {
43 43 throw std::logic_error("Pl_Buffer::getBuffer() called when not ready");
44 44 }
  45 + auto* b = new Buffer(std::move(m->data));
  46 + m->data.clear();
  47 + return b;
  48 +}
45 49  
46   - auto size = m->data.size();
47   - auto* b = new Buffer(size);
48   - if (size > 0) {
49   - unsigned char* p = b->getBuffer();
50   - memcpy(p, m->data.data(), size);
  50 +std::string
  51 +Pl_Buffer::getString()
  52 +{
  53 + if (!m->ready) {
  54 + throw std::logic_error("Pl_Buffer::getString() called when not ready");
51 55 }
  56 + auto s = std::move(m->data);
52 57 m->data.clear();
53   - return b;
  58 + return s;
54 59 }
55 60  
56 61 std::shared_ptr<Buffer>
... ...
libqpdf/QPDF.cc
... ... @@ -2413,29 +2413,23 @@ QPDF::pipeStreamData(
2413 2413 bool suppress_warnings,
2414 2414 bool will_retry)
2415 2415 {
2416   - std::vector<std::shared_ptr<Pipeline>> to_delete;
  2416 + std::unique_ptr<Pipeline> to_delete;
2417 2417 if (encp->encrypted) {
2418 2418 decryptStream(encp, file, qpdf_for_warning, pipeline, og, stream_dict, to_delete);
2419 2419 }
2420 2420  
2421 2421 bool attempted_finish = false;
2422   - bool success = false;
2423 2422 try {
2424 2423 file->seek(offset, SEEK_SET);
2425   - char buf[10240];
2426   - while (length > 0) {
2427   - size_t to_read = (sizeof(buf) < length ? sizeof(buf) : length);
2428   - size_t len = file->read(buf, to_read);
2429   - if (len == 0) {
2430   - throw damagedPDF(
2431   - file, "", file->getLastOffset(), "unexpected EOF reading stream data");
2432   - }
2433   - length -= len;
2434   - pipeline->write(buf, len);
  2424 + auto buf = std::make_unique<char[]>(length);
  2425 + if (auto read = file->read(buf.get(), length); read != length) {
  2426 + throw damagedPDF(
  2427 + file, "", offset + toO(read), "unexpected EOF reading stream data");
2435 2428 }
  2429 + pipeline->write(buf.get(), length);
2436 2430 attempted_finish = true;
2437 2431 pipeline->finish();
2438   - success = true;
  2432 + return true;
2439 2433 } catch (QPDFExc& e) {
2440 2434 if (!suppress_warnings) {
2441 2435 qpdf_for_warning.warn(e);
... ... @@ -2458,8 +2452,7 @@ QPDF::pipeStreamData(
2458 2452 file,
2459 2453 "",
2460 2454 file->getLastOffset(),
2461   - "stream will be re-processed without"
2462   - " filtering to avoid data loss"));
  2455 + "stream will be re-processed without filtering to avoid data loss"));
2463 2456 }
2464 2457 }
2465 2458 }
... ... @@ -2470,7 +2463,7 @@ QPDF::pipeStreamData(
2470 2463 // ignore
2471 2464 }
2472 2465 }
2473   - return success;
  2466 + return false ;
2474 2467 }
2475 2468  
2476 2469 bool
... ...
libqpdf/QPDFAcroFormDocumentHelper.cc
... ... @@ -584,8 +584,7 @@ QPDFAcroFormDocumentHelper::adjustDefaultAppearances(
584 584 ResourceReplacer rr(dr_map, rf.getNamesByResourceType());
585 585 Pl_Buffer buf_pl("filtered DA");
586 586 da_stream.filterAsContents(&rr, &buf_pl);
587   - auto buf = buf_pl.getBufferSharedPointer();
588   - std::string new_da(reinterpret_cast<char*>(buf->getBuffer()), buf->getSize());
  587 + std::string new_da = buf_pl.getString();
589 588 obj.replaceKey("/DA", QPDFObjectHandle::newString(new_da));
590 589 }
591 590  
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -1708,8 +1708,7 @@ QPDFObjectHandle::pipeContentStreams(
1708 1708 need_newline = (lc.getLastChar() != static_cast<unsigned char>('\n'));
1709 1709 QTC::TC("qpdf", "QPDFObjectHandle need_newline", need_newline ? 0 : 1);
1710 1710 }
1711   - std::unique_ptr<Buffer> b(buf.getBuffer());
1712   - p->write(b->getBuffer(), b->getSize());
  1711 + p->writeString(buf.getString());
1713 1712 p->finish();
1714 1713 }
1715 1714  
... ...
libqpdf/QPDFPageObjectHelper.cc
... ... @@ -175,14 +175,11 @@ InlineImageTracker::handleToken(QPDFTokenizer::Token const&amp; token)
175 175 size_t len = image_data.length();
176 176 if (len >= this->min_size) {
177 177 QTC::TC("qpdf", "QPDFPageObjectHelper externalize inline image");
178   - Pl_Buffer b("image_data");
179   - b.writeString(image_data);
180   - b.finish();
181 178 QPDFObjectHandle dict = convertIIDict(QPDFObjectHandle::parse(dict_str));
182 179 dict.replaceKey("/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(len)));
183 180 std::string name = resources.getUniqueResourceName("/IIm", this->min_suffix);
184 181 QPDFObjectHandle image =
185   - QPDFObjectHandle::newStream(this->qpdf, b.getBufferSharedPointer());
  182 + QPDFObjectHandle::newStream(this->qpdf, std::make_shared<Buffer>(std::move(image_data)));
186 183 image.replaceDict(dict);
187 184 resources.getKey("/XObject").replaceKey(name, image);
188 185 write(name);
... ...
libqpdf/QPDFWriter.cc
... ... @@ -1579,10 +1579,7 @@ QPDFWriter::unparseObject(
1579 1579 m->cur_data_key.length());
1580 1580 pl.writeString(val);
1581 1581 pl.finish();
1582   - auto buf = bufpl.getBufferSharedPointer();
1583   - val = QPDF_String(
1584   - std::string(reinterpret_cast<char*>(buf->getBuffer()), buf->getSize()))
1585   - .unparse(true);
  1582 + val = QPDF_String(bufpl.getString()).unparse(true);
1586 1583 } else {
1587 1584 auto tmp_ph = QUtil::make_unique_cstr(val);
1588 1585 char* tmp = tmp_ph.get();
... ...
libqpdf/QPDF_Stream.cc
... ... @@ -216,29 +216,28 @@ QPDF_Stream::getStreamJSON(
216 216 auto dict = this->stream_dict;
217 217 JSON result = JSON::makeDictionary();
218 218 if (json_data != qpdf_sj_none) {
219   - std::shared_ptr<Buffer> buf;
  219 + Pl_Discard discard;
  220 + Pl_Buffer buf_pl{"stream data"};
  221 + // buf_pl contains valid data and is ready for retrieval of the data.
  222 + bool buf_pl_ready = false;
220 223 bool filtered = false;
221 224 bool filter = (decode_level != qpdf_dl_none);
222 225 for (int attempt = 1; attempt <= 2; ++attempt) {
223   - Pl_Discard discard;
224   - std::shared_ptr<Pl_Buffer> buf_pl;
225   - Pipeline* data_pipeline = nullptr;
  226 + Pipeline* data_pipeline = &discard;
226 227 if (json_data == qpdf_sj_file) {
227 228 // We need to capture the data to write
228   - buf_pl = std::make_shared<Pl_Buffer>("stream data");
229   - data_pipeline = buf_pl.get();
230   - } else {
231   - data_pipeline = &discard;
  229 + data_pipeline = &buf_pl;
232 230 }
233 231 bool succeeded =
234 232 pipeStreamData(data_pipeline, &filtered, 0, decode_level, false, (attempt == 1));
235   - if ((!succeeded) || (filter && (!filtered))) {
  233 + if (!succeeded || (filter && !filtered)) {
236 234 // Try again
237 235 filter = false;
238 236 decode_level = qpdf_dl_none;
  237 + buf_pl.getString(); // reset buf_pl
239 238 } else {
240   - if (buf_pl.get()) {
241   - buf = buf_pl->getBufferSharedPointer();
  239 + if (json_data == qpdf_sj_file) {
  240 + buf_pl_ready = true;
242 241 }
243 242 break;
244 243 }
... ... @@ -252,10 +251,10 @@ QPDF_Stream::getStreamJSON(
252 251 }
253 252 if (json_data == qpdf_sj_file) {
254 253 result.addDictionaryMember("datafile", JSON::makeString(data_filename));
255   - if (!buf.get()) {
  254 + if (!buf_pl_ready) {
256 255 throw std::logic_error("QPDF_Stream: failed to get stream data in json file mode");
257 256 }
258   - p->write(buf->getBuffer(), buf->getSize());
  257 + p->writeString(buf_pl.getString());
259 258 } else if (json_data == qpdf_sj_inline) {
260 259 result.addDictionaryMember(
261 260 "data", JSON::makeBlob(StreamBlobProvider(this, decode_level)));
... ...
libqpdf/QPDF_encryption.cc
... ... @@ -229,13 +229,11 @@ process_with_aes(
229 229 aes.writeString(data);
230 230 }
231 231 aes.finish();
232   - auto bufp = buffer.getBufferSharedPointer();
233 232 if (outlength == 0) {
234   - outlength = bufp->getSize();
  233 + return buffer.getString();
235 234 } else {
236   - outlength = std::min(outlength, bufp->getSize());
  235 + return buffer.getString().substr(0, outlength);
237 236 }
238   - return {reinterpret_cast<char*>(bufp->getBuffer()), outlength};
239 237 }
240 238  
241 239 static std::string
... ... @@ -1021,8 +1019,7 @@ QPDF::decryptString(std::string&amp; str, QPDFObjGen const&amp; og)
1021 1019 key.length());
1022 1020 pl.writeString(str);
1023 1021 pl.finish();
1024   - auto buf = bufpl.getBufferSharedPointer();
1025   - str = std::string(reinterpret_cast<char*>(buf->getBuffer()), buf->getSize());
  1022 + str = bufpl.getString();
1026 1023 } else {
1027 1024 QTC::TC("qpdf", "QPDF_encryption rc4 decode string");
1028 1025 size_t vlen = str.length();
... ... @@ -1041,6 +1038,9 @@ QPDF::decryptString(std::string&amp; str, QPDFObjGen const&amp; og)
1041 1038 }
1042 1039 }
1043 1040  
  1041 +// Prepend a decryption pipeline to 'pipeline'. The decryption pipeline (returned as
  1042 +// 'decrypt_pipeline' must be owned by the caller to ensure that it stays alive while the pipeline
  1043 +// is in use.
1044 1044 void
1045 1045 QPDF::decryptStream(
1046 1046 std::shared_ptr<EncryptionParameters> encp,
... ... @@ -1049,7 +1049,7 @@ QPDF::decryptStream(
1049 1049 Pipeline*& pipeline,
1050 1050 QPDFObjGen const& og,
1051 1051 QPDFObjectHandle& stream_dict,
1052   - std::vector<std::shared_ptr<Pipeline>>& heap)
  1052 + std::unique_ptr<Pipeline>& decrypt_pipeline)
1053 1053 {
1054 1054 std::string type;
1055 1055 if (stream_dict.getKey("/Type").isName()) {
... ... @@ -1085,8 +1085,7 @@ QPDF::decryptStream(
1085 1085 crypt_params.getKey("/Name").isName()) {
1086 1086 QTC::TC("qpdf", "QPDF_encrypt crypt array");
1087 1087 method = interpretCF(encp, crypt_params.getKey("/Name"));
1088   - method_source = "stream's Crypt "
1089   - "decode parameters (array)";
  1088 + method_source = "stream's Crypt decode parameters (array)";
1090 1089 }
1091 1090 }
1092 1091 }
... ... @@ -1135,10 +1134,9 @@ QPDF::decryptStream(
1135 1134 }
1136 1135 }
1137 1136 std::string key = getKeyForObject(encp, og, use_aes);
1138   - std::shared_ptr<Pipeline> new_pipeline;
1139 1137 if (use_aes) {
1140 1138 QTC::TC("qpdf", "QPDF_encryption aes decode stream");
1141   - new_pipeline = std::make_shared<Pl_AES_PDF>(
  1139 + decrypt_pipeline = std::make_unique<Pl_AES_PDF>(
1142 1140 "AES stream decryption",
1143 1141 pipeline,
1144 1142 false,
... ... @@ -1146,14 +1144,13 @@ QPDF::decryptStream(
1146 1144 key.length());
1147 1145 } else {
1148 1146 QTC::TC("qpdf", "QPDF_encryption rc4 decode stream");
1149   - new_pipeline = std::make_shared<Pl_RC4>(
  1147 + decrypt_pipeline = std::make_unique<Pl_RC4>(
1150 1148 "RC4 stream decryption",
1151 1149 pipeline,
1152 1150 QUtil::unsigned_char_pointer(key),
1153 1151 toI(key.length()));
1154 1152 }
1155   - pipeline = new_pipeline.get();
1156   - heap.push_back(new_pipeline);
  1153 + pipeline = decrypt_pipeline.get();
1157 1154 }
1158 1155  
1159 1156 void
... ...
libtests/buffer.cc
... ... @@ -35,6 +35,21 @@ main()
35 35 assert(bc2p != bc1p);
36 36 assert(bc2p[0] == 'R');
37 37 assert(bc2p[1] == 'W');
  38 +
  39 + // Test Buffer(std:string&&)
  40 + Buffer bc3("QW");
  41 + unsigned char* bc3p = bc3.getBuffer();
  42 + Buffer bc4(bc3.copy());
  43 + bc3p[0] = 'R';
  44 + unsigned char* bc4p = bc4.getBuffer();
  45 + assert(bc4p != bc3p);
  46 + assert(bc4p[0] == 'Q');
  47 + assert(bc4p[1] == 'W');
  48 + bc4 = bc3.copy();
  49 + bc4p = bc4.getBuffer();
  50 + assert(bc4p != bc3p);
  51 + assert(bc4p[0] == 'R');
  52 + assert(bc4p[1] == 'W');
38 53 }
39 54  
40 55 #ifdef _MSC_VER
... ... @@ -62,6 +77,37 @@ main()
62 77 assert(bc2p != bc1p);
63 78 assert(bc2p[0] == 'R');
64 79 assert(bc2p[1] == 'W');
  80 +
  81 + // Test Buffer(std:string&&)
  82 + Buffer bc3("QW");
  83 + unsigned char* bc3p = bc3.getBuffer();
  84 + Buffer bc4(bc3);
  85 + bc3p[0] = 'R';
  86 + unsigned char* bc4p = bc4.getBuffer();
  87 + assert(bc4p != bc3p);
  88 + assert(bc4p[0] == 'Q');
  89 + assert(bc4p[1] == 'W');
  90 + bc4 = bc3;
  91 + bc4p = bc4.getBuffer();
  92 + assert(bc4p != bc3p);
  93 + assert(bc2p[0] == 'R');
  94 + assert(bc2p[1] == 'W');
  95 +
  96 + // Test Buffer(std:string&)
  97 + std::string s{"QW"};
  98 + Buffer bc5(s);
  99 + unsigned char* bc5p = bc5.getBuffer();
  100 + Buffer bc6(bc5);
  101 + bc5p[0] = 'R';
  102 + unsigned char* bc6p = bc6.getBuffer();
  103 + assert(bc6p != bc5p);
  104 + assert(bc6p[0] == 'Q');
  105 + assert(bc6p[1] == 'W');
  106 + bc6 = bc5;
  107 + bc6p = bc6.getBuffer();
  108 + assert(bc6p != bc5p);
  109 + assert(bc2p[0] == 'R');
  110 + assert(bc2p[1] == 'W');
65 111 }
66 112 #if (defined(__GNUC__) || defined(__clang__))
67 113 # pragma GCC diagnostic pop
... ... @@ -82,6 +128,19 @@ main()
82 128 Buffer bm3 = std::move(bm2);
83 129 unsigned char* bm3p = bm3.getBuffer();
84 130 assert(bm3p == bm2p);
  131 +
  132 + // Test Buffer(dtd::string&&)
  133 + Buffer bm4("QW");
  134 + unsigned char* bm4p = bm4.getBuffer();
  135 + Buffer bm5(std::move(bm4));
  136 + bm4p[0] = 'R';
  137 + unsigned char* bm5p = bm5.getBuffer();
  138 + assert(bm5p == bm4p);
  139 + assert(bm5p[0] == 'R');
  140 +
  141 + Buffer bm6 = std::move(bm5);
  142 + unsigned char* bm6p = bm6.getBuffer();
  143 + assert(bm6p == bm5p);
85 144 }
86 145  
87 146 try {
... ...