Commit 054690835338c31ded0f48f5020d2799d1cd7a9c

Authored by m-holger
Committed by GitHub
2 parents 402c6989 92a8c888

Merge pull request #1499 from m-holger/base64

Refactor Pl_Base64
libqpdf/Pl_Base64.cc
  1 +#include <qpdf/assert_debug.h>
  2 +
1 3 #include <qpdf/Pl_Base64.hh>
2 4  
3 5 #include <qpdf/QIntC.hh>
... ... @@ -31,63 +33,81 @@ Pl_Base64::Pl_Base64(char const* identifier, Pipeline* next, action_e action) :
31 33 Pipeline(identifier, next),
32 34 action(action)
33 35 {
34   - if (!next) {
35   - throw std::logic_error("Attempt to create Pl_Base64 with nullptr as next");
36   - }
37 36 }
38 37  
39 38 void
40 39 Pl_Base64::write(unsigned char const* data, size_t len)
41 40 {
42   - if (finished) {
43   - throw std::logic_error("Pl_Base64 used after finished");
44   - }
45   - if (action == a_decode) {
46   - decode(data, len);
47   - } else {
48   - encode(data, len);
49   - }
  41 + in_buffer.append(reinterpret_cast<const char*>(data), len);
  42 +}
  43 +
  44 +std::string
  45 +Pl_Base64::decode(std::string_view data)
  46 +{
  47 + Pl_Base64 p("base64-decode", nullptr, a_decode);
  48 + p.decode_internal(data);
  49 + return std::move(p.out_buffer);
  50 +}
  51 +
  52 +std::string
  53 +Pl_Base64::encode(std::string_view data)
  54 +{
  55 + Pl_Base64 p("base64-encode", nullptr, a_encode);
  56 + p.encode_internal(data);
  57 + return std::move(p.out_buffer);
50 58 }
51 59  
52 60 void
53   -Pl_Base64::decode(unsigned char const* data, size_t len)
  61 +Pl_Base64::decode_internal(std::string_view data)
54 62 {
55   - unsigned char const* p = data;
  63 + auto len = data.size();
  64 + auto res = (len / 4u + 1u) * 3u;
  65 + out_buffer.reserve(res);
  66 + unsigned char const* p = reinterpret_cast<const unsigned char*>(data.data());
56 67 while (len > 0) {
57 68 if (!util::is_space(to_c(*p))) {
58 69 buf[pos++] = *p;
59 70 if (pos == 4) {
60   - flush();
  71 + flush_decode();
61 72 }
62 73 }
63 74 ++p;
64 75 --len;
65 76 }
  77 + if (pos > 0) {
  78 + for (size_t i = pos; i < 4; ++i) {
  79 + buf[i] = '=';
  80 + }
  81 + flush_decode();
  82 + }
  83 + qpdf_assert_debug(out_buffer.size() <= res);
66 84 }
67 85  
68 86 void
69   -Pl_Base64::encode(unsigned char const* data, size_t len)
  87 +Pl_Base64::encode_internal(std::string_view data)
70 88 {
71   - unsigned char const* p = data;
  89 + auto len = data.size();
  90 + static const size_t max_len = (std::string().max_size() / 4u - 1u) * 3u;
  91 + // Change to constexpr once AppImage is built with GCC >= 12
  92 + if (len > max_len) {
  93 + throw std::length_error(getIdentifier() + ": base64 decode: data exceeds maximum length");
  94 + }
  95 +
  96 + auto res = (len / 3u + 1u) * 4u;
  97 + out_buffer.reserve(res);
  98 + unsigned char const* p = reinterpret_cast<const unsigned char*>(data.data());
72 99 while (len > 0) {
73 100 buf[pos++] = *p;
74 101 if (pos == 3) {
75   - flush();
  102 + flush_encode();
76 103 }
77 104 ++p;
78 105 --len;
79 106 }
80   -}
81   -
82   -void
83   -Pl_Base64::flush()
84   -{
85   - if (action == a_decode) {
86   - flush_decode();
87   - } else {
  107 + if (pos > 0) {
88 108 flush_encode();
89 109 }
90   - reset();
  110 + qpdf_assert_debug(out_buffer.size() <= res);
91 111 }
92 112  
93 113 void
... ... @@ -96,7 +116,7 @@ Pl_Base64::flush_decode()
96 116 if (end_of_data) {
97 117 throw std::runtime_error(getIdentifier() + ": base64 decode: data follows pad characters");
98 118 }
99   - int pad = 0;
  119 + size_t pad = 0;
100 120 int shift = 18;
101 121 int outval = 0;
102 122 for (size_t i = 0; i < 4; ++i) {
... ... @@ -128,7 +148,8 @@ Pl_Base64::flush_decode()
128 148 to_uc(0xff & outval),
129 149 };
130 150  
131   - next()->write(out, QIntC::to_size(3 - pad));
  151 + out_buffer.append(reinterpret_cast<const char*>(out), 3u - pad);
  152 + reset();
132 153 }
133 154  
134 155 void
... ... @@ -161,25 +182,27 @@ Pl_Base64::flush_encode()
161 182 for (size_t i = 0; i < 3 - pos; ++i) {
162 183 out[3 - i] = '=';
163 184 }
164   - next()->write(out, 4);
  185 + out_buffer.append(reinterpret_cast<const char*>(out), 4);
  186 + reset();
165 187 }
166 188  
167 189 void
168 190 Pl_Base64::finish()
169 191 {
170   - if (pos > 0) {
171   - if (finished) {
172   - throw std::logic_error("Pl_Base64 used after finished");
173   - }
174   - if (action == a_decode) {
175   - for (size_t i = pos; i < 4; ++i) {
176   - buf[i] = '=';
177   - }
178   - }
179   - flush();
  192 + if (action == a_decode) {
  193 + decode_internal(in_buffer);
  194 +
  195 + } else {
  196 + encode_internal(in_buffer);
  197 + }
  198 + if (next()) {
  199 + in_buffer.clear();
  200 + in_buffer.shrink_to_fit();
  201 + next()->write(reinterpret_cast<unsigned char const*>(out_buffer.data()), out_buffer.size());
  202 + out_buffer.clear();
  203 + out_buffer.shrink_to_fit();
  204 + next()->finish();
180 205 }
181   - finished = true;
182   - next()->finish();
183 206 }
184 207  
185 208 void
... ...
libqpdf/QPDF_json.cc
1 1 #include <qpdf/QPDF.hh>
2 2  
3 3 #include <qpdf/FileInputSource.hh>
  4 +#include <qpdf/InputSource_private.hh>
4 5 #include <qpdf/JSON_writer.hh>
5 6 #include <qpdf/Pl_Base64.hh>
6 7 #include <qpdf/Pl_StdioFile.hh>
... ... @@ -215,20 +216,10 @@ static std::function<void(Pipeline*)>
215 216 provide_data(std::shared_ptr<InputSource> is, qpdf_offset_t start, qpdf_offset_t end)
216 217 {
217 218 return [is, start, end](Pipeline* p) {
218   - Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode);
219   - p = &decode;
220   - size_t bytes = QIntC::to_size(end - start);
221   - char buf[8192];
222   - is->seek(start, SEEK_SET);
223   - size_t len = 0;
224   - while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) {
225   - p->write(buf, len);
226   - bytes -= len;
227   - if (bytes == 0) {
228   - break;
229   - }
230   - }
231   - decode.finish();
  219 + auto data = is->read(QIntC::to_size(end - start), start);
  220 + data = Pl_Base64::decode(data);
  221 + p->write(reinterpret_cast<const unsigned char*>(data.data()), data.size());
  222 + p->finish();
232 223 };
233 224 }
234 225  
... ... @@ -615,7 +606,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
615 606 if (!tos.object.isStream()) {
616 607 throw std::logic_error("current object is not stream in st_stream");
617 608 }
618   - auto uninitialized = QPDFObjectHandle();
619 609 if (key == "dict") {
620 610 this->saw_dict = true;
621 611 if (setNextStateIfDictionary("stream.dict", value, st_object)) {
... ... @@ -630,7 +620,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
630 620 if (!value.getString(v)) {
631 621 QTC::TC("qpdf", "QPDF_json stream data not string");
632 622 error(value.getStart(), "\"stream.data\" must be a string");
633   - tos.object.replaceStreamData("", uninitialized, uninitialized);
  623 + tos.object.replaceStreamData("", {}, {});
634 624 } else {
635 625 // The range includes the quotes.
636 626 auto start = value.getStart() + 1;
... ... @@ -638,8 +628,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
638 628 if (end < start) {
639 629 throw std::logic_error("QPDF_json: JSON string length < 0");
640 630 }
641   - tos.object.replaceStreamData(
642   - provide_data(is, start, end), uninitialized, uninitialized);
  631 + tos.object.replaceStreamData(provide_data(is, start, end), {}, {});
643 632 }
644 633 } else if (key == "datafile") {
645 634 this->saw_datafile = true;
... ... @@ -649,10 +638,9 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
649 638 error(
650 639 value.getStart(),
651 640 "\"stream.datafile\" must be a string containing a file name");
652   - tos.object.replaceStreamData("", uninitialized, uninitialized);
  641 + tos.object.replaceStreamData("", {}, {});
653 642 } else {
654   - tos.object.replaceStreamData(
655   - QUtil::file_provider(filename), uninitialized, uninitialized);
  643 + tos.object.replaceStreamData(QUtil::file_provider(filename), {}, {});
656 644 }
657 645 } else {
658 646 // Ignore unknown keys for forward compatibility.
... ...
libqpdf/qpdf/JSON_writer.hh
... ... @@ -32,10 +32,8 @@ class JSON::Writer
32 32 Writer&
33 33 writeBase64(std::string_view sv)
34 34 {
35   - Pl_Concatenate cat{"writer concat", p};
36   - Pl_Base64 base{"writer base64", &cat, Pl_Base64::a_encode};
37   - base.write(reinterpret_cast<unsigned char const*>(sv.data()), sv.size());
38   - base.finish();
  35 + auto encoded = Pl_Base64::encode(sv);
  36 + p->write(reinterpret_cast<unsigned char const*>(encoded.data()), encoded.size());
39 37 return *this;
40 38 }
41 39  
... ...
libqpdf/qpdf/Pl_Base64.hh
... ... @@ -12,10 +12,12 @@ class Pl_Base64 final: public Pipeline
12 12 void write(unsigned char const* buf, size_t len) final;
13 13 void finish() final;
14 14  
  15 + static std::string encode(std::string_view data);
  16 + static std::string decode(std::string_view data);
  17 +
15 18 private:
16   - void decode(unsigned char const* buf, size_t len);
17   - void encode(unsigned char const* buf, size_t len);
18   - void flush();
  19 + void decode_internal(std::string_view data);
  20 + void encode_internal(std::string_view data);
19 21 void flush_decode();
20 22 void flush_encode();
21 23 void reset();
... ... @@ -23,8 +25,9 @@ class Pl_Base64 final: public Pipeline
23 25 action_e action;
24 26 unsigned char buf[4]{0, 0, 0, 0};
25 27 size_t pos{0};
  28 + std::string in_buffer;
  29 + std::string out_buffer;
26 30 bool end_of_data{false};
27   - bool finished{false};
28 31 };
29 32  
30 33 #endif // PL_BASE64_HH
... ...