Commit 054690835338c31ded0f48f5020d2799d1cd7a9c

Authored by m-holger
Committed by GitHub
2 parents 402c6989 92a8c888

Merge pull request #1499 from m-holger/base64

Refactor Pl_Base64
libqpdf/Pl_Base64.cc
  1 +#include <qpdf/assert_debug.h>
  2 +
1 #include <qpdf/Pl_Base64.hh> 3 #include <qpdf/Pl_Base64.hh>
2 4
3 #include <qpdf/QIntC.hh> 5 #include <qpdf/QIntC.hh>
@@ -31,63 +33,81 @@ Pl_Base64::Pl_Base64(char const* identifier, Pipeline* next, action_e action) : @@ -31,63 +33,81 @@ Pl_Base64::Pl_Base64(char const* identifier, Pipeline* next, action_e action) :
31 Pipeline(identifier, next), 33 Pipeline(identifier, next),
32 action(action) 34 action(action)
33 { 35 {
34 - if (!next) {  
35 - throw std::logic_error("Attempt to create Pl_Base64 with nullptr as next");  
36 - }  
37 } 36 }
38 37
39 void 38 void
40 Pl_Base64::write(unsigned char const* data, size_t len) 39 Pl_Base64::write(unsigned char const* data, size_t len)
41 { 40 {
42 - if (finished) {  
43 - throw std::logic_error("Pl_Base64 used after finished");  
44 - }  
45 - if (action == a_decode) {  
46 - decode(data, len);  
47 - } else {  
48 - encode(data, len);  
49 - } 41 + in_buffer.append(reinterpret_cast<const char*>(data), len);
  42 +}
  43 +
  44 +std::string
  45 +Pl_Base64::decode(std::string_view data)
  46 +{
  47 + Pl_Base64 p("base64-decode", nullptr, a_decode);
  48 + p.decode_internal(data);
  49 + return std::move(p.out_buffer);
  50 +}
  51 +
  52 +std::string
  53 +Pl_Base64::encode(std::string_view data)
  54 +{
  55 + Pl_Base64 p("base64-encode", nullptr, a_encode);
  56 + p.encode_internal(data);
  57 + return std::move(p.out_buffer);
50 } 58 }
51 59
52 void 60 void
53 -Pl_Base64::decode(unsigned char const* data, size_t len) 61 +Pl_Base64::decode_internal(std::string_view data)
54 { 62 {
55 - unsigned char const* p = data; 63 + auto len = data.size();
  64 + auto res = (len / 4u + 1u) * 3u;
  65 + out_buffer.reserve(res);
  66 + unsigned char const* p = reinterpret_cast<const unsigned char*>(data.data());
56 while (len > 0) { 67 while (len > 0) {
57 if (!util::is_space(to_c(*p))) { 68 if (!util::is_space(to_c(*p))) {
58 buf[pos++] = *p; 69 buf[pos++] = *p;
59 if (pos == 4) { 70 if (pos == 4) {
60 - flush(); 71 + flush_decode();
61 } 72 }
62 } 73 }
63 ++p; 74 ++p;
64 --len; 75 --len;
65 } 76 }
  77 + if (pos > 0) {
  78 + for (size_t i = pos; i < 4; ++i) {
  79 + buf[i] = '=';
  80 + }
  81 + flush_decode();
  82 + }
  83 + qpdf_assert_debug(out_buffer.size() <= res);
66 } 84 }
67 85
68 void 86 void
69 -Pl_Base64::encode(unsigned char const* data, size_t len) 87 +Pl_Base64::encode_internal(std::string_view data)
70 { 88 {
71 - unsigned char const* p = data; 89 + auto len = data.size();
  90 + static const size_t max_len = (std::string().max_size() / 4u - 1u) * 3u;
  91 + // Change to constexpr once AppImage is built with GCC >= 12
  92 + if (len > max_len) {
  93 + throw std::length_error(getIdentifier() + ": base64 decode: data exceeds maximum length");
  94 + }
  95 +
  96 + auto res = (len / 3u + 1u) * 4u;
  97 + out_buffer.reserve(res);
  98 + unsigned char const* p = reinterpret_cast<const unsigned char*>(data.data());
72 while (len > 0) { 99 while (len > 0) {
73 buf[pos++] = *p; 100 buf[pos++] = *p;
74 if (pos == 3) { 101 if (pos == 3) {
75 - flush(); 102 + flush_encode();
76 } 103 }
77 ++p; 104 ++p;
78 --len; 105 --len;
79 } 106 }
80 -}  
81 -  
82 -void  
83 -Pl_Base64::flush()  
84 -{  
85 - if (action == a_decode) {  
86 - flush_decode();  
87 - } else { 107 + if (pos > 0) {
88 flush_encode(); 108 flush_encode();
89 } 109 }
90 - reset(); 110 + qpdf_assert_debug(out_buffer.size() <= res);
91 } 111 }
92 112
93 void 113 void
@@ -96,7 +116,7 @@ Pl_Base64::flush_decode() @@ -96,7 +116,7 @@ Pl_Base64::flush_decode()
96 if (end_of_data) { 116 if (end_of_data) {
97 throw std::runtime_error(getIdentifier() + ": base64 decode: data follows pad characters"); 117 throw std::runtime_error(getIdentifier() + ": base64 decode: data follows pad characters");
98 } 118 }
99 - int pad = 0; 119 + size_t pad = 0;
100 int shift = 18; 120 int shift = 18;
101 int outval = 0; 121 int outval = 0;
102 for (size_t i = 0; i < 4; ++i) { 122 for (size_t i = 0; i < 4; ++i) {
@@ -128,7 +148,8 @@ Pl_Base64::flush_decode() @@ -128,7 +148,8 @@ Pl_Base64::flush_decode()
128 to_uc(0xff & outval), 148 to_uc(0xff & outval),
129 }; 149 };
130 150
131 - next()->write(out, QIntC::to_size(3 - pad)); 151 + out_buffer.append(reinterpret_cast<const char*>(out), 3u - pad);
  152 + reset();
132 } 153 }
133 154
134 void 155 void
@@ -161,25 +182,27 @@ Pl_Base64::flush_encode() @@ -161,25 +182,27 @@ Pl_Base64::flush_encode()
161 for (size_t i = 0; i < 3 - pos; ++i) { 182 for (size_t i = 0; i < 3 - pos; ++i) {
162 out[3 - i] = '='; 183 out[3 - i] = '=';
163 } 184 }
164 - next()->write(out, 4); 185 + out_buffer.append(reinterpret_cast<const char*>(out), 4);
  186 + reset();
165 } 187 }
166 188
167 void 189 void
168 Pl_Base64::finish() 190 Pl_Base64::finish()
169 { 191 {
170 - if (pos > 0) {  
171 - if (finished) {  
172 - throw std::logic_error("Pl_Base64 used after finished");  
173 - }  
174 - if (action == a_decode) {  
175 - for (size_t i = pos; i < 4; ++i) {  
176 - buf[i] = '=';  
177 - }  
178 - }  
179 - flush(); 192 + if (action == a_decode) {
  193 + decode_internal(in_buffer);
  194 +
  195 + } else {
  196 + encode_internal(in_buffer);
  197 + }
  198 + if (next()) {
  199 + in_buffer.clear();
  200 + in_buffer.shrink_to_fit();
  201 + next()->write(reinterpret_cast<unsigned char const*>(out_buffer.data()), out_buffer.size());
  202 + out_buffer.clear();
  203 + out_buffer.shrink_to_fit();
  204 + next()->finish();
180 } 205 }
181 - finished = true;  
182 - next()->finish();  
183 } 206 }
184 207
185 void 208 void
libqpdf/QPDF_json.cc
1 #include <qpdf/QPDF.hh> 1 #include <qpdf/QPDF.hh>
2 2
3 #include <qpdf/FileInputSource.hh> 3 #include <qpdf/FileInputSource.hh>
  4 +#include <qpdf/InputSource_private.hh>
4 #include <qpdf/JSON_writer.hh> 5 #include <qpdf/JSON_writer.hh>
5 #include <qpdf/Pl_Base64.hh> 6 #include <qpdf/Pl_Base64.hh>
6 #include <qpdf/Pl_StdioFile.hh> 7 #include <qpdf/Pl_StdioFile.hh>
@@ -215,20 +216,10 @@ static std::function<void(Pipeline*)> @@ -215,20 +216,10 @@ static std::function<void(Pipeline*)>
215 provide_data(std::shared_ptr<InputSource> is, qpdf_offset_t start, qpdf_offset_t end) 216 provide_data(std::shared_ptr<InputSource> is, qpdf_offset_t start, qpdf_offset_t end)
216 { 217 {
217 return [is, start, end](Pipeline* p) { 218 return [is, start, end](Pipeline* p) {
218 - Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode);  
219 - p = &decode;  
220 - size_t bytes = QIntC::to_size(end - start);  
221 - char buf[8192];  
222 - is->seek(start, SEEK_SET);  
223 - size_t len = 0;  
224 - while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) {  
225 - p->write(buf, len);  
226 - bytes -= len;  
227 - if (bytes == 0) {  
228 - break;  
229 - }  
230 - }  
231 - decode.finish(); 219 + auto data = is->read(QIntC::to_size(end - start), start);
  220 + data = Pl_Base64::decode(data);
  221 + p->write(reinterpret_cast<const unsigned char*>(data.data()), data.size());
  222 + p->finish();
232 }; 223 };
233 } 224 }
234 225
@@ -615,7 +606,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -615,7 +606,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
615 if (!tos.object.isStream()) { 606 if (!tos.object.isStream()) {
616 throw std::logic_error("current object is not stream in st_stream"); 607 throw std::logic_error("current object is not stream in st_stream");
617 } 608 }
618 - auto uninitialized = QPDFObjectHandle();  
619 if (key == "dict") { 609 if (key == "dict") {
620 this->saw_dict = true; 610 this->saw_dict = true;
621 if (setNextStateIfDictionary("stream.dict", value, st_object)) { 611 if (setNextStateIfDictionary("stream.dict", value, st_object)) {
@@ -630,7 +620,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -630,7 +620,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
630 if (!value.getString(v)) { 620 if (!value.getString(v)) {
631 QTC::TC("qpdf", "QPDF_json stream data not string"); 621 QTC::TC("qpdf", "QPDF_json stream data not string");
632 error(value.getStart(), "\"stream.data\" must be a string"); 622 error(value.getStart(), "\"stream.data\" must be a string");
633 - tos.object.replaceStreamData("", uninitialized, uninitialized); 623 + tos.object.replaceStreamData("", {}, {});
634 } else { 624 } else {
635 // The range includes the quotes. 625 // The range includes the quotes.
636 auto start = value.getStart() + 1; 626 auto start = value.getStart() + 1;
@@ -638,8 +628,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -638,8 +628,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
638 if (end < start) { 628 if (end < start) {
639 throw std::logic_error("QPDF_json: JSON string length < 0"); 629 throw std::logic_error("QPDF_json: JSON string length < 0");
640 } 630 }
641 - tos.object.replaceStreamData(  
642 - provide_data(is, start, end), uninitialized, uninitialized); 631 + tos.object.replaceStreamData(provide_data(is, start, end), {}, {});
643 } 632 }
644 } else if (key == "datafile") { 633 } else if (key == "datafile") {
645 this->saw_datafile = true; 634 this->saw_datafile = true;
@@ -649,10 +638,9 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) @@ -649,10 +638,9 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
649 error( 638 error(
650 value.getStart(), 639 value.getStart(),
651 "\"stream.datafile\" must be a string containing a file name"); 640 "\"stream.datafile\" must be a string containing a file name");
652 - tos.object.replaceStreamData("", uninitialized, uninitialized); 641 + tos.object.replaceStreamData("", {}, {});
653 } else { 642 } else {
654 - tos.object.replaceStreamData(  
655 - QUtil::file_provider(filename), uninitialized, uninitialized); 643 + tos.object.replaceStreamData(QUtil::file_provider(filename), {}, {});
656 } 644 }
657 } else { 645 } else {
658 // Ignore unknown keys for forward compatibility. 646 // Ignore unknown keys for forward compatibility.
libqpdf/qpdf/JSON_writer.hh
@@ -32,10 +32,8 @@ class JSON::Writer @@ -32,10 +32,8 @@ class JSON::Writer
32 Writer& 32 Writer&
33 writeBase64(std::string_view sv) 33 writeBase64(std::string_view sv)
34 { 34 {
35 - Pl_Concatenate cat{"writer concat", p};  
36 - Pl_Base64 base{"writer base64", &cat, Pl_Base64::a_encode};  
37 - base.write(reinterpret_cast<unsigned char const*>(sv.data()), sv.size());  
38 - base.finish(); 35 + auto encoded = Pl_Base64::encode(sv);
  36 + p->write(reinterpret_cast<unsigned char const*>(encoded.data()), encoded.size());
39 return *this; 37 return *this;
40 } 38 }
41 39
libqpdf/qpdf/Pl_Base64.hh
@@ -12,10 +12,12 @@ class Pl_Base64 final: public Pipeline @@ -12,10 +12,12 @@ class Pl_Base64 final: public Pipeline
12 void write(unsigned char const* buf, size_t len) final; 12 void write(unsigned char const* buf, size_t len) final;
13 void finish() final; 13 void finish() final;
14 14
  15 + static std::string encode(std::string_view data);
  16 + static std::string decode(std::string_view data);
  17 +
15 private: 18 private:
16 - void decode(unsigned char const* buf, size_t len);  
17 - void encode(unsigned char const* buf, size_t len);  
18 - void flush(); 19 + void decode_internal(std::string_view data);
  20 + void encode_internal(std::string_view data);
19 void flush_decode(); 21 void flush_decode();
20 void flush_encode(); 22 void flush_encode();
21 void reset(); 23 void reset();
@@ -23,8 +25,9 @@ class Pl_Base64 final: public Pipeline @@ -23,8 +25,9 @@ class Pl_Base64 final: public Pipeline
23 action_e action; 25 action_e action;
24 unsigned char buf[4]{0, 0, 0, 0}; 26 unsigned char buf[4]{0, 0, 0, 0};
25 size_t pos{0}; 27 size_t pos{0};
  28 + std::string in_buffer;
  29 + std::string out_buffer;
26 bool end_of_data{false}; 30 bool end_of_data{false};
27 - bool finished{false};  
28 }; 31 };
29 32
30 #endif // PL_BASE64_HH 33 #endif // PL_BASE64_HH