Commit 054690835338c31ded0f48f5020d2799d1cd7a9c
Committed by GitHub
Merge pull request #1499 from m-holger/base64
Refactor Pl_Base64
Showing 4 changed files with 82 additions and 70 deletions
libqpdf/Pl_Base64.cc
| 1 | +#include <qpdf/assert_debug.h> | ||
| 2 | + | ||
| 1 | #include <qpdf/Pl_Base64.hh> | 3 | #include <qpdf/Pl_Base64.hh> |
| 2 | 4 | ||
| 3 | #include <qpdf/QIntC.hh> | 5 | #include <qpdf/QIntC.hh> |
| @@ -31,63 +33,81 @@ Pl_Base64::Pl_Base64(char const* identifier, Pipeline* next, action_e action) : | @@ -31,63 +33,81 @@ Pl_Base64::Pl_Base64(char const* identifier, Pipeline* next, action_e action) : | ||
| 31 | Pipeline(identifier, next), | 33 | Pipeline(identifier, next), |
| 32 | action(action) | 34 | action(action) |
| 33 | { | 35 | { |
| 34 | - if (!next) { | ||
| 35 | - throw std::logic_error("Attempt to create Pl_Base64 with nullptr as next"); | ||
| 36 | - } | ||
| 37 | } | 36 | } |
| 38 | 37 | ||
| 39 | void | 38 | void |
| 40 | Pl_Base64::write(unsigned char const* data, size_t len) | 39 | Pl_Base64::write(unsigned char const* data, size_t len) |
| 41 | { | 40 | { |
| 42 | - if (finished) { | ||
| 43 | - throw std::logic_error("Pl_Base64 used after finished"); | ||
| 44 | - } | ||
| 45 | - if (action == a_decode) { | ||
| 46 | - decode(data, len); | ||
| 47 | - } else { | ||
| 48 | - encode(data, len); | ||
| 49 | - } | 41 | + in_buffer.append(reinterpret_cast<const char*>(data), len); |
| 42 | +} | ||
| 43 | + | ||
| 44 | +std::string | ||
| 45 | +Pl_Base64::decode(std::string_view data) | ||
| 46 | +{ | ||
| 47 | + Pl_Base64 p("base64-decode", nullptr, a_decode); | ||
| 48 | + p.decode_internal(data); | ||
| 49 | + return std::move(p.out_buffer); | ||
| 50 | +} | ||
| 51 | + | ||
| 52 | +std::string | ||
| 53 | +Pl_Base64::encode(std::string_view data) | ||
| 54 | +{ | ||
| 55 | + Pl_Base64 p("base64-encode", nullptr, a_encode); | ||
| 56 | + p.encode_internal(data); | ||
| 57 | + return std::move(p.out_buffer); | ||
| 50 | } | 58 | } |
| 51 | 59 | ||
| 52 | void | 60 | void |
| 53 | -Pl_Base64::decode(unsigned char const* data, size_t len) | 61 | +Pl_Base64::decode_internal(std::string_view data) |
| 54 | { | 62 | { |
| 55 | - unsigned char const* p = data; | 63 | + auto len = data.size(); |
| 64 | + auto res = (len / 4u + 1u) * 3u; | ||
| 65 | + out_buffer.reserve(res); | ||
| 66 | + unsigned char const* p = reinterpret_cast<const unsigned char*>(data.data()); | ||
| 56 | while (len > 0) { | 67 | while (len > 0) { |
| 57 | if (!util::is_space(to_c(*p))) { | 68 | if (!util::is_space(to_c(*p))) { |
| 58 | buf[pos++] = *p; | 69 | buf[pos++] = *p; |
| 59 | if (pos == 4) { | 70 | if (pos == 4) { |
| 60 | - flush(); | 71 | + flush_decode(); |
| 61 | } | 72 | } |
| 62 | } | 73 | } |
| 63 | ++p; | 74 | ++p; |
| 64 | --len; | 75 | --len; |
| 65 | } | 76 | } |
| 77 | + if (pos > 0) { | ||
| 78 | + for (size_t i = pos; i < 4; ++i) { | ||
| 79 | + buf[i] = '='; | ||
| 80 | + } | ||
| 81 | + flush_decode(); | ||
| 82 | + } | ||
| 83 | + qpdf_assert_debug(out_buffer.size() <= res); | ||
| 66 | } | 84 | } |
| 67 | 85 | ||
| 68 | void | 86 | void |
| 69 | -Pl_Base64::encode(unsigned char const* data, size_t len) | 87 | +Pl_Base64::encode_internal(std::string_view data) |
| 70 | { | 88 | { |
| 71 | - unsigned char const* p = data; | 89 | + auto len = data.size(); |
| 90 | + static const size_t max_len = (std::string().max_size() / 4u - 1u) * 3u; | ||
| 91 | + // Change to constexpr once AppImage is built with GCC >= 12 | ||
| 92 | + if (len > max_len) { | ||
| 93 | + throw std::length_error(getIdentifier() + ": base64 decode: data exceeds maximum length"); | ||
| 94 | + } | ||
| 95 | + | ||
| 96 | + auto res = (len / 3u + 1u) * 4u; | ||
| 97 | + out_buffer.reserve(res); | ||
| 98 | + unsigned char const* p = reinterpret_cast<const unsigned char*>(data.data()); | ||
| 72 | while (len > 0) { | 99 | while (len > 0) { |
| 73 | buf[pos++] = *p; | 100 | buf[pos++] = *p; |
| 74 | if (pos == 3) { | 101 | if (pos == 3) { |
| 75 | - flush(); | 102 | + flush_encode(); |
| 76 | } | 103 | } |
| 77 | ++p; | 104 | ++p; |
| 78 | --len; | 105 | --len; |
| 79 | } | 106 | } |
| 80 | -} | ||
| 81 | - | ||
| 82 | -void | ||
| 83 | -Pl_Base64::flush() | ||
| 84 | -{ | ||
| 85 | - if (action == a_decode) { | ||
| 86 | - flush_decode(); | ||
| 87 | - } else { | 107 | + if (pos > 0) { |
| 88 | flush_encode(); | 108 | flush_encode(); |
| 89 | } | 109 | } |
| 90 | - reset(); | 110 | + qpdf_assert_debug(out_buffer.size() <= res); |
| 91 | } | 111 | } |
| 92 | 112 | ||
| 93 | void | 113 | void |
| @@ -96,7 +116,7 @@ Pl_Base64::flush_decode() | @@ -96,7 +116,7 @@ Pl_Base64::flush_decode() | ||
| 96 | if (end_of_data) { | 116 | if (end_of_data) { |
| 97 | throw std::runtime_error(getIdentifier() + ": base64 decode: data follows pad characters"); | 117 | throw std::runtime_error(getIdentifier() + ": base64 decode: data follows pad characters"); |
| 98 | } | 118 | } |
| 99 | - int pad = 0; | 119 | + size_t pad = 0; |
| 100 | int shift = 18; | 120 | int shift = 18; |
| 101 | int outval = 0; | 121 | int outval = 0; |
| 102 | for (size_t i = 0; i < 4; ++i) { | 122 | for (size_t i = 0; i < 4; ++i) { |
| @@ -128,7 +148,8 @@ Pl_Base64::flush_decode() | @@ -128,7 +148,8 @@ Pl_Base64::flush_decode() | ||
| 128 | to_uc(0xff & outval), | 148 | to_uc(0xff & outval), |
| 129 | }; | 149 | }; |
| 130 | 150 | ||
| 131 | - next()->write(out, QIntC::to_size(3 - pad)); | 151 | + out_buffer.append(reinterpret_cast<const char*>(out), 3u - pad); |
| 152 | + reset(); | ||
| 132 | } | 153 | } |
| 133 | 154 | ||
| 134 | void | 155 | void |
| @@ -161,25 +182,27 @@ Pl_Base64::flush_encode() | @@ -161,25 +182,27 @@ Pl_Base64::flush_encode() | ||
| 161 | for (size_t i = 0; i < 3 - pos; ++i) { | 182 | for (size_t i = 0; i < 3 - pos; ++i) { |
| 162 | out[3 - i] = '='; | 183 | out[3 - i] = '='; |
| 163 | } | 184 | } |
| 164 | - next()->write(out, 4); | 185 | + out_buffer.append(reinterpret_cast<const char*>(out), 4); |
| 186 | + reset(); | ||
| 165 | } | 187 | } |
| 166 | 188 | ||
| 167 | void | 189 | void |
| 168 | Pl_Base64::finish() | 190 | Pl_Base64::finish() |
| 169 | { | 191 | { |
| 170 | - if (pos > 0) { | ||
| 171 | - if (finished) { | ||
| 172 | - throw std::logic_error("Pl_Base64 used after finished"); | ||
| 173 | - } | ||
| 174 | - if (action == a_decode) { | ||
| 175 | - for (size_t i = pos; i < 4; ++i) { | ||
| 176 | - buf[i] = '='; | ||
| 177 | - } | ||
| 178 | - } | ||
| 179 | - flush(); | 192 | + if (action == a_decode) { |
| 193 | + decode_internal(in_buffer); | ||
| 194 | + | ||
| 195 | + } else { | ||
| 196 | + encode_internal(in_buffer); | ||
| 197 | + } | ||
| 198 | + if (next()) { | ||
| 199 | + in_buffer.clear(); | ||
| 200 | + in_buffer.shrink_to_fit(); | ||
| 201 | + next()->write(reinterpret_cast<unsigned char const*>(out_buffer.data()), out_buffer.size()); | ||
| 202 | + out_buffer.clear(); | ||
| 203 | + out_buffer.shrink_to_fit(); | ||
| 204 | + next()->finish(); | ||
| 180 | } | 205 | } |
| 181 | - finished = true; | ||
| 182 | - next()->finish(); | ||
| 183 | } | 206 | } |
| 184 | 207 | ||
| 185 | void | 208 | void |
libqpdf/QPDF_json.cc
| 1 | #include <qpdf/QPDF.hh> | 1 | #include <qpdf/QPDF.hh> |
| 2 | 2 | ||
| 3 | #include <qpdf/FileInputSource.hh> | 3 | #include <qpdf/FileInputSource.hh> |
| 4 | +#include <qpdf/InputSource_private.hh> | ||
| 4 | #include <qpdf/JSON_writer.hh> | 5 | #include <qpdf/JSON_writer.hh> |
| 5 | #include <qpdf/Pl_Base64.hh> | 6 | #include <qpdf/Pl_Base64.hh> |
| 6 | #include <qpdf/Pl_StdioFile.hh> | 7 | #include <qpdf/Pl_StdioFile.hh> |
| @@ -215,20 +216,10 @@ static std::function<void(Pipeline*)> | @@ -215,20 +216,10 @@ static std::function<void(Pipeline*)> | ||
| 215 | provide_data(std::shared_ptr<InputSource> is, qpdf_offset_t start, qpdf_offset_t end) | 216 | provide_data(std::shared_ptr<InputSource> is, qpdf_offset_t start, qpdf_offset_t end) |
| 216 | { | 217 | { |
| 217 | return [is, start, end](Pipeline* p) { | 218 | return [is, start, end](Pipeline* p) { |
| 218 | - Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode); | ||
| 219 | - p = &decode; | ||
| 220 | - size_t bytes = QIntC::to_size(end - start); | ||
| 221 | - char buf[8192]; | ||
| 222 | - is->seek(start, SEEK_SET); | ||
| 223 | - size_t len = 0; | ||
| 224 | - while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) { | ||
| 225 | - p->write(buf, len); | ||
| 226 | - bytes -= len; | ||
| 227 | - if (bytes == 0) { | ||
| 228 | - break; | ||
| 229 | - } | ||
| 230 | - } | ||
| 231 | - decode.finish(); | 219 | + auto data = is->read(QIntC::to_size(end - start), start); |
| 220 | + data = Pl_Base64::decode(data); | ||
| 221 | + p->write(reinterpret_cast<const unsigned char*>(data.data()), data.size()); | ||
| 222 | + p->finish(); | ||
| 232 | }; | 223 | }; |
| 233 | } | 224 | } |
| 234 | 225 | ||
| @@ -615,7 +606,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | @@ -615,7 +606,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | ||
| 615 | if (!tos.object.isStream()) { | 606 | if (!tos.object.isStream()) { |
| 616 | throw std::logic_error("current object is not stream in st_stream"); | 607 | throw std::logic_error("current object is not stream in st_stream"); |
| 617 | } | 608 | } |
| 618 | - auto uninitialized = QPDFObjectHandle(); | ||
| 619 | if (key == "dict") { | 609 | if (key == "dict") { |
| 620 | this->saw_dict = true; | 610 | this->saw_dict = true; |
| 621 | if (setNextStateIfDictionary("stream.dict", value, st_object)) { | 611 | if (setNextStateIfDictionary("stream.dict", value, st_object)) { |
| @@ -630,7 +620,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | @@ -630,7 +620,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | ||
| 630 | if (!value.getString(v)) { | 620 | if (!value.getString(v)) { |
| 631 | QTC::TC("qpdf", "QPDF_json stream data not string"); | 621 | QTC::TC("qpdf", "QPDF_json stream data not string"); |
| 632 | error(value.getStart(), "\"stream.data\" must be a string"); | 622 | error(value.getStart(), "\"stream.data\" must be a string"); |
| 633 | - tos.object.replaceStreamData("", uninitialized, uninitialized); | 623 | + tos.object.replaceStreamData("", {}, {}); |
| 634 | } else { | 624 | } else { |
| 635 | // The range includes the quotes. | 625 | // The range includes the quotes. |
| 636 | auto start = value.getStart() + 1; | 626 | auto start = value.getStart() + 1; |
| @@ -638,8 +628,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | @@ -638,8 +628,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | ||
| 638 | if (end < start) { | 628 | if (end < start) { |
| 639 | throw std::logic_error("QPDF_json: JSON string length < 0"); | 629 | throw std::logic_error("QPDF_json: JSON string length < 0"); |
| 640 | } | 630 | } |
| 641 | - tos.object.replaceStreamData( | ||
| 642 | - provide_data(is, start, end), uninitialized, uninitialized); | 631 | + tos.object.replaceStreamData(provide_data(is, start, end), {}, {}); |
| 643 | } | 632 | } |
| 644 | } else if (key == "datafile") { | 633 | } else if (key == "datafile") { |
| 645 | this->saw_datafile = true; | 634 | this->saw_datafile = true; |
| @@ -649,10 +638,9 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | @@ -649,10 +638,9 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) | ||
| 649 | error( | 638 | error( |
| 650 | value.getStart(), | 639 | value.getStart(), |
| 651 | "\"stream.datafile\" must be a string containing a file name"); | 640 | "\"stream.datafile\" must be a string containing a file name"); |
| 652 | - tos.object.replaceStreamData("", uninitialized, uninitialized); | 641 | + tos.object.replaceStreamData("", {}, {}); |
| 653 | } else { | 642 | } else { |
| 654 | - tos.object.replaceStreamData( | ||
| 655 | - QUtil::file_provider(filename), uninitialized, uninitialized); | 643 | + tos.object.replaceStreamData(QUtil::file_provider(filename), {}, {}); |
| 656 | } | 644 | } |
| 657 | } else { | 645 | } else { |
| 658 | // Ignore unknown keys for forward compatibility. | 646 | // Ignore unknown keys for forward compatibility. |
libqpdf/qpdf/JSON_writer.hh
| @@ -32,10 +32,8 @@ class JSON::Writer | @@ -32,10 +32,8 @@ class JSON::Writer | ||
| 32 | Writer& | 32 | Writer& |
| 33 | writeBase64(std::string_view sv) | 33 | writeBase64(std::string_view sv) |
| 34 | { | 34 | { |
| 35 | - Pl_Concatenate cat{"writer concat", p}; | ||
| 36 | - Pl_Base64 base{"writer base64", &cat, Pl_Base64::a_encode}; | ||
| 37 | - base.write(reinterpret_cast<unsigned char const*>(sv.data()), sv.size()); | ||
| 38 | - base.finish(); | 35 | + auto encoded = Pl_Base64::encode(sv); |
| 36 | + p->write(reinterpret_cast<unsigned char const*>(encoded.data()), encoded.size()); | ||
| 39 | return *this; | 37 | return *this; |
| 40 | } | 38 | } |
| 41 | 39 |
libqpdf/qpdf/Pl_Base64.hh
| @@ -12,10 +12,12 @@ class Pl_Base64 final: public Pipeline | @@ -12,10 +12,12 @@ class Pl_Base64 final: public Pipeline | ||
| 12 | void write(unsigned char const* buf, size_t len) final; | 12 | void write(unsigned char const* buf, size_t len) final; |
| 13 | void finish() final; | 13 | void finish() final; |
| 14 | 14 | ||
| 15 | + static std::string encode(std::string_view data); | ||
| 16 | + static std::string decode(std::string_view data); | ||
| 17 | + | ||
| 15 | private: | 18 | private: |
| 16 | - void decode(unsigned char const* buf, size_t len); | ||
| 17 | - void encode(unsigned char const* buf, size_t len); | ||
| 18 | - void flush(); | 19 | + void decode_internal(std::string_view data); |
| 20 | + void encode_internal(std::string_view data); | ||
| 19 | void flush_decode(); | 21 | void flush_decode(); |
| 20 | void flush_encode(); | 22 | void flush_encode(); |
| 21 | void reset(); | 23 | void reset(); |
| @@ -23,8 +25,9 @@ class Pl_Base64 final: public Pipeline | @@ -23,8 +25,9 @@ class Pl_Base64 final: public Pipeline | ||
| 23 | action_e action; | 25 | action_e action; |
| 24 | unsigned char buf[4]{0, 0, 0, 0}; | 26 | unsigned char buf[4]{0, 0, 0, 0}; |
| 25 | size_t pos{0}; | 27 | size_t pos{0}; |
| 28 | + std::string in_buffer; | ||
| 29 | + std::string out_buffer; | ||
| 26 | bool end_of_data{false}; | 30 | bool end_of_data{false}; |
| 27 | - bool finished{false}; | ||
| 28 | }; | 31 | }; |
| 29 | 32 | ||
| 30 | #endif // PL_BASE64_HH | 33 | #endif // PL_BASE64_HH |