Commit 05e52843bf8db42bd59ccba1553f61ebbd70c329
1 parent
a686cc5f
Refactor `Pl_Base64` to use `std::string` for output buffering and `std::string_view` for encoding/decoding, improving memory efficiency and simplifying buffer management
Showing 2 changed files with 41 additions and 21 deletions
libqpdf/Pl_Base64.cc
| 1 | +#include <qpdf/assert_debug.h> | |
| 2 | + | |
| 1 | 3 | #include <qpdf/Pl_Base64.hh> |
| 2 | 4 | |
| 3 | 5 | #include <qpdf/QIntC.hh> |
| ... | ... | @@ -43,9 +45,12 @@ Pl_Base64::write(unsigned char const* data, size_t len) |
| 43 | 45 | } |
| 44 | 46 | |
| 45 | 47 | void |
| 46 | -Pl_Base64::decode(unsigned char const* data, size_t len) | |
| 48 | +Pl_Base64::decode(std::string_view data) | |
| 47 | 49 | { |
| 48 | - unsigned char const* p = data; | |
| 50 | + auto len = data.size(); | |
| 51 | + auto res = (len / 4u + 1u) * 3u; | |
| 52 | + out_buffer.reserve(res); | |
| 53 | + unsigned char const* p = reinterpret_cast<const unsigned char*>(data.data()); | |
| 49 | 54 | while (len > 0) { |
| 50 | 55 | if (!util::is_space(to_c(*p))) { |
| 51 | 56 | buf[pos++] = *p; |
| ... | ... | @@ -56,12 +61,28 @@ Pl_Base64::decode(unsigned char const* data, size_t len) |
| 56 | 61 | ++p; |
| 57 | 62 | --len; |
| 58 | 63 | } |
| 64 | + if (pos > 0) { | |
| 65 | + for (size_t i = pos; i < 4; ++i) { | |
| 66 | + buf[i] = '='; | |
| 67 | + } | |
| 68 | + flush_decode(); | |
| 69 | + } | |
| 70 | + qpdf_assert_debug(out_buffer.size() <= res); | |
| 59 | 71 | } |
| 60 | 72 | |
| 61 | 73 | void |
| 62 | -Pl_Base64::encode(unsigned char const* data, size_t len) | |
| 74 | +Pl_Base64::encode(std::string_view data) | |
| 63 | 75 | { |
| 64 | - unsigned char const* p = data; | |
| 76 | + auto len = data.size(); | |
| 77 | + static const size_t max_len = (std::string().max_size() / 4u - 1u) * 3u; | |
| 78 | + // Change to constexpr once AppImage is built with GCC >= 12 | |
| 79 | + if (len > max_len) { | |
| 80 | + throw std::length_error(getIdentifier() + ": base64 decode: data exceeds maximum length"); | |
| 81 | + } | |
| 82 | + | |
| 83 | + auto res = (len / 3u + 1u) * 4u; | |
| 84 | + out_buffer.reserve(res); | |
| 85 | + unsigned char const* p = reinterpret_cast<const unsigned char*>(data.data()); | |
| 65 | 86 | while (len > 0) { |
| 66 | 87 | buf[pos++] = *p; |
| 67 | 88 | if (pos == 3) { |
| ... | ... | @@ -70,6 +91,10 @@ Pl_Base64::encode(unsigned char const* data, size_t len) |
| 70 | 91 | ++p; |
| 71 | 92 | --len; |
| 72 | 93 | } |
| 94 | + if (pos > 0) { | |
| 95 | + flush_encode(); | |
| 96 | + } | |
| 97 | + qpdf_assert_debug(out_buffer.size() <= res); | |
| 73 | 98 | } |
| 74 | 99 | |
| 75 | 100 | void |
| ... | ... | @@ -78,7 +103,7 @@ Pl_Base64::flush_decode() |
| 78 | 103 | if (end_of_data) { |
| 79 | 104 | throw std::runtime_error(getIdentifier() + ": base64 decode: data follows pad characters"); |
| 80 | 105 | } |
| 81 | - int pad = 0; | |
| 106 | + size_t pad = 0; | |
| 82 | 107 | int shift = 18; |
| 83 | 108 | int outval = 0; |
| 84 | 109 | for (size_t i = 0; i < 4; ++i) { |
| ... | ... | @@ -110,7 +135,7 @@ Pl_Base64::flush_decode() |
| 110 | 135 | to_uc(0xff & outval), |
| 111 | 136 | }; |
| 112 | 137 | |
| 113 | - next()->write(out, QIntC::to_size(3 - pad)); | |
| 138 | + out_buffer.append(reinterpret_cast<const char*>(out), 3u - pad); | |
| 114 | 139 | reset(); |
| 115 | 140 | } |
| 116 | 141 | |
| ... | ... | @@ -144,7 +169,7 @@ Pl_Base64::flush_encode() |
| 144 | 169 | for (size_t i = 0; i < 3 - pos; ++i) { |
| 145 | 170 | out[3 - i] = '='; |
| 146 | 171 | } |
| 147 | - next()->write(out, 4); | |
| 172 | + out_buffer.append(reinterpret_cast<const char*>(out), 4); | |
| 148 | 173 | reset(); |
| 149 | 174 | } |
| 150 | 175 | |
| ... | ... | @@ -152,21 +177,16 @@ void |
| 152 | 177 | Pl_Base64::finish() |
| 153 | 178 | { |
| 154 | 179 | if (action == a_decode) { |
| 155 | - decode(reinterpret_cast<unsigned char const*>(in_buffer.data()), in_buffer.size()); | |
| 156 | - if (pos > 0) { | |
| 157 | - for (size_t i = pos; i < 4; ++i) { | |
| 158 | - buf[i] = '='; | |
| 159 | - } | |
| 160 | - flush_decode(); | |
| 161 | - } | |
| 180 | + decode(in_buffer); | |
| 181 | + | |
| 162 | 182 | } else { |
| 163 | - encode(reinterpret_cast<unsigned char const*>(in_buffer.data()), in_buffer.size()); | |
| 164 | - if (pos > 0) { | |
| 165 | - flush_encode(); | |
| 166 | - } | |
| 183 | + encode(in_buffer); | |
| 167 | 184 | } |
| 168 | 185 | in_buffer.clear(); |
| 169 | 186 | in_buffer.shrink_to_fit(); |
| 187 | + next()->write(reinterpret_cast<unsigned char const*>(out_buffer.data()), out_buffer.size()); | |
| 188 | + out_buffer.clear(); | |
| 189 | + out_buffer.shrink_to_fit(); | |
| 170 | 190 | next()->finish(); |
| 171 | 191 | } |
| 172 | 192 | ... | ... |
libqpdf/qpdf/Pl_Base64.hh
| ... | ... | @@ -13,8 +13,8 @@ class Pl_Base64 final: public Pipeline |
| 13 | 13 | void finish() final; |
| 14 | 14 | |
| 15 | 15 | private: |
| 16 | - void decode(unsigned char const* buf, size_t len); | |
| 17 | - void encode(unsigned char const* buf, size_t len); | |
| 16 | + void decode(std::string_view data); | |
| 17 | + void encode(std::string_view data); | |
| 18 | 18 | void flush_decode(); |
| 19 | 19 | void flush_encode(); |
| 20 | 20 | void reset(); |
| ... | ... | @@ -23,8 +23,8 @@ class Pl_Base64 final: public Pipeline |
| 23 | 23 | unsigned char buf[4]{0, 0, 0, 0}; |
| 24 | 24 | size_t pos{0}; |
| 25 | 25 | std::string in_buffer; |
| 26 | + std::string out_buffer; | |
| 26 | 27 | bool end_of_data{false}; |
| 27 | - bool finished{false}; | |
| 28 | 28 | }; |
| 29 | 29 | |
| 30 | 30 | #endif // PL_BASE64_HH | ... | ... |