From 05e52843bf8db42bd59ccba1553f61ebbd70c329 Mon Sep 17 00:00:00 2001 From: m-holger Date: Sat, 26 Jul 2025 16:54:22 +0100 Subject: [PATCH] Refactor `Pl_Base64` to use `std::string` for output buffering and `std::string_view` for encoding/decoding, improving memory efficiency and simplifying buffer management --- libqpdf/Pl_Base64.cc | 56 ++++++++++++++++++++++++++++++++++++++------------------ libqpdf/qpdf/Pl_Base64.hh | 6 +++--- 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/libqpdf/Pl_Base64.cc b/libqpdf/Pl_Base64.cc index fbdbef1..a56392e 100644 --- a/libqpdf/Pl_Base64.cc +++ b/libqpdf/Pl_Base64.cc @@ -1,3 +1,5 @@ +#include + #include #include @@ -43,9 +45,12 @@ Pl_Base64::write(unsigned char const* data, size_t len) } void -Pl_Base64::decode(unsigned char const* data, size_t len) +Pl_Base64::decode(std::string_view data) { - unsigned char const* p = data; + auto len = data.size(); + auto res = (len / 4u + 1u) * 3u; + out_buffer.reserve(res); + unsigned char const* p = reinterpret_cast(data.data()); while (len > 0) { if (!util::is_space(to_c(*p))) { buf[pos++] = *p; @@ -56,12 +61,28 @@ Pl_Base64::decode(unsigned char const* data, size_t len) ++p; --len; } + if (pos > 0) { + for (size_t i = pos; i < 4; ++i) { + buf[i] = '='; + } + flush_decode(); + } + qpdf_assert_debug(out_buffer.size() <= res); } void -Pl_Base64::encode(unsigned char const* data, size_t len) +Pl_Base64::encode(std::string_view data) { - unsigned char const* p = data; + auto len = data.size(); + static const size_t max_len = (std::string().max_size() / 4u - 1u) * 3u; + // Change to constexpr once AppImage is built with GCC >= 12 + if (len > max_len) { + throw std::length_error(getIdentifier() + ": base64 decode: data exceeds maximum length"); + } + + auto res = (len / 3u + 1u) * 4u; + out_buffer.reserve(res); + unsigned char const* p = reinterpret_cast(data.data()); while (len > 0) { buf[pos++] = *p; if (pos == 3) { @@ -70,6 +91,10 @@ Pl_Base64::encode(unsigned char const* 
data, size_t len) ++p; --len; } + if (pos > 0) { + flush_encode(); + } + qpdf_assert_debug(out_buffer.size() <= res); } void @@ -78,7 +103,7 @@ Pl_Base64::flush_decode() if (end_of_data) { throw std::runtime_error(getIdentifier() + ": base64 decode: data follows pad characters"); } - int pad = 0; + size_t pad = 0; int shift = 18; int outval = 0; for (size_t i = 0; i < 4; ++i) { @@ -110,7 +135,7 @@ Pl_Base64::flush_decode() to_uc(0xff & outval), }; - next()->write(out, QIntC::to_size(3 - pad)); + out_buffer.append(reinterpret_cast(out), 3u - pad); reset(); } @@ -144,7 +169,7 @@ Pl_Base64::flush_encode() for (size_t i = 0; i < 3 - pos; ++i) { out[3 - i] = '='; } - next()->write(out, 4); + out_buffer.append(reinterpret_cast(out), 4); reset(); } @@ -152,21 +177,16 @@ void Pl_Base64::finish() { if (action == a_decode) { - decode(reinterpret_cast(in_buffer.data()), in_buffer.size()); - if (pos > 0) { - for (size_t i = pos; i < 4; ++i) { - buf[i] = '='; - } - flush_decode(); - } + decode(in_buffer); + } else { - encode(reinterpret_cast(in_buffer.data()), in_buffer.size()); - if (pos > 0) { - flush_encode(); - } + encode(in_buffer); } in_buffer.clear(); in_buffer.shrink_to_fit(); + next()->write(reinterpret_cast(out_buffer.data()), out_buffer.size()); + out_buffer.clear(); + out_buffer.shrink_to_fit(); next()->finish(); } diff --git a/libqpdf/qpdf/Pl_Base64.hh b/libqpdf/qpdf/Pl_Base64.hh index 30cbff3..aeb6e59 100644 --- a/libqpdf/qpdf/Pl_Base64.hh +++ b/libqpdf/qpdf/Pl_Base64.hh @@ -13,8 +13,8 @@ class Pl_Base64 final: public Pipeline void finish() final; private: - void decode(unsigned char const* buf, size_t len); - void encode(unsigned char const* buf, size_t len); + void decode(std::string_view data); + void encode(std::string_view data); void flush_decode(); void flush_encode(); void reset(); @@ -23,8 +23,8 @@ class Pl_Base64 final: public Pipeline unsigned char buf[4]{0, 0, 0, 0}; size_t pos{0}; std::string in_buffer; + std::string out_buffer; bool 
end_of_data{false}; - bool finished{false}; }; #endif // PL_BASE64_HH -- libgit2 0.21.4