Pl_Base64: write() now only appends to an input buffer; finish() runs the
encode/decode over the buffered data and forwards the result to next() when
one is set. Adds static Pl_Base64::encode()/decode() helpers operating on
std::string_view, and switches provide_data (QPDF_json.cc) and
JSON::Writer::writeBase64 (JSON_writer.hh) to those helpers.

Review fix folded into this patch: the length check added to
Pl_Base64::encode_internal() reported "base64 decode"; it now reports
"base64 encode". Also fixes the comment typo "is build" -> "is built".

diff --git a/libqpdf/Pl_Base64.cc b/libqpdf/Pl_Base64.cc index c9edc42..68d3fa3 100644 --- a/libqpdf/Pl_Base64.cc +++ b/libqpdf/Pl_Base64.cc @@ -1,3 +1,5 @@ +#include + #include #include @@ -31,63 +33,81 @@ Pl_Base64::Pl_Base64(char const* identifier, Pipeline* next, action_e action) : Pipeline(identifier, next), action(action) { - if (!next) { - throw std::logic_error("Attempt to create Pl_Base64 with nullptr as next"); - } } void Pl_Base64::write(unsigned char const* data, size_t len) { - if (finished) { - throw std::logic_error("Pl_Base64 used after finished"); - } - if (action == a_decode) { - decode(data, len); - } else { - encode(data, len); - } + in_buffer.append(reinterpret_cast(data), len); +} + +std::string +Pl_Base64::decode(std::string_view data) +{ + Pl_Base64 p("base64-decode", nullptr, a_decode); + p.decode_internal(data); + return std::move(p.out_buffer); +} + +std::string +Pl_Base64::encode(std::string_view data) +{ + Pl_Base64 p("base64-encode", nullptr, a_encode); + p.encode_internal(data); + return std::move(p.out_buffer); } void -Pl_Base64::decode(unsigned char const* data, size_t len) +Pl_Base64::decode_internal(std::string_view data) { - unsigned char const* p = data; + auto len = data.size(); + auto res = (len / 4u + 1u) * 3u; + out_buffer.reserve(res); + unsigned char const* p = reinterpret_cast(data.data()); while (len > 0) { if (!util::is_space(to_c(*p))) { buf[pos++] = *p; if (pos == 4) { - flush(); + flush_decode(); } } ++p; --len; } + if (pos > 0) { + for (size_t i = pos; i < 4; ++i) { + buf[i] = '='; + } + flush_decode(); + } + qpdf_assert_debug(out_buffer.size() <= res); } void -Pl_Base64::encode(unsigned char const* data, size_t len) +Pl_Base64::encode_internal(std::string_view data) { - unsigned char const* p = data; + auto len = data.size(); + static const size_t max_len = (std::string().max_size() / 4u - 1u) * 3u; + // Change to constexpr once AppImage is built with GCC >= 12 + if (len > max_len) { + throw 
std::length_error(getIdentifier() + ": base64 encode: data exceeds maximum length"); + } + + auto res = (len / 3u + 1u) * 4u; + out_buffer.reserve(res); + unsigned char const* p = reinterpret_cast(data.data()); while (len > 0) { buf[pos++] = *p; if (pos == 3) { - flush(); + flush_encode(); } ++p; --len; } -} - -void -Pl_Base64::flush() -{ - if (action == a_decode) { - flush_decode(); - } else { + if (pos > 0) { flush_encode(); } - reset(); + qpdf_assert_debug(out_buffer.size() <= res); } void @@ -96,7 +116,7 @@ Pl_Base64::flush_decode() if (end_of_data) { throw std::runtime_error(getIdentifier() + ": base64 decode: data follows pad characters"); } - int pad = 0; + size_t pad = 0; int shift = 18; int outval = 0; for (size_t i = 0; i < 4; ++i) { @@ -128,7 +148,8 @@ Pl_Base64::flush_decode() to_uc(0xff & outval), }; - next()->write(out, QIntC::to_size(3 - pad)); + out_buffer.append(reinterpret_cast(out), 3u - pad); + reset(); } void @@ -161,25 +182,27 @@ Pl_Base64::flush_encode() for (size_t i = 0; i < 3 - pos; ++i) { out[3 - i] = '='; } - next()->write(out, 4); + out_buffer.append(reinterpret_cast(out), 4); + reset(); } void Pl_Base64::finish() { - if (pos > 0) { - if (finished) { - throw std::logic_error("Pl_Base64 used after finished"); - } - if (action == a_decode) { - for (size_t i = pos; i < 4; ++i) { - buf[i] = '='; - } - } - flush(); + if (action == a_decode) { + decode_internal(in_buffer); + + } else { + encode_internal(in_buffer); + } + if (next()) { + in_buffer.clear(); + in_buffer.shrink_to_fit(); + next()->write(reinterpret_cast(out_buffer.data()), out_buffer.size()); + out_buffer.clear(); + out_buffer.shrink_to_fit(); + next()->finish(); } - finished = true; - next()->finish(); } void diff --git a/libqpdf/QPDF_json.cc b/libqpdf/QPDF_json.cc index a820f7b..e27a84b 100644 --- a/libqpdf/QPDF_json.cc +++ b/libqpdf/QPDF_json.cc @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -215,20 +216,10 @@ static std::function 
provide_data(std::shared_ptr is, qpdf_offset_t start, qpdf_offset_t end) { return [is, start, end](Pipeline* p) { - Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode); - p = &decode; - size_t bytes = QIntC::to_size(end - start); - char buf[8192]; - is->seek(start, SEEK_SET); - size_t len = 0; - while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) { - p->write(buf, len); - bytes -= len; - if (bytes == 0) { - break; - } - } - decode.finish(); + auto data = is->read(QIntC::to_size(end - start), start); + data = Pl_Base64::decode(data); + p->write(reinterpret_cast(data.data()), data.size()); + p->finish(); }; } @@ -615,7 +606,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) if (!tos.object.isStream()) { throw std::logic_error("current object is not stream in st_stream"); } - auto uninitialized = QPDFObjectHandle(); if (key == "dict") { this->saw_dict = true; if (setNextStateIfDictionary("stream.dict", value, st_object)) { @@ -630,7 +620,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) if (!value.getString(v)) { QTC::TC("qpdf", "QPDF_json stream data not string"); error(value.getStart(), "\"stream.data\" must be a string"); - tos.object.replaceStreamData("", uninitialized, uninitialized); + tos.object.replaceStreamData("", {}, {}); } else { // The range includes the quotes. 
auto start = value.getStart() + 1; @@ -638,8 +628,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) if (end < start) { throw std::logic_error("QPDF_json: JSON string length < 0"); } - tos.object.replaceStreamData( - provide_data(is, start, end), uninitialized, uninitialized); + tos.object.replaceStreamData(provide_data(is, start, end), {}, {}); } } else if (key == "datafile") { this->saw_datafile = true; @@ -649,10 +638,9 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) error( value.getStart(), "\"stream.datafile\" must be a string containing a file name"); - tos.object.replaceStreamData("", uninitialized, uninitialized); + tos.object.replaceStreamData("", {}, {}); } else { - tos.object.replaceStreamData( - QUtil::file_provider(filename), uninitialized, uninitialized); + tos.object.replaceStreamData(QUtil::file_provider(filename), {}, {}); } } else { // Ignore unknown keys for forward compatibility. diff --git a/libqpdf/qpdf/JSON_writer.hh b/libqpdf/qpdf/JSON_writer.hh index 3f770c5..a77138e 100644 --- a/libqpdf/qpdf/JSON_writer.hh +++ b/libqpdf/qpdf/JSON_writer.hh @@ -32,10 +32,8 @@ class JSON::Writer Writer& writeBase64(std::string_view sv) { - Pl_Concatenate cat{"writer concat", p}; - Pl_Base64 base{"writer base64", &cat, Pl_Base64::a_encode}; - base.write(reinterpret_cast(sv.data()), sv.size()); - base.finish(); + auto encoded = Pl_Base64::encode(sv); + p->write(reinterpret_cast(encoded.data()), encoded.size()); return *this; } diff --git a/libqpdf/qpdf/Pl_Base64.hh b/libqpdf/qpdf/Pl_Base64.hh index 1abee88..7d6c47d 100644 --- a/libqpdf/qpdf/Pl_Base64.hh +++ b/libqpdf/qpdf/Pl_Base64.hh @@ -12,10 +12,12 @@ class Pl_Base64 final: public Pipeline void write(unsigned char const* buf, size_t len) final; void finish() final; + static std::string encode(std::string_view data); + static std::string decode(std::string_view data); + private: - void decode(unsigned char const* buf, size_t len); - void 
encode(unsigned char const* buf, size_t len); - void flush(); + void decode_internal(std::string_view data); + void encode_internal(std::string_view data); void flush_decode(); void flush_encode(); void reset(); @@ -23,8 +25,9 @@ class Pl_Base64 final: public Pipeline action_e action; unsigned char buf[4]{0, 0, 0, 0}; size_t pos{0}; + std::string in_buffer; + std::string out_buffer; bool end_of_data{false}; - bool finished{false}; }; #endif // PL_BASE64_HH