diff --git a/include/qpdf/BufferInputSource.hh b/include/qpdf/BufferInputSource.hh index 33adc5a..90cdcdc 100644 --- a/include/qpdf/BufferInputSource.hh +++ b/include/qpdf/BufferInputSource.hh @@ -23,6 +23,8 @@ #include #include +#include + class QPDF_DLL_CLASS BufferInputSource: public InputSource { public: @@ -52,11 +54,17 @@ class QPDF_DLL_CLASS BufferInputSource: public InputSource void unreadCh(char ch) override; private: +#ifndef QPDF_FUTURE bool own_memory; std::string description; Buffer* buf; qpdf_offset_t cur_offset; qpdf_offset_t max_offset; +#else + class Members; + + std::unique_ptr m; +#endif }; #endif // QPDF_BUFFERINPUTSOURCE_HH diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 2fd40da..f1cc39f 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -95,7 +95,8 @@ class QPDF // Parse a PDF file loaded into a memory buffer. This works exactly like processFile except // that the PDF file is in memory instead of on disk. The description appears in any warning or - // error message in place of the file name. + // error message in place of the file name. The buffer is owned by the caller and must remain + // valid for the lifetime of the QPDF object. QPDF_DLL void processMemoryFile( char const* description, char const* buf, size_t length, char const* password = nullptr); diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 4a9aa57..6eaeedb 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -1351,7 +1351,7 @@ class QPDFObjectHandle: public qpdf::BaseHandle void setParsedOffset(qpdf_offset_t offset); void parseContentStream_internal(std::string const& description, ParserCallbacks* callbacks); static void parseContentStream_data( - std::shared_ptr, + std::string_view stream_data, std::string const& description, ParserCallbacks* callbacks, QPDF* context); diff --git a/libqpdf/BufferInputSource.cc b/libqpdf/BufferInputSource.cc index dae96f8..1ea3a4d 100644 --- a/libqpdf/BufferInputSource.cc +++ b/libqpdf/BufferInputSource.cc @@ -1,10 +1,17 @@ #include +#include +#include #include + #include #include #include +using namespace qpdf; + +#ifndef QPDF_FUTURE + BufferInputSource::BufferInputSource(std::string const& description, Buffer* buf, bool own_memory) : own_memory(own_memory), description(description), @@ -139,3 +146,166 @@ BufferInputSource::unreadCh(char ch) --cur_offset; } } + +#else + +class BufferInputSource::Members +{ + public: + Members(std::string const& description, Buffer* buf, bool own_memory) : + buf(own_memory ? buf : nullptr), + is(description, + buf && buf->getSize() > 0 + ? std::string_view(reinterpret_cast(buf->getBuffer()), buf->getSize()) + : std::string_view()) + { + } + + Members(std::string const& description, std::string const& str) : + content(str), + is(description, content) + { + } + + ~Members() = default; + + std::unique_ptr buf{nullptr}; + std::string content; + is::OffsetBuffer is; +}; + +BufferInputSource::BufferInputSource(std::string const& description, Buffer* buf, bool own_memory) : + m(std::make_unique(description, buf, own_memory)) +{ +} + +BufferInputSource::BufferInputSource(std::string const& description, std::string const& contents) : + m(std::make_unique(description, contents)) +{ +} +BufferInputSource::~BufferInputSource() = default; + +qpdf_offset_t +BufferInputSource::findAndSkipNextEOL() +{ + auto result = m->is.findAndSkipNextEOL(); + last_offset = m->is.getLastOffset(); + return result; +} +std::string const& +BufferInputSource::getName() const +{ + return m->is.getName(); +} +qpdf_offset_t +BufferInputSource::tell() +{ + return m->is.tell(); +} +void +BufferInputSource::seek(qpdf_offset_t offset, int whence) +{ + m->is.seek(offset, whence); +} +void +BufferInputSource::rewind() +{ + m->is.rewind(); +} +size_t +BufferInputSource::read(char* buffer, size_t length) +{ + auto result = m->is.read(buffer, length); + last_offset = m->is.getLastOffset(); + return result; +} +void +BufferInputSource::unreadCh(char ch) +{ + m->is.unreadCh(ch); +} + +#endif // QPDF_FUTURE + +qpdf_offset_t +is::OffsetBuffer::findAndSkipNextEOL() +{ + if (pos < 0) { + throw std::logic_error("INTERNAL ERROR: is::OffsetBuffer offset < 0"); + } + auto end_pos = static_cast(view_.size()); + if (pos >= end_pos) { + last_offset = end_pos + global_offset; + pos = end_pos; + return end_pos + global_offset; + } + + qpdf_offset_t result = 0; + auto buffer = view_.begin(); + auto end = view_.end(); + auto p = buffer + static_cast(pos); + + while (p < end && !(*p == '\r' || *p == '\n')) { + ++p; + } + if (p < end) { + result = p - buffer; + pos = result + 1; + ++p; + while (pos < end_pos && (*p == '\r' || *p == '\n')) { + ++p; + ++pos; + } + } else { + pos = end_pos; + result = end_pos; + } + return result + global_offset; +} + +void +is::OffsetBuffer::seek(qpdf_offset_t offset, int whence) +{ + switch (whence) { + case SEEK_SET: + pos = offset - global_offset; + break; + + case SEEK_END: + QIntC::range_check(static_cast(view_.size()), offset); + pos = static_cast(view_.size()) + offset; + break; + + case SEEK_CUR: + QIntC::range_check(pos, offset); + pos += offset; + break; + + default: + throw std::logic_error("INTERNAL ERROR: invalid argument to BufferInputSource::seek"); + break; + } + + if (pos < 0) { + throw std::runtime_error(description + ": seek before beginning of buffer"); + } +} + +size_t +is::OffsetBuffer::read(char* buffer, size_t length) +{ + if (pos < 0) { + throw std::logic_error("INTERNAL ERROR: is::OffsetBuffer offset < 0"); + } + auto end_pos = static_cast(view_.size()); + if (pos >= end_pos) { + last_offset = end_pos + global_offset; + return 0; + } + + last_offset = pos + global_offset; + size_t len = std::min(QIntC::to_size(end_pos - pos), length); + memcpy(buffer, view_.data() + pos, len); + pos += QIntC::to_offset(len); + return len; +} diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 852e53c..236babc 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -2,7 +2,7 @@ #include -#include +#include #include #include #include @@ -1348,7 +1348,7 @@ JSON::parse(InputSource& is, Reactor* reactor) JSON JSON::parse(std::string const& s) { - BufferInputSource bis("json input", s); + is::OffsetBuffer bis("json input", s); JSONParser jp(bis, nullptr); return jp.parse(); } diff --git a/libqpdf/Pl_QPDFTokenizer.cc b/libqpdf/Pl_QPDFTokenizer.cc index b60ab78..a41612b 100644 --- a/libqpdf/Pl_QPDFTokenizer.cc +++ b/libqpdf/Pl_QPDFTokenizer.cc @@ -1,9 +1,13 @@ #include -#include +#include +#include #include + #include +using namespace qpdf; + class Pl_QPDFTokenizer::Members { public: @@ -13,7 +17,8 @@ class Pl_QPDFTokenizer::Members QPDFObjectHandle::TokenFilter* filter{nullptr}; QPDFTokenizer tokenizer; - Pl_Buffer buf{"tokenizer buffer"}; + std::string buffer; + pl::String buf{"pl_tokenizer", nullptr, buffer}; }; Pl_QPDFTokenizer::Pl_QPDFTokenizer( @@ -39,8 +44,7 @@ Pl_QPDFTokenizer::write(unsigned char const* data, size_t len) void Pl_QPDFTokenizer::finish() { - m->buf.finish(); - auto input = BufferInputSource("tokenizer data", m->buf.getBuffer(), true); + auto input = is::OffsetBuffer("tokenizer data", m->buffer); std::string empty; while (true) { auto token = m->tokenizer.readToken(input, empty, true); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index a0c2054..cf39b61 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -11,7 +11,6 @@ #include #include -#include #include #include #include @@ -259,12 +258,8 @@ void QPDF::processMemoryFile( char const* description, char const* buf, size_t length, char const* password) { - processInputSource( - std::shared_ptr( - // line-break - new BufferInputSource( - description, new Buffer(QUtil::unsigned_char_pointer(buf), length), true)), - password); + auto is = std::make_shared(description, std::string_view{buf, length}); + processInputSource(is, password); } void diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 440e3e5..b8d8639 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -2,7 +2,6 @@ #include -#include #include #include #include @@ -1452,10 +1451,7 @@ QPDFObjectHandle QPDFObjectHandle::parse( QPDF* context, std::string const& object_str, std::string const& object_description) { - // BufferInputSource does not modify the input, but Buffer either requires a string& or copies - // the string. - Buffer buf(const_cast(object_str)); - auto input = BufferInputSource("parsed object", &buf); + auto input = is::OffsetBuffer("parsed object", object_str); auto result = QPDFParser::parse(input, object_description, context); size_t offset = QIntC::to_size(input.tell()); while (offset < object_str.length()) { @@ -1549,11 +1545,11 @@ void QPDFObjectHandle::parseContentStream_internal( std::string const& description, ParserCallbacks* callbacks) { - Pl_Buffer buf("concatenated stream data buffer"); + std::string stream_data; + pl::String buf(stream_data); std::string all_description; pipeContentStreams(&buf, description, all_description); - auto stream_data = buf.getBufferSharedPointer(); - callbacks->contentSize(stream_data->getSize()); + callbacks->contentSize(stream_data.size()); try { parseContentStream_data(stream_data, all_description, callbacks, getOwningQPDF()); } catch (TerminateParsing&) { @@ -1564,13 +1560,13 @@ QPDFObjectHandle::parseContentStream_internal( void QPDFObjectHandle::parseContentStream_data( - std::shared_ptr stream_data, + std::string_view stream_data, std::string const& description, ParserCallbacks* callbacks, QPDF* context) { - size_t stream_length = stream_data->getSize(); - auto input = BufferInputSource(description, stream_data.get()); + size_t stream_length = stream_data.size(); + auto input = is::OffsetBuffer(description, stream_data); Tokenizer tokenizer; tokenizer.allowEOF(); auto sp_description = QPDFParser::make_description(description, "content"); diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index 942e623..f671f64 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -1,6 +1,5 @@ #include -#include #include #include #include diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index b71a439..e6dd873 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -319,10 +320,11 @@ qpdf::Stream::setDictDescription() } } -std::shared_ptr +std::string Stream::getStreamData(qpdf_stream_decode_level_e decode_level) { - Pl_Buffer buf("stream data buffer"); + std::string result; + pl::String buf(result); bool filtered; pipeStreamData(&buf, &filtered, 0, decode_level, false, false); if (!filtered) { @@ -334,13 +336,14 @@ Stream::getStreamData(qpdf_stream_decode_level_e decode_level) "getStreamData called on unfilterable stream"); } QTC::TC("qpdf", "QPDF_Stream getStreamData"); - return buf.getBufferSharedPointer(); + return result; } -std::shared_ptr +std::string Stream::getRawStreamData() { - Pl_Buffer buf("stream data buffer"); + std::string result; + pl::String buf(result); if (!pipeStreamData(&buf, nullptr, 0, qpdf_dl_none, false, false)) { throw QPDFExc( qpdf_e_unsupported, @@ -350,7 +353,7 @@ Stream::getRawStreamData() "error getting raw stream data"); } QTC::TC("qpdf", "QPDF_Stream getRawStreamData"); - return buf.getBufferSharedPointer(); + return result; } bool @@ -683,13 +686,13 @@ QPDFObjectHandle::isRootMetadata() const std::shared_ptr QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level) { - return as_stream(error).getStreamData(level); + return std::make_shared(as_stream(error).getStreamData(level)); } std::shared_ptr QPDFObjectHandle::getRawStreamData() { - return as_stream(error).getRawStreamData(); + return std::make_shared(as_stream(error).getRawStreamData()); } bool diff --git a/libqpdf/QPDF_objects.cc b/libqpdf/QPDF_objects.cc index 4591677..b2a352d 100644 --- a/libqpdf/QPDF_objects.cc +++ b/libqpdf/QPDF_objects.cc @@ -2,19 +2,7 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include #include -#include #include #include #include @@ -25,6 +13,13 @@ #include #include +#include +#include +#include +#include +#include +#include + using namespace qpdf; using namespace std::literals; @@ -1676,13 +1671,13 @@ QPDF::resolveObjectsInStream(int obj_stream_number) // id, offset, size std::vector> offsets; - auto bp = obj_stream.getStreamData(qpdf_dl_specialized); + auto stream_data = obj_stream.getStreamData(qpdf_dl_specialized); - BufferInputSource input("", bp.get()); + is::OffsetBuffer input("", stream_data); - const auto b_size = bp->getSize(); + const auto b_size = stream_data.size(); const auto end_offset = static_cast(b_size); - auto b_start = bp->getBuffer(); + auto b_start = stream_data.data(); if (first >= end_offset) { throw damagedPDF( @@ -1763,8 +1758,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number) auto entry = m->xref_table.find(og); if (entry != m->xref_table.end() && entry->second.getType() == 2 && entry->second.getObjStreamNumber() == obj_stream_number) { - Buffer obj_buffer{b_start + obj_offset, obj_size}; - is::OffsetBuffer in("", &obj_buffer, obj_offset); + is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset); auto oh = readObjectInStream(in, obj_stream_number, obj_id); updateCache(og, oh.getObj(), end_before_space, end_after_space); } else { diff --git a/libqpdf/qpdf/InputSource_private.hh b/libqpdf/qpdf/InputSource_private.hh index 00d27c9..7732ea1 100644 --- a/libqpdf/qpdf/InputSource_private.hh +++ b/libqpdf/qpdf/InputSource_private.hh @@ -1,7 +1,7 @@ #ifndef QPDF_INPUTSOURCE_PRIVATE_HH #define QPDF_INPUTSOURCE_PRIVATE_HH -#include +#include #include #include @@ -13,8 +13,12 @@ namespace qpdf::is class OffsetBuffer final: public InputSource { public: - OffsetBuffer(std::string const& description, Buffer* buf, qpdf_offset_t global_offset) : - proxied(description, buf), + OffsetBuffer( + std::string const& description, + std::string_view view, + qpdf_offset_t global_offset = 0) : + description(description), + view_(view), global_offset(global_offset) { if (global_offset < 0) { @@ -23,58 +27,55 @@ namespace qpdf::is last_offset = global_offset; } - ~OffsetBuffer() final = default; - - qpdf_offset_t - findAndSkipNextEOL() final + OffsetBuffer(std::string const& description, Buffer* buf, qpdf_offset_t global_offset = 0) : + OffsetBuffer( + description, + {buf && buf->getSize() + ? std::string_view( + reinterpret_cast(buf->getBuffer()), buf->getSize()) + : std::string_view()}, + global_offset) { - return proxied.findAndSkipNextEOL() + global_offset; } + ~OffsetBuffer() final = default; + + qpdf_offset_t findAndSkipNextEOL() final; + std::string const& getName() const final { - return proxied.getName(); + return description; } qpdf_offset_t tell() final { - return proxied.tell() + global_offset; + return pos + global_offset; } - void - seek(qpdf_offset_t offset, int whence) final - { - if (whence == SEEK_SET) { - proxied.seek(offset - global_offset, whence); - } else { - proxied.seek(offset, whence); - } - } + void seek(qpdf_offset_t offset, int whence) final; void rewind() final { - seek(0, SEEK_SET); + pos = 0; } - size_t - read(char* buffer, size_t length) final - { - size_t result = proxied.read(buffer, length); - setLastOffset(proxied.getLastOffset() + global_offset); - return result; - } + size_t read(char* buffer, size_t length) final; void unreadCh(char ch) final { - proxied.unreadCh(ch); + if (pos > 0) { + --pos; + } } private: - BufferInputSource proxied; + std::string description; + qpdf_offset_t pos{0}; + std::string_view view_; qpdf_offset_t global_offset; }; diff --git a/libqpdf/qpdf/QPDFObjectHandle_private.hh b/libqpdf/qpdf/QPDFObjectHandle_private.hh index ddba62f..c73ea1b 100644 --- a/libqpdf/qpdf/QPDFObjectHandle_private.hh +++ b/libqpdf/qpdf/QPDFObjectHandle_private.hh @@ -254,8 +254,8 @@ namespace qpdf qpdf_stream_decode_level_e decode_level, bool suppress_warnings, bool will_retry); - std::shared_ptr getStreamData(qpdf_stream_decode_level_e level); - std::shared_ptr getRawStreamData(); + std::string getStreamData(qpdf_stream_decode_level_e level); + std::string getRawStreamData(); void replaceStreamData( std::shared_ptr data, QPDFObjectHandle const& filter,