diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 36fc942..5c9d0f1 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -1498,10 +1498,7 @@ QPDFObjectHandle::parse( // the string. Buffer buf(const_cast(object_str)); auto input = BufferInputSource("parsed object", &buf); - qpdf::Tokenizer tokenizer; - bool empty = false; - auto result = QPDFParser(input, object_description, tokenizer, nullptr, context, false) - .parse(empty, false); + auto result = QPDFParser::parse(input, object_description, context); size_t offset = QIntC::to_size(input.tell()); while (offset < object_str.length()) { if (!isspace(object_str.at(offset))) { @@ -1621,7 +1618,6 @@ QPDFObjectHandle::parseContentStream_data( Tokenizer tokenizer; tokenizer.allowEOF(); auto sp_description = QPDFParser::make_description(description, "content"); - bool empty = false; while (QIntC::to_size(input.tell()) < stream_length) { // Read a token and seek to the beginning. The offset we get from this process is the // beginning of the next non-ignorable (space, comment) token. This way, the offset and @@ -1629,8 +1625,7 @@ QPDFObjectHandle::parseContentStream_data( tokenizer.nextToken(input, "content", true); qpdf_offset_t offset = input.getLastOffset(); input.seek(offset, SEEK_SET); - auto obj = - QPDFParser(input, sp_description, "content", tokenizer, context).parse(empty, true); + auto obj = QPDFParser::parse_content(input, sp_description, tokenizer, context); if (!obj) { // EOF break; @@ -1690,8 +1685,7 @@ QPDFObjectHandle::parse( StringDecrypter* decrypter, QPDF* context) { - return QPDFParser(*input, object_description, tokenizer, decrypter, context, false) - .parse(empty, false); + return QPDFParser::parse(*input, object_description, tokenizer, empty, decrypter, context); } qpdf_offset_t diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index fdb4827..05b9a35 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -15,6 +16,96 @@ using namespace std::literals; using ObjectPtr = std::shared_ptr; QPDFObjectHandle +QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context) +{ + qpdf::Tokenizer tokenizer; + bool empty = false; + return QPDFParser( + input, + make_description(input.getName(), object_description), + object_description, + tokenizer, + nullptr, + context, + false) + .parse(empty, false); +} + +QPDFObjectHandle +QPDFParser::parse_content( + InputSource& input, + std::shared_ptr sp_description, + qpdf::Tokenizer& tokenizer, + QPDF* context) +{ + bool empty = false; + return QPDFParser( + input, std::move(sp_description), "content", tokenizer, nullptr, context, true) + .parse(empty, true); +} + +QPDFObjectHandle +QPDFParser::parse( + InputSource& input, + std::string const& object_description, + QPDFTokenizer& tokenizer, + bool& empty, + QPDFObjectHandle::StringDecrypter* decrypter, + QPDF* context) +{ + return QPDFParser( + input, + make_description(input.getName(), object_description), + object_description, + *tokenizer.m, + decrypter, + context, + false) + .parse(empty, false); +} + +std::pair +QPDFParser::parse( + InputSource& input, + std::string const& object_description, + qpdf::Tokenizer& tokenizer, + QPDFObjectHandle::StringDecrypter* decrypter, + QPDF& context) +{ + bool empty{false}; + auto result = QPDFParser( + input, + make_description(input.getName(), object_description), + object_description, + tokenizer, + decrypter, + &context, + true) + .parse(empty, false); + return {result, empty}; +} + +std::pair +QPDFParser::parse( + BufferInputSource& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) +{ + bool empty{false}; + auto result = QPDFParser( + input, + std::make_shared( + QPDFObject::ObjStreamDescr(stream_id, obj_id)), + "", + tokenizer, + nullptr, + &context, + true, + stream_id, + obj_id) + .parse(empty, false); + return {result, empty}; +} + +QPDFObjectHandle QPDFParser::parse(bool& empty, bool content_stream) { // This method must take care not to resolve any objects. Don't check the type of any object diff --git a/libqpdf/QPDF_objects.cc b/libqpdf/QPDF_objects.cc index 8142bd2..e434a5d 100644 --- a/libqpdf/QPDF_objects.cc +++ b/libqpdf/QPDF_objects.cc @@ -1154,9 +1154,7 @@ QPDFObjectHandle QPDF::readTrailer() { qpdf_offset_t offset = m->file->tell(); - bool empty = false; - auto object = - QPDFParser(*m->file, "trailer", m->tokenizer, nullptr, this, true).parse(empty, false); + auto [object, empty] = QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, *this); if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // actual PDF files and Adobe Reader appears to ignore them. @@ -1174,13 +1172,11 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) { setLastObjectDescription(description, og); qpdf_offset_t offset = m->file->tell(); - bool empty = false; StringDecrypter decrypter{this, og}; StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; - auto object = - QPDFParser(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, this, true) - .parse(empty, false); + auto [object, empty] = + QPDFParser::parse(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, *this); if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // actual PDF files and Adobe Reader appears to ignore them. @@ -1294,10 +1290,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset QPDFObjectHandle QPDF::readObjectInStream(BufferInputSource& input, int stream_id, int obj_id) { - bool empty = false; - auto object = - QPDFParser(input, stream_id, obj_id, m->last_object_description, m->tokenizer, this) - .parse(empty, false); + auto [object, empty] = QPDFParser::parse(input, stream_id, obj_id, m->tokenizer, *this); if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // actual PDF files and Adobe Reader appears to ignore them. diff --git a/libqpdf/qpdf/QPDFParser.hh b/libqpdf/qpdf/QPDFParser.hh index f422fda..2bc76ae 100644 --- a/libqpdf/qpdf/QPDFParser.hh +++ b/libqpdf/qpdf/QPDFParser.hh @@ -11,96 +11,70 @@ class QPDFParser { public: - QPDFParser() = delete; + static QPDFObjectHandle + parse(InputSource& input, std::string const& object_description, QPDF* context); - // This constructor is only used by QPDFObjectHandle::parse overload taking a QPDFTokenizer. - // ABI: remove when removing QPDFObjectHandle::parse overload. - QPDFParser( + static QPDFObjectHandle parse_content( + InputSource& input, + std::shared_ptr sp_description, + qpdf::Tokenizer& tokenizer, + QPDF* context); + + // For use by deprecated QPDFObjectHandle::parse. + static QPDFObjectHandle parse( InputSource& input, std::string const& object_description, QPDFTokenizer& tokenizer, + bool& empty, QPDFObjectHandle::StringDecrypter* decrypter, - QPDF* context, - bool parse_pdf) : - input(input), - object_description(object_description), - tokenizer(*tokenizer.m), - decrypter(decrypter), - context(context), - description(make_description(input.getName(), object_description)), - parse_pdf(parse_pdf) - { - } + QPDF* context); - QPDFParser( + // For use by QPDF. Return parsed object and whether it is empty. + static std::pair parse( InputSource& input, std::string const& object_description, qpdf::Tokenizer& tokenizer, QPDFObjectHandle::StringDecrypter* decrypter, - QPDF* context, - bool parse_pdf) : - input(input), - object_description(object_description), - tokenizer(tokenizer), - decrypter(decrypter), - context(context), - description(make_description(input.getName(), object_description)), - parse_pdf(parse_pdf) - { - } + QPDF& context); - // Used by parseContentStream_data only - QPDFParser( - InputSource& input, - std::shared_ptr sp_description, - std::string const& object_description, + static std::pair parse( + BufferInputSource& input, + int stream_id, + int obj_id, qpdf::Tokenizer& tokenizer, - QPDF* context) : - input(input), - object_description(object_description), - tokenizer(tokenizer), - decrypter(nullptr), - context(context), - description(std::move(sp_description)), - parse_pdf(true) + QPDF& context); + + static std::shared_ptr + make_description(std::string const& input_name, std::string const& object_description) { + using namespace std::literals; + return std::make_shared( + input_name + ", " + object_description + " at offset $PO"); } - // Used by readObjectInStream only + private: QPDFParser( InputSource& input, - int stream_id, - int obj_id, + std::shared_ptr sp_description, std::string const& object_description, qpdf::Tokenizer& tokenizer, - QPDF* context) : + QPDFObjectHandle::StringDecrypter* decrypter, + QPDF* context, + bool parse_pdf, + int stream_id = 0, + int obj_id = 0) : input(input), object_description(object_description), tokenizer(tokenizer), - decrypter(nullptr), + decrypter(decrypter), context(context), - description( - std::make_shared( - QPDFObject::ObjStreamDescr(stream_id, obj_id))), - parse_pdf(true), + description(std::move(sp_description)), + parse_pdf(parse_pdf), stream_id(stream_id), obj_id(obj_id) { } - ~QPDFParser() = default; - - QPDFObjectHandle parse(bool& empty, bool content_stream); - - static std::shared_ptr - make_description(std::string const& input_name, std::string const& object_description) - { - using namespace std::literals; - return std::make_shared( - input_name + ", " + object_description + " at offset $PO"); - } - - private: // Parser state. Note: // state <= st_dictionary_value == (state = st_dictionary_key || state = st_dictionary_value) enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; @@ -123,6 +97,7 @@ class QPDFParser int null_count{0}; }; + QPDFObjectHandle parse(bool& empty, bool content_stream); QPDFObjectHandle parseRemainder(bool content_stream); void add(std::shared_ptr&& obj); void addNull(); @@ -146,7 +121,7 @@ class QPDFParser QPDFObjectHandle::StringDecrypter* decrypter; QPDF* context; std::shared_ptr description; - bool parse_pdf; + bool parse_pdf{false}; int stream_id{0}; int obj_id{0};