Commit 5eb9a18a863e9c2c462aee80af45ba2ea982585b
Committed by GitHub
Merge pull request #1399 from m-holger/parse
Refactor calls to QPDFParser::parse
Showing 4 changed files with 136 additions and 83 deletions.
libqpdf/QPDFObjectHandle.cc
| @@ -1498,10 +1498,7 @@ QPDFObjectHandle::parse( | @@ -1498,10 +1498,7 @@ QPDFObjectHandle::parse( | ||
| 1498 | // the string. | 1498 | // the string. |
| 1499 | Buffer buf(const_cast<std::string&>(object_str)); | 1499 | Buffer buf(const_cast<std::string&>(object_str)); |
| 1500 | auto input = BufferInputSource("parsed object", &buf); | 1500 | auto input = BufferInputSource("parsed object", &buf); |
| 1501 | - qpdf::Tokenizer tokenizer; | ||
| 1502 | - bool empty = false; | ||
| 1503 | - auto result = QPDFParser(input, object_description, tokenizer, nullptr, context, false) | ||
| 1504 | - .parse(empty, false); | 1501 | + auto result = QPDFParser::parse(input, object_description, context); |
| 1505 | size_t offset = QIntC::to_size(input.tell()); | 1502 | size_t offset = QIntC::to_size(input.tell()); |
| 1506 | while (offset < object_str.length()) { | 1503 | while (offset < object_str.length()) { |
| 1507 | if (!isspace(object_str.at(offset))) { | 1504 | if (!isspace(object_str.at(offset))) { |
| @@ -1621,7 +1618,6 @@ QPDFObjectHandle::parseContentStream_data( | @@ -1621,7 +1618,6 @@ QPDFObjectHandle::parseContentStream_data( | ||
| 1621 | Tokenizer tokenizer; | 1618 | Tokenizer tokenizer; |
| 1622 | tokenizer.allowEOF(); | 1619 | tokenizer.allowEOF(); |
| 1623 | auto sp_description = QPDFParser::make_description(description, "content"); | 1620 | auto sp_description = QPDFParser::make_description(description, "content"); |
| 1624 | - bool empty = false; | ||
| 1625 | while (QIntC::to_size(input.tell()) < stream_length) { | 1621 | while (QIntC::to_size(input.tell()) < stream_length) { |
| 1626 | // Read a token and seek to the beginning. The offset we get from this process is the | 1622 | // Read a token and seek to the beginning. The offset we get from this process is the |
| 1627 | // beginning of the next non-ignorable (space, comment) token. This way, the offset and | 1623 | // beginning of the next non-ignorable (space, comment) token. This way, the offset and |
| @@ -1629,8 +1625,7 @@ QPDFObjectHandle::parseContentStream_data( | @@ -1629,8 +1625,7 @@ QPDFObjectHandle::parseContentStream_data( | ||
| 1629 | tokenizer.nextToken(input, "content", true); | 1625 | tokenizer.nextToken(input, "content", true); |
| 1630 | qpdf_offset_t offset = input.getLastOffset(); | 1626 | qpdf_offset_t offset = input.getLastOffset(); |
| 1631 | input.seek(offset, SEEK_SET); | 1627 | input.seek(offset, SEEK_SET); |
| 1632 | - auto obj = | ||
| 1633 | - QPDFParser(input, sp_description, "content", tokenizer, context).parse(empty, true); | 1628 | + auto obj = QPDFParser::parse_content(input, sp_description, tokenizer, context); |
| 1634 | if (!obj) { | 1629 | if (!obj) { |
| 1635 | // EOF | 1630 | // EOF |
| 1636 | break; | 1631 | break; |
| @@ -1690,8 +1685,7 @@ QPDFObjectHandle::parse( | @@ -1690,8 +1685,7 @@ QPDFObjectHandle::parse( | ||
| 1690 | StringDecrypter* decrypter, | 1685 | StringDecrypter* decrypter, |
| 1691 | QPDF* context) | 1686 | QPDF* context) |
| 1692 | { | 1687 | { |
| 1693 | - return QPDFParser(*input, object_description, tokenizer, decrypter, context, false) | ||
| 1694 | - .parse(empty, false); | 1688 | + return QPDFParser::parse(*input, object_description, tokenizer, empty, decrypter, context); |
| 1695 | } | 1689 | } |
| 1696 | 1690 | ||
| 1697 | qpdf_offset_t | 1691 | qpdf_offset_t |
libqpdf/QPDFParser.cc
| 1 | #include <qpdf/QPDFParser.hh> | 1 | #include <qpdf/QPDFParser.hh> |
| 2 | 2 | ||
| 3 | +#include <qpdf/BufferInputSource.hh> | ||
| 3 | #include <qpdf/QPDF.hh> | 4 | #include <qpdf/QPDF.hh> |
| 4 | #include <qpdf/QPDFObjGen.hh> | 5 | #include <qpdf/QPDFObjGen.hh> |
| 5 | #include <qpdf/QPDFObjectHandle.hh> | 6 | #include <qpdf/QPDFObjectHandle.hh> |
| @@ -15,6 +16,96 @@ using namespace std::literals; | @@ -15,6 +16,96 @@ using namespace std::literals; | ||
| 15 | using ObjectPtr = std::shared_ptr<QPDFObject>; | 16 | using ObjectPtr = std::shared_ptr<QPDFObject>; |
| 16 | 17 | ||
| 17 | QPDFObjectHandle | 18 | QPDFObjectHandle |
| 19 | +QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context) | ||
| 20 | +{ | ||
| 21 | + qpdf::Tokenizer tokenizer; | ||
| 22 | + bool empty = false; | ||
| 23 | + return QPDFParser( | ||
| 24 | + input, | ||
| 25 | + make_description(input.getName(), object_description), | ||
| 26 | + object_description, | ||
| 27 | + tokenizer, | ||
| 28 | + nullptr, | ||
| 29 | + context, | ||
| 30 | + false) | ||
| 31 | + .parse(empty, false); | ||
| 32 | +} | ||
| 33 | + | ||
| 34 | +QPDFObjectHandle | ||
| 35 | +QPDFParser::parse_content( | ||
| 36 | + InputSource& input, | ||
| 37 | + std::shared_ptr<QPDFObject::Description> sp_description, | ||
| 38 | + qpdf::Tokenizer& tokenizer, | ||
| 39 | + QPDF* context) | ||
| 40 | +{ | ||
| 41 | + bool empty = false; | ||
| 42 | + return QPDFParser( | ||
| 43 | + input, std::move(sp_description), "content", tokenizer, nullptr, context, true) | ||
| 44 | + .parse(empty, true); | ||
| 45 | +} | ||
| 46 | + | ||
| 47 | +QPDFObjectHandle | ||
| 48 | +QPDFParser::parse( | ||
| 49 | + InputSource& input, | ||
| 50 | + std::string const& object_description, | ||
| 51 | + QPDFTokenizer& tokenizer, | ||
| 52 | + bool& empty, | ||
| 53 | + QPDFObjectHandle::StringDecrypter* decrypter, | ||
| 54 | + QPDF* context) | ||
| 55 | +{ | ||
| 56 | + return QPDFParser( | ||
| 57 | + input, | ||
| 58 | + make_description(input.getName(), object_description), | ||
| 59 | + object_description, | ||
| 60 | + *tokenizer.m, | ||
| 61 | + decrypter, | ||
| 62 | + context, | ||
| 63 | + false) | ||
| 64 | + .parse(empty, false); | ||
| 65 | +} | ||
| 66 | + | ||
| 67 | +std::pair<QPDFObjectHandle, bool> | ||
| 68 | +QPDFParser::parse( | ||
| 69 | + InputSource& input, | ||
| 70 | + std::string const& object_description, | ||
| 71 | + qpdf::Tokenizer& tokenizer, | ||
| 72 | + QPDFObjectHandle::StringDecrypter* decrypter, | ||
| 73 | + QPDF& context) | ||
| 74 | +{ | ||
| 75 | + bool empty{false}; | ||
| 76 | + auto result = QPDFParser( | ||
| 77 | + input, | ||
| 78 | + make_description(input.getName(), object_description), | ||
| 79 | + object_description, | ||
| 80 | + tokenizer, | ||
| 81 | + decrypter, | ||
| 82 | + &context, | ||
| 83 | + true) | ||
| 84 | + .parse(empty, false); | ||
| 85 | + return {result, empty}; | ||
| 86 | +} | ||
| 87 | + | ||
| 88 | +std::pair<QPDFObjectHandle, bool> | ||
| 89 | +QPDFParser::parse( | ||
| 90 | + BufferInputSource& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) | ||
| 91 | +{ | ||
| 92 | + bool empty{false}; | ||
| 93 | + auto result = QPDFParser( | ||
| 94 | + input, | ||
| 95 | + std::make_shared<QPDFObject::Description>( | ||
| 96 | + QPDFObject::ObjStreamDescr(stream_id, obj_id)), | ||
| 97 | + "", | ||
| 98 | + tokenizer, | ||
| 99 | + nullptr, | ||
| 100 | + &context, | ||
| 101 | + true, | ||
| 102 | + stream_id, | ||
| 103 | + obj_id) | ||
| 104 | + .parse(empty, false); | ||
| 105 | + return {result, empty}; | ||
| 106 | +} | ||
| 107 | + | ||
| 108 | +QPDFObjectHandle | ||
| 18 | QPDFParser::parse(bool& empty, bool content_stream) | 109 | QPDFParser::parse(bool& empty, bool content_stream) |
| 19 | { | 110 | { |
| 20 | // This method must take care not to resolve any objects. Don't check the type of any object | 111 | // This method must take care not to resolve any objects. Don't check the type of any object |
libqpdf/QPDF_objects.cc
| @@ -1154,9 +1154,7 @@ QPDFObjectHandle | @@ -1154,9 +1154,7 @@ QPDFObjectHandle | ||
| 1154 | QPDF::readTrailer() | 1154 | QPDF::readTrailer() |
| 1155 | { | 1155 | { |
| 1156 | qpdf_offset_t offset = m->file->tell(); | 1156 | qpdf_offset_t offset = m->file->tell(); |
| 1157 | - bool empty = false; | ||
| 1158 | - auto object = | ||
| 1159 | - QPDFParser(*m->file, "trailer", m->tokenizer, nullptr, this, true).parse(empty, false); | 1157 | + auto [object, empty] = QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, *this); |
| 1160 | if (empty) { | 1158 | if (empty) { |
| 1161 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in | 1159 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in |
| 1162 | // actual PDF files and Adobe Reader appears to ignore them. | 1160 | // actual PDF files and Adobe Reader appears to ignore them. |
| @@ -1174,13 +1172,11 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) | @@ -1174,13 +1172,11 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) | ||
| 1174 | { | 1172 | { |
| 1175 | setLastObjectDescription(description, og); | 1173 | setLastObjectDescription(description, og); |
| 1176 | qpdf_offset_t offset = m->file->tell(); | 1174 | qpdf_offset_t offset = m->file->tell(); |
| 1177 | - bool empty = false; | ||
| 1178 | 1175 | ||
| 1179 | StringDecrypter decrypter{this, og}; | 1176 | StringDecrypter decrypter{this, og}; |
| 1180 | StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; | 1177 | StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; |
| 1181 | - auto object = | ||
| 1182 | - QPDFParser(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, this, true) | ||
| 1183 | - .parse(empty, false); | 1178 | + auto [object, empty] = |
| 1179 | + QPDFParser::parse(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, *this); | ||
| 1184 | if (empty) { | 1180 | if (empty) { |
| 1185 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in | 1181 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in |
| 1186 | // actual PDF files and Adobe Reader appears to ignore them. | 1182 | // actual PDF files and Adobe Reader appears to ignore them. |
| @@ -1294,10 +1290,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset | @@ -1294,10 +1290,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset | ||
| 1294 | QPDFObjectHandle | 1290 | QPDFObjectHandle |
| 1295 | QPDF::readObjectInStream(BufferInputSource& input, int stream_id, int obj_id) | 1291 | QPDF::readObjectInStream(BufferInputSource& input, int stream_id, int obj_id) |
| 1296 | { | 1292 | { |
| 1297 | - bool empty = false; | ||
| 1298 | - auto object = | ||
| 1299 | - QPDFParser(input, stream_id, obj_id, m->last_object_description, m->tokenizer, this) | ||
| 1300 | - .parse(empty, false); | 1293 | + auto [object, empty] = QPDFParser::parse(input, stream_id, obj_id, m->tokenizer, *this); |
| 1301 | if (empty) { | 1294 | if (empty) { |
| 1302 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in | 1295 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in |
| 1303 | // actual PDF files and Adobe Reader appears to ignore them. | 1296 | // actual PDF files and Adobe Reader appears to ignore them. |
libqpdf/qpdf/QPDFParser.hh
| @@ -11,96 +11,70 @@ | @@ -11,96 +11,70 @@ | ||
| 11 | class QPDFParser | 11 | class QPDFParser |
| 12 | { | 12 | { |
| 13 | public: | 13 | public: |
| 14 | - QPDFParser() = delete; | 14 | + static QPDFObjectHandle |
| 15 | + parse(InputSource& input, std::string const& object_description, QPDF* context); | ||
| 15 | 16 | ||
| 16 | - // This constructor is only used by QPDFObjectHandle::parse overload taking a QPDFTokenizer. | ||
| 17 | - // ABI: remove when removing QPDFObjectHandle::parse overload. | ||
| 18 | - QPDFParser( | 17 | + static QPDFObjectHandle parse_content( |
| 18 | + InputSource& input, | ||
| 19 | + std::shared_ptr<QPDFObject::Description> sp_description, | ||
| 20 | + qpdf::Tokenizer& tokenizer, | ||
| 21 | + QPDF* context); | ||
| 22 | + | ||
| 23 | + // For use by deprecated QPDFObjectHandle::parse. | ||
| 24 | + static QPDFObjectHandle parse( | ||
| 19 | InputSource& input, | 25 | InputSource& input, |
| 20 | std::string const& object_description, | 26 | std::string const& object_description, |
| 21 | QPDFTokenizer& tokenizer, | 27 | QPDFTokenizer& tokenizer, |
| 28 | + bool& empty, | ||
| 22 | QPDFObjectHandle::StringDecrypter* decrypter, | 29 | QPDFObjectHandle::StringDecrypter* decrypter, |
| 23 | - QPDF* context, | ||
| 24 | - bool parse_pdf) : | ||
| 25 | - input(input), | ||
| 26 | - object_description(object_description), | ||
| 27 | - tokenizer(*tokenizer.m), | ||
| 28 | - decrypter(decrypter), | ||
| 29 | - context(context), | ||
| 30 | - description(make_description(input.getName(), object_description)), | ||
| 31 | - parse_pdf(parse_pdf) | ||
| 32 | - { | ||
| 33 | - } | 30 | + QPDF* context); |
| 34 | 31 | ||
| 35 | - QPDFParser( | 32 | + // For use by QPDF. Return parsed object and whether it is empty. |
| 33 | + static std::pair<QPDFObjectHandle, bool> parse( | ||
| 36 | InputSource& input, | 34 | InputSource& input, |
| 37 | std::string const& object_description, | 35 | std::string const& object_description, |
| 38 | qpdf::Tokenizer& tokenizer, | 36 | qpdf::Tokenizer& tokenizer, |
| 39 | QPDFObjectHandle::StringDecrypter* decrypter, | 37 | QPDFObjectHandle::StringDecrypter* decrypter, |
| 40 | - QPDF* context, | ||
| 41 | - bool parse_pdf) : | ||
| 42 | - input(input), | ||
| 43 | - object_description(object_description), | ||
| 44 | - tokenizer(tokenizer), | ||
| 45 | - decrypter(decrypter), | ||
| 46 | - context(context), | ||
| 47 | - description(make_description(input.getName(), object_description)), | ||
| 48 | - parse_pdf(parse_pdf) | ||
| 49 | - { | ||
| 50 | - } | 38 | + QPDF& context); |
| 51 | 39 | ||
| 52 | - // Used by parseContentStream_data only | ||
| 53 | - QPDFParser( | ||
| 54 | - InputSource& input, | ||
| 55 | - std::shared_ptr<QPDFObject::Description> sp_description, | ||
| 56 | - std::string const& object_description, | 40 | + static std::pair<QPDFObjectHandle, bool> parse( |
| 41 | + BufferInputSource& input, | ||
| 42 | + int stream_id, | ||
| 43 | + int obj_id, | ||
| 57 | qpdf::Tokenizer& tokenizer, | 44 | qpdf::Tokenizer& tokenizer, |
| 58 | - QPDF* context) : | ||
| 59 | - input(input), | ||
| 60 | - object_description(object_description), | ||
| 61 | - tokenizer(tokenizer), | ||
| 62 | - decrypter(nullptr), | ||
| 63 | - context(context), | ||
| 64 | - description(std::move(sp_description)), | ||
| 65 | - parse_pdf(true) | 45 | + QPDF& context); |
| 46 | + | ||
| 47 | + static std::shared_ptr<QPDFObject::Description> | ||
| 48 | + make_description(std::string const& input_name, std::string const& object_description) | ||
| 66 | { | 49 | { |
| 50 | + using namespace std::literals; | ||
| 51 | + return std::make_shared<QPDFObject::Description>( | ||
| 52 | + input_name + ", " + object_description + " at offset $PO"); | ||
| 67 | } | 53 | } |
| 68 | 54 | ||
| 69 | - // Used by readObjectInStream only | 55 | + private: |
| 70 | QPDFParser( | 56 | QPDFParser( |
| 71 | InputSource& input, | 57 | InputSource& input, |
| 72 | - int stream_id, | ||
| 73 | - int obj_id, | 58 | + std::shared_ptr<QPDFObject::Description> sp_description, |
| 74 | std::string const& object_description, | 59 | std::string const& object_description, |
| 75 | qpdf::Tokenizer& tokenizer, | 60 | qpdf::Tokenizer& tokenizer, |
| 76 | - QPDF* context) : | 61 | + QPDFObjectHandle::StringDecrypter* decrypter, |
| 62 | + QPDF* context, | ||
| 63 | + bool parse_pdf, | ||
| 64 | + int stream_id = 0, | ||
| 65 | + int obj_id = 0) : | ||
| 77 | input(input), | 66 | input(input), |
| 78 | object_description(object_description), | 67 | object_description(object_description), |
| 79 | tokenizer(tokenizer), | 68 | tokenizer(tokenizer), |
| 80 | - decrypter(nullptr), | 69 | + decrypter(decrypter), |
| 81 | context(context), | 70 | context(context), |
| 82 | - description( | ||
| 83 | - std::make_shared<QPDFObject::Description>( | ||
| 84 | - QPDFObject::ObjStreamDescr(stream_id, obj_id))), | ||
| 85 | - parse_pdf(true), | 71 | + description(std::move(sp_description)), |
| 72 | + parse_pdf(parse_pdf), | ||
| 86 | stream_id(stream_id), | 73 | stream_id(stream_id), |
| 87 | obj_id(obj_id) | 74 | obj_id(obj_id) |
| 88 | { | 75 | { |
| 89 | } | 76 | } |
| 90 | 77 | ||
| 91 | - ~QPDFParser() = default; | ||
| 92 | - | ||
| 93 | - QPDFObjectHandle parse(bool& empty, bool content_stream); | ||
| 94 | - | ||
| 95 | - static std::shared_ptr<QPDFObject::Description> | ||
| 96 | - make_description(std::string const& input_name, std::string const& object_description) | ||
| 97 | - { | ||
| 98 | - using namespace std::literals; | ||
| 99 | - return std::make_shared<QPDFObject::Description>( | ||
| 100 | - input_name + ", " + object_description + " at offset $PO"); | ||
| 101 | - } | ||
| 102 | - | ||
| 103 | - private: | ||
| 104 | // Parser state. Note: | 78 | // Parser state. Note: |
| 105 | // state <= st_dictionary_value == (state = st_dictionary_key || state = st_dictionary_value) | 79 | // state <= st_dictionary_value == (state = st_dictionary_key || state = st_dictionary_value) |
| 106 | enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; | 80 | enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; |
| @@ -123,6 +97,7 @@ class QPDFParser | @@ -123,6 +97,7 @@ class QPDFParser | ||
| 123 | int null_count{0}; | 97 | int null_count{0}; |
| 124 | }; | 98 | }; |
| 125 | 99 | ||
| 100 | + QPDFObjectHandle parse(bool& empty, bool content_stream); | ||
| 126 | QPDFObjectHandle parseRemainder(bool content_stream); | 101 | QPDFObjectHandle parseRemainder(bool content_stream); |
| 127 | void add(std::shared_ptr<QPDFObject>&& obj); | 102 | void add(std::shared_ptr<QPDFObject>&& obj); |
| 128 | void addNull(); | 103 | void addNull(); |
| @@ -146,7 +121,7 @@ class QPDFParser | @@ -146,7 +121,7 @@ class QPDFParser | ||
| 146 | QPDFObjectHandle::StringDecrypter* decrypter; | 121 | QPDFObjectHandle::StringDecrypter* decrypter; |
| 147 | QPDF* context; | 122 | QPDF* context; |
| 148 | std::shared_ptr<QPDFObject::Description> description; | 123 | std::shared_ptr<QPDFObject::Description> description; |
| 149 | - bool parse_pdf; | 124 | + bool parse_pdf{false}; |
| 150 | int stream_id{0}; | 125 | int stream_id{0}; |
| 151 | int obj_id{0}; | 126 | int obj_id{0}; |
| 152 | 127 |