Commit 5eb9a18a863e9c2c462aee80af45ba2ea982585b
Committed by GitHub
Merge pull request #1399 from m-holger/parse
Refactor calls to QPDFParser::parse
Showing 4 changed files with 136 additions and 83 deletions
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -1498,10 +1498,7 @@ QPDFObjectHandle::parse( |
| 1498 | 1498 | // the string. |
| 1499 | 1499 | Buffer buf(const_cast<std::string&>(object_str)); |
| 1500 | 1500 | auto input = BufferInputSource("parsed object", &buf); |
| 1501 | - qpdf::Tokenizer tokenizer; | |
| 1502 | - bool empty = false; | |
| 1503 | - auto result = QPDFParser(input, object_description, tokenizer, nullptr, context, false) | |
| 1504 | - .parse(empty, false); | |
| 1501 | + auto result = QPDFParser::parse(input, object_description, context); | |
| 1505 | 1502 | size_t offset = QIntC::to_size(input.tell()); |
| 1506 | 1503 | while (offset < object_str.length()) { |
| 1507 | 1504 | if (!isspace(object_str.at(offset))) { |
| ... | ... | @@ -1621,7 +1618,6 @@ QPDFObjectHandle::parseContentStream_data( |
| 1621 | 1618 | Tokenizer tokenizer; |
| 1622 | 1619 | tokenizer.allowEOF(); |
| 1623 | 1620 | auto sp_description = QPDFParser::make_description(description, "content"); |
| 1624 | - bool empty = false; | |
| 1625 | 1621 | while (QIntC::to_size(input.tell()) < stream_length) { |
| 1626 | 1622 | // Read a token and seek to the beginning. The offset we get from this process is the |
| 1627 | 1623 | // beginning of the next non-ignorable (space, comment) token. This way, the offset and |
| ... | ... | @@ -1629,8 +1625,7 @@ QPDFObjectHandle::parseContentStream_data( |
| 1629 | 1625 | tokenizer.nextToken(input, "content", true); |
| 1630 | 1626 | qpdf_offset_t offset = input.getLastOffset(); |
| 1631 | 1627 | input.seek(offset, SEEK_SET); |
| 1632 | - auto obj = | |
| 1633 | - QPDFParser(input, sp_description, "content", tokenizer, context).parse(empty, true); | |
| 1628 | + auto obj = QPDFParser::parse_content(input, sp_description, tokenizer, context); | |
| 1634 | 1629 | if (!obj) { |
| 1635 | 1630 | // EOF |
| 1636 | 1631 | break; |
| ... | ... | @@ -1690,8 +1685,7 @@ QPDFObjectHandle::parse( |
| 1690 | 1685 | StringDecrypter* decrypter, |
| 1691 | 1686 | QPDF* context) |
| 1692 | 1687 | { |
| 1693 | - return QPDFParser(*input, object_description, tokenizer, decrypter, context, false) | |
| 1694 | - .parse(empty, false); | |
| 1688 | + return QPDFParser::parse(*input, object_description, tokenizer, empty, decrypter, context); | |
| 1695 | 1689 | } |
| 1696 | 1690 | |
| 1697 | 1691 | qpdf_offset_t | ... | ... |
libqpdf/QPDFParser.cc
| 1 | 1 | #include <qpdf/QPDFParser.hh> |
| 2 | 2 | |
| 3 | +#include <qpdf/BufferInputSource.hh> | |
| 3 | 4 | #include <qpdf/QPDF.hh> |
| 4 | 5 | #include <qpdf/QPDFObjGen.hh> |
| 5 | 6 | #include <qpdf/QPDFObjectHandle.hh> |
| ... | ... | @@ -15,6 +16,96 @@ using namespace std::literals; |
| 15 | 16 | using ObjectPtr = std::shared_ptr<QPDFObject>; |
| 16 | 17 | |
| 17 | 18 | QPDFObjectHandle |
| 19 | +QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context) | |
| 20 | +{ | |
| 21 | + qpdf::Tokenizer tokenizer; | |
| 22 | + bool empty = false; | |
| 23 | + return QPDFParser( | |
| 24 | + input, | |
| 25 | + make_description(input.getName(), object_description), | |
| 26 | + object_description, | |
| 27 | + tokenizer, | |
| 28 | + nullptr, | |
| 29 | + context, | |
| 30 | + false) | |
| 31 | + .parse(empty, false); | |
| 32 | +} | |
| 33 | + | |
| 34 | +QPDFObjectHandle | |
| 35 | +QPDFParser::parse_content( | |
| 36 | + InputSource& input, | |
| 37 | + std::shared_ptr<QPDFObject::Description> sp_description, | |
| 38 | + qpdf::Tokenizer& tokenizer, | |
| 39 | + QPDF* context) | |
| 40 | +{ | |
| 41 | + bool empty = false; | |
| 42 | + return QPDFParser( | |
| 43 | + input, std::move(sp_description), "content", tokenizer, nullptr, context, true) | |
| 44 | + .parse(empty, true); | |
| 45 | +} | |
| 46 | + | |
| 47 | +QPDFObjectHandle | |
| 48 | +QPDFParser::parse( | |
| 49 | + InputSource& input, | |
| 50 | + std::string const& object_description, | |
| 51 | + QPDFTokenizer& tokenizer, | |
| 52 | + bool& empty, | |
| 53 | + QPDFObjectHandle::StringDecrypter* decrypter, | |
| 54 | + QPDF* context) | |
| 55 | +{ | |
| 56 | + return QPDFParser( | |
| 57 | + input, | |
| 58 | + make_description(input.getName(), object_description), | |
| 59 | + object_description, | |
| 60 | + *tokenizer.m, | |
| 61 | + decrypter, | |
| 62 | + context, | |
| 63 | + false) | |
| 64 | + .parse(empty, false); | |
| 65 | +} | |
| 66 | + | |
| 67 | +std::pair<QPDFObjectHandle, bool> | |
| 68 | +QPDFParser::parse( | |
| 69 | + InputSource& input, | |
| 70 | + std::string const& object_description, | |
| 71 | + qpdf::Tokenizer& tokenizer, | |
| 72 | + QPDFObjectHandle::StringDecrypter* decrypter, | |
| 73 | + QPDF& context) | |
| 74 | +{ | |
| 75 | + bool empty{false}; | |
| 76 | + auto result = QPDFParser( | |
| 77 | + input, | |
| 78 | + make_description(input.getName(), object_description), | |
| 79 | + object_description, | |
| 80 | + tokenizer, | |
| 81 | + decrypter, | |
| 82 | + &context, | |
| 83 | + true) | |
| 84 | + .parse(empty, false); | |
| 85 | + return {result, empty}; | |
| 86 | +} | |
| 87 | + | |
| 88 | +std::pair<QPDFObjectHandle, bool> | |
| 89 | +QPDFParser::parse( | |
| 90 | + BufferInputSource& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context) | |
| 91 | +{ | |
| 92 | + bool empty{false}; | |
| 93 | + auto result = QPDFParser( | |
| 94 | + input, | |
| 95 | + std::make_shared<QPDFObject::Description>( | |
| 96 | + QPDFObject::ObjStreamDescr(stream_id, obj_id)), | |
| 97 | + "", | |
| 98 | + tokenizer, | |
| 99 | + nullptr, | |
| 100 | + &context, | |
| 101 | + true, | |
| 102 | + stream_id, | |
| 103 | + obj_id) | |
| 104 | + .parse(empty, false); | |
| 105 | + return {result, empty}; | |
| 106 | +} | |
| 107 | + | |
| 108 | +QPDFObjectHandle | |
| 18 | 109 | QPDFParser::parse(bool& empty, bool content_stream) |
| 19 | 110 | { |
| 20 | 111 | // This method must take care not to resolve any objects. Don't check the type of any object | ... | ... |
libqpdf/QPDF_objects.cc
| ... | ... | @@ -1154,9 +1154,7 @@ QPDFObjectHandle |
| 1154 | 1154 | QPDF::readTrailer() |
| 1155 | 1155 | { |
| 1156 | 1156 | qpdf_offset_t offset = m->file->tell(); |
| 1157 | - bool empty = false; | |
| 1158 | - auto object = | |
| 1159 | - QPDFParser(*m->file, "trailer", m->tokenizer, nullptr, this, true).parse(empty, false); | |
| 1157 | + auto [object, empty] = QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, *this); | |
| 1160 | 1158 | if (empty) { |
| 1161 | 1159 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in |
| 1162 | 1160 | // actual PDF files and Adobe Reader appears to ignore them. |
| ... | ... | @@ -1174,13 +1172,11 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) |
| 1174 | 1172 | { |
| 1175 | 1173 | setLastObjectDescription(description, og); |
| 1176 | 1174 | qpdf_offset_t offset = m->file->tell(); |
| 1177 | - bool empty = false; | |
| 1178 | 1175 | |
| 1179 | 1176 | StringDecrypter decrypter{this, og}; |
| 1180 | 1177 | StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; |
| 1181 | - auto object = | |
| 1182 | - QPDFParser(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, this, true) | |
| 1183 | - .parse(empty, false); | |
| 1178 | + auto [object, empty] = | |
| 1179 | + QPDFParser::parse(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, *this); | |
| 1184 | 1180 | if (empty) { |
| 1185 | 1181 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in |
| 1186 | 1182 | // actual PDF files and Adobe Reader appears to ignore them. |
| ... | ... | @@ -1294,10 +1290,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset |
| 1294 | 1290 | QPDFObjectHandle |
| 1295 | 1291 | QPDF::readObjectInStream(BufferInputSource& input, int stream_id, int obj_id) |
| 1296 | 1292 | { |
| 1297 | - bool empty = false; | |
| 1298 | - auto object = | |
| 1299 | - QPDFParser(input, stream_id, obj_id, m->last_object_description, m->tokenizer, this) | |
| 1300 | - .parse(empty, false); | |
| 1293 | + auto [object, empty] = QPDFParser::parse(input, stream_id, obj_id, m->tokenizer, *this); | |
| 1301 | 1294 | if (empty) { |
| 1302 | 1295 | // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in |
| 1303 | 1296 | // actual PDF files and Adobe Reader appears to ignore them. | ... | ... |
libqpdf/qpdf/QPDFParser.hh
| ... | ... | @@ -11,96 +11,70 @@ |
| 11 | 11 | class QPDFParser |
| 12 | 12 | { |
| 13 | 13 | public: |
| 14 | - QPDFParser() = delete; | |
| 14 | + static QPDFObjectHandle | |
| 15 | + parse(InputSource& input, std::string const& object_description, QPDF* context); | |
| 15 | 16 | |
| 16 | - // This constructor is only used by QPDFObjectHandle::parse overload taking a QPDFTokenizer. | |
| 17 | - // ABI: remove when removing QPDFObjectHandle::parse overload. | |
| 18 | - QPDFParser( | |
| 17 | + static QPDFObjectHandle parse_content( | |
| 18 | + InputSource& input, | |
| 19 | + std::shared_ptr<QPDFObject::Description> sp_description, | |
| 20 | + qpdf::Tokenizer& tokenizer, | |
| 21 | + QPDF* context); | |
| 22 | + | |
| 23 | + // For use by deprecated QPDFObjectHandle::parse. | |
| 24 | + static QPDFObjectHandle parse( | |
| 19 | 25 | InputSource& input, |
| 20 | 26 | std::string const& object_description, |
| 21 | 27 | QPDFTokenizer& tokenizer, |
| 28 | + bool& empty, | |
| 22 | 29 | QPDFObjectHandle::StringDecrypter* decrypter, |
| 23 | - QPDF* context, | |
| 24 | - bool parse_pdf) : | |
| 25 | - input(input), | |
| 26 | - object_description(object_description), | |
| 27 | - tokenizer(*tokenizer.m), | |
| 28 | - decrypter(decrypter), | |
| 29 | - context(context), | |
| 30 | - description(make_description(input.getName(), object_description)), | |
| 31 | - parse_pdf(parse_pdf) | |
| 32 | - { | |
| 33 | - } | |
| 30 | + QPDF* context); | |
| 34 | 31 | |
| 35 | - QPDFParser( | |
| 32 | + // For use by QPDF. Return parsed object and whether it is empty. | |
| 33 | + static std::pair<QPDFObjectHandle, bool> parse( | |
| 36 | 34 | InputSource& input, |
| 37 | 35 | std::string const& object_description, |
| 38 | 36 | qpdf::Tokenizer& tokenizer, |
| 39 | 37 | QPDFObjectHandle::StringDecrypter* decrypter, |
| 40 | - QPDF* context, | |
| 41 | - bool parse_pdf) : | |
| 42 | - input(input), | |
| 43 | - object_description(object_description), | |
| 44 | - tokenizer(tokenizer), | |
| 45 | - decrypter(decrypter), | |
| 46 | - context(context), | |
| 47 | - description(make_description(input.getName(), object_description)), | |
| 48 | - parse_pdf(parse_pdf) | |
| 49 | - { | |
| 50 | - } | |
| 38 | + QPDF& context); | |
| 51 | 39 | |
| 52 | - // Used by parseContentStream_data only | |
| 53 | - QPDFParser( | |
| 54 | - InputSource& input, | |
| 55 | - std::shared_ptr<QPDFObject::Description> sp_description, | |
| 56 | - std::string const& object_description, | |
| 40 | + static std::pair<QPDFObjectHandle, bool> parse( | |
| 41 | + BufferInputSource& input, | |
| 42 | + int stream_id, | |
| 43 | + int obj_id, | |
| 57 | 44 | qpdf::Tokenizer& tokenizer, |
| 58 | - QPDF* context) : | |
| 59 | - input(input), | |
| 60 | - object_description(object_description), | |
| 61 | - tokenizer(tokenizer), | |
| 62 | - decrypter(nullptr), | |
| 63 | - context(context), | |
| 64 | - description(std::move(sp_description)), | |
| 65 | - parse_pdf(true) | |
| 45 | + QPDF& context); | |
| 46 | + | |
| 47 | + static std::shared_ptr<QPDFObject::Description> | |
| 48 | + make_description(std::string const& input_name, std::string const& object_description) | |
| 66 | 49 | { |
| 50 | + using namespace std::literals; | |
| 51 | + return std::make_shared<QPDFObject::Description>( | |
| 52 | + input_name + ", " + object_description + " at offset $PO"); | |
| 67 | 53 | } |
| 68 | 54 | |
| 69 | - // Used by readObjectInStream only | |
| 55 | + private: | |
| 70 | 56 | QPDFParser( |
| 71 | 57 | InputSource& input, |
| 72 | - int stream_id, | |
| 73 | - int obj_id, | |
| 58 | + std::shared_ptr<QPDFObject::Description> sp_description, | |
| 74 | 59 | std::string const& object_description, |
| 75 | 60 | qpdf::Tokenizer& tokenizer, |
| 76 | - QPDF* context) : | |
| 61 | + QPDFObjectHandle::StringDecrypter* decrypter, | |
| 62 | + QPDF* context, | |
| 63 | + bool parse_pdf, | |
| 64 | + int stream_id = 0, | |
| 65 | + int obj_id = 0) : | |
| 77 | 66 | input(input), |
| 78 | 67 | object_description(object_description), |
| 79 | 68 | tokenizer(tokenizer), |
| 80 | - decrypter(nullptr), | |
| 69 | + decrypter(decrypter), | |
| 81 | 70 | context(context), |
| 82 | - description( | |
| 83 | - std::make_shared<QPDFObject::Description>( | |
| 84 | - QPDFObject::ObjStreamDescr(stream_id, obj_id))), | |
| 85 | - parse_pdf(true), | |
| 71 | + description(std::move(sp_description)), | |
| 72 | + parse_pdf(parse_pdf), | |
| 86 | 73 | stream_id(stream_id), |
| 87 | 74 | obj_id(obj_id) |
| 88 | 75 | { |
| 89 | 76 | } |
| 90 | 77 | |
| 91 | - ~QPDFParser() = default; | |
| 92 | - | |
| 93 | - QPDFObjectHandle parse(bool& empty, bool content_stream); | |
| 94 | - | |
| 95 | - static std::shared_ptr<QPDFObject::Description> | |
| 96 | - make_description(std::string const& input_name, std::string const& object_description) | |
| 97 | - { | |
| 98 | - using namespace std::literals; | |
| 99 | - return std::make_shared<QPDFObject::Description>( | |
| 100 | - input_name + ", " + object_description + " at offset $PO"); | |
| 101 | - } | |
| 102 | - | |
| 103 | - private: | |
| 104 | 78 | // Parser state. Note: |
| 105 | 79 | // state <= st_dictionary_value is equivalent to (state == st_dictionary_key || state == st_dictionary_value) |
| 106 | 80 | enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; |
| ... | ... | @@ -123,6 +97,7 @@ class QPDFParser |
| 123 | 97 | int null_count{0}; |
| 124 | 98 | }; |
| 125 | 99 | |
| 100 | + QPDFObjectHandle parse(bool& empty, bool content_stream); | |
| 126 | 101 | QPDFObjectHandle parseRemainder(bool content_stream); |
| 127 | 102 | void add(std::shared_ptr<QPDFObject>&& obj); |
| 128 | 103 | void addNull(); |
| ... | ... | @@ -146,7 +121,7 @@ class QPDFParser |
| 146 | 121 | QPDFObjectHandle::StringDecrypter* decrypter; |
| 147 | 122 | QPDF* context; |
| 148 | 123 | std::shared_ptr<QPDFObject::Description> description; |
| 149 | - bool parse_pdf; | |
| 124 | + bool parse_pdf{false}; | |
| 150 | 125 | int stream_id{0}; |
| 151 | 126 | int obj_id{0}; |
| 152 | 127 | ... | ... |