Commit 5eb9a18a863e9c2c462aee80af45ba2ea982585b

Authored by m-holger
Committed by GitHub
2 parents be25fc30 8b0eaaf7

Merge pull request #1399 from m-holger/parse

Refactor calls to QPDFParser::parse
libqpdf/QPDFObjectHandle.cc
@@ -1498,10 +1498,7 @@ QPDFObjectHandle::parse( @@ -1498,10 +1498,7 @@ QPDFObjectHandle::parse(
1498 // the string. 1498 // the string.
1499 Buffer buf(const_cast<std::string&>(object_str)); 1499 Buffer buf(const_cast<std::string&>(object_str));
1500 auto input = BufferInputSource("parsed object", &buf); 1500 auto input = BufferInputSource("parsed object", &buf);
1501 - qpdf::Tokenizer tokenizer;  
1502 - bool empty = false;  
1503 - auto result = QPDFParser(input, object_description, tokenizer, nullptr, context, false)  
1504 - .parse(empty, false); 1501 + auto result = QPDFParser::parse(input, object_description, context);
1505 size_t offset = QIntC::to_size(input.tell()); 1502 size_t offset = QIntC::to_size(input.tell());
1506 while (offset < object_str.length()) { 1503 while (offset < object_str.length()) {
1507 if (!isspace(object_str.at(offset))) { 1504 if (!isspace(object_str.at(offset))) {
@@ -1621,7 +1618,6 @@ QPDFObjectHandle::parseContentStream_data( @@ -1621,7 +1618,6 @@ QPDFObjectHandle::parseContentStream_data(
1621 Tokenizer tokenizer; 1618 Tokenizer tokenizer;
1622 tokenizer.allowEOF(); 1619 tokenizer.allowEOF();
1623 auto sp_description = QPDFParser::make_description(description, "content"); 1620 auto sp_description = QPDFParser::make_description(description, "content");
1624 - bool empty = false;  
1625 while (QIntC::to_size(input.tell()) < stream_length) { 1621 while (QIntC::to_size(input.tell()) < stream_length) {
1626 // Read a token and seek to the beginning. The offset we get from this process is the 1622 // Read a token and seek to the beginning. The offset we get from this process is the
1627 // beginning of the next non-ignorable (space, comment) token. This way, the offset and 1623 // beginning of the next non-ignorable (space, comment) token. This way, the offset and
@@ -1629,8 +1625,7 @@ QPDFObjectHandle::parseContentStream_data( @@ -1629,8 +1625,7 @@ QPDFObjectHandle::parseContentStream_data(
1629 tokenizer.nextToken(input, "content", true); 1625 tokenizer.nextToken(input, "content", true);
1630 qpdf_offset_t offset = input.getLastOffset(); 1626 qpdf_offset_t offset = input.getLastOffset();
1631 input.seek(offset, SEEK_SET); 1627 input.seek(offset, SEEK_SET);
1632 - auto obj =  
1633 - QPDFParser(input, sp_description, "content", tokenizer, context).parse(empty, true); 1628 + auto obj = QPDFParser::parse_content(input, sp_description, tokenizer, context);
1634 if (!obj) { 1629 if (!obj) {
1635 // EOF 1630 // EOF
1636 break; 1631 break;
@@ -1690,8 +1685,7 @@ QPDFObjectHandle::parse( @@ -1690,8 +1685,7 @@ QPDFObjectHandle::parse(
1690 StringDecrypter* decrypter, 1685 StringDecrypter* decrypter,
1691 QPDF* context) 1686 QPDF* context)
1692 { 1687 {
1693 - return QPDFParser(*input, object_description, tokenizer, decrypter, context, false)  
1694 - .parse(empty, false); 1688 + return QPDFParser::parse(*input, object_description, tokenizer, empty, decrypter, context);
1695 } 1689 }
1696 1690
1697 qpdf_offset_t 1691 qpdf_offset_t
libqpdf/QPDFParser.cc
1 #include <qpdf/QPDFParser.hh> 1 #include <qpdf/QPDFParser.hh>
2 2
  3 +#include <qpdf/BufferInputSource.hh>
3 #include <qpdf/QPDF.hh> 4 #include <qpdf/QPDF.hh>
4 #include <qpdf/QPDFObjGen.hh> 5 #include <qpdf/QPDFObjGen.hh>
5 #include <qpdf/QPDFObjectHandle.hh> 6 #include <qpdf/QPDFObjectHandle.hh>
@@ -15,6 +16,96 @@ using namespace std::literals; @@ -15,6 +16,96 @@ using namespace std::literals;
15 using ObjectPtr = std::shared_ptr<QPDFObject>; 16 using ObjectPtr = std::shared_ptr<QPDFObject>;
16 17
17 QPDFObjectHandle 18 QPDFObjectHandle
  19 +QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context)
  20 +{
  21 + qpdf::Tokenizer tokenizer;
  22 + bool empty = false;
  23 + return QPDFParser(
  24 + input,
  25 + make_description(input.getName(), object_description),
  26 + object_description,
  27 + tokenizer,
  28 + nullptr,
  29 + context,
  30 + false)
  31 + .parse(empty, false);
  32 +}
  33 +
  34 +QPDFObjectHandle
  35 +QPDFParser::parse_content(
  36 + InputSource& input,
  37 + std::shared_ptr<QPDFObject::Description> sp_description,
  38 + qpdf::Tokenizer& tokenizer,
  39 + QPDF* context)
  40 +{
  41 + bool empty = false;
  42 + return QPDFParser(
  43 + input, std::move(sp_description), "content", tokenizer, nullptr, context, true)
  44 + .parse(empty, true);
  45 +}
  46 +
  47 +QPDFObjectHandle
  48 +QPDFParser::parse(
  49 + InputSource& input,
  50 + std::string const& object_description,
  51 + QPDFTokenizer& tokenizer,
  52 + bool& empty,
  53 + QPDFObjectHandle::StringDecrypter* decrypter,
  54 + QPDF* context)
  55 +{
  56 + return QPDFParser(
  57 + input,
  58 + make_description(input.getName(), object_description),
  59 + object_description,
  60 + *tokenizer.m,
  61 + decrypter,
  62 + context,
  63 + false)
  64 + .parse(empty, false);
  65 +}
  66 +
  67 +std::pair<QPDFObjectHandle, bool>
  68 +QPDFParser::parse(
  69 + InputSource& input,
  70 + std::string const& object_description,
  71 + qpdf::Tokenizer& tokenizer,
  72 + QPDFObjectHandle::StringDecrypter* decrypter,
  73 + QPDF& context)
  74 +{
  75 + bool empty{false};
  76 + auto result = QPDFParser(
  77 + input,
  78 + make_description(input.getName(), object_description),
  79 + object_description,
  80 + tokenizer,
  81 + decrypter,
  82 + &context,
  83 + true)
  84 + .parse(empty, false);
  85 + return {result, empty};
  86 +}
  87 +
  88 +std::pair<QPDFObjectHandle, bool>
  89 +QPDFParser::parse(
  90 + BufferInputSource& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
  91 +{
  92 + bool empty{false};
  93 + auto result = QPDFParser(
  94 + input,
  95 + std::make_shared<QPDFObject::Description>(
  96 + QPDFObject::ObjStreamDescr(stream_id, obj_id)),
  97 + "",
  98 + tokenizer,
  99 + nullptr,
  100 + &context,
  101 + true,
  102 + stream_id,
  103 + obj_id)
  104 + .parse(empty, false);
  105 + return {result, empty};
  106 +}
  107 +
  108 +QPDFObjectHandle
18 QPDFParser::parse(bool& empty, bool content_stream) 109 QPDFParser::parse(bool& empty, bool content_stream)
19 { 110 {
20 // This method must take care not to resolve any objects. Don't check the type of any object 111 // This method must take care not to resolve any objects. Don't check the type of any object
libqpdf/QPDF_objects.cc
@@ -1154,9 +1154,7 @@ QPDFObjectHandle @@ -1154,9 +1154,7 @@ QPDFObjectHandle
1154 QPDF::readTrailer() 1154 QPDF::readTrailer()
1155 { 1155 {
1156 qpdf_offset_t offset = m->file->tell(); 1156 qpdf_offset_t offset = m->file->tell();
1157 - bool empty = false;  
1158 - auto object =  
1159 - QPDFParser(*m->file, "trailer", m->tokenizer, nullptr, this, true).parse(empty, false); 1157 + auto [object, empty] = QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, *this);
1160 if (empty) { 1158 if (empty) {
1161 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in 1159 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1162 // actual PDF files and Adobe Reader appears to ignore them. 1160 // actual PDF files and Adobe Reader appears to ignore them.
@@ -1174,13 +1172,11 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) @@ -1174,13 +1172,11 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
1174 { 1172 {
1175 setLastObjectDescription(description, og); 1173 setLastObjectDescription(description, og);
1176 qpdf_offset_t offset = m->file->tell(); 1174 qpdf_offset_t offset = m->file->tell();
1177 - bool empty = false;  
1178 1175
1179 StringDecrypter decrypter{this, og}; 1176 StringDecrypter decrypter{this, og};
1180 StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; 1177 StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr;
1181 - auto object =  
1182 - QPDFParser(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, this, true)  
1183 - .parse(empty, false); 1178 + auto [object, empty] =
  1179 + QPDFParser::parse(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, *this);
1184 if (empty) { 1180 if (empty) {
1185 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in 1181 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1186 // actual PDF files and Adobe Reader appears to ignore them. 1182 // actual PDF files and Adobe Reader appears to ignore them.
@@ -1294,10 +1290,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset @@ -1294,10 +1290,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset
1294 QPDFObjectHandle 1290 QPDFObjectHandle
1295 QPDF::readObjectInStream(BufferInputSource& input, int stream_id, int obj_id) 1291 QPDF::readObjectInStream(BufferInputSource& input, int stream_id, int obj_id)
1296 { 1292 {
1297 - bool empty = false;  
1298 - auto object =  
1299 - QPDFParser(input, stream_id, obj_id, m->last_object_description, m->tokenizer, this)  
1300 - .parse(empty, false); 1293 + auto [object, empty] = QPDFParser::parse(input, stream_id, obj_id, m->tokenizer, *this);
1301 if (empty) { 1294 if (empty) {
1302 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in 1295 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1303 // actual PDF files and Adobe Reader appears to ignore them. 1296 // actual PDF files and Adobe Reader appears to ignore them.
libqpdf/qpdf/QPDFParser.hh
@@ -11,96 +11,70 @@ @@ -11,96 +11,70 @@
11 class QPDFParser 11 class QPDFParser
12 { 12 {
13 public: 13 public:
14 - QPDFParser() = delete; 14 + static QPDFObjectHandle
  15 + parse(InputSource& input, std::string const& object_description, QPDF* context);
15 16
16 - // This constructor is only used by QPDFObjectHandle::parse overload taking a QPDFTokenizer.  
17 - // ABI: remove when removing QPDFObjectHandle::parse overload.  
18 - QPDFParser( 17 + static QPDFObjectHandle parse_content(
  18 + InputSource& input,
  19 + std::shared_ptr<QPDFObject::Description> sp_description,
  20 + qpdf::Tokenizer& tokenizer,
  21 + QPDF* context);
  22 +
  23 + // For use by deprecated QPDFObjectHandle::parse.
  24 + static QPDFObjectHandle parse(
19 InputSource& input, 25 InputSource& input,
20 std::string const& object_description, 26 std::string const& object_description,
21 QPDFTokenizer& tokenizer, 27 QPDFTokenizer& tokenizer,
  28 + bool& empty,
22 QPDFObjectHandle::StringDecrypter* decrypter, 29 QPDFObjectHandle::StringDecrypter* decrypter,
23 - QPDF* context,  
24 - bool parse_pdf) :  
25 - input(input),  
26 - object_description(object_description),  
27 - tokenizer(*tokenizer.m),  
28 - decrypter(decrypter),  
29 - context(context),  
30 - description(make_description(input.getName(), object_description)),  
31 - parse_pdf(parse_pdf)  
32 - {  
33 - } 30 + QPDF* context);
34 31
35 - QPDFParser( 32 + // For use by QPDF. Return parsed object and whether it is empty.
  33 + static std::pair<QPDFObjectHandle, bool> parse(
36 InputSource& input, 34 InputSource& input,
37 std::string const& object_description, 35 std::string const& object_description,
38 qpdf::Tokenizer& tokenizer, 36 qpdf::Tokenizer& tokenizer,
39 QPDFObjectHandle::StringDecrypter* decrypter, 37 QPDFObjectHandle::StringDecrypter* decrypter,
40 - QPDF* context,  
41 - bool parse_pdf) :  
42 - input(input),  
43 - object_description(object_description),  
44 - tokenizer(tokenizer),  
45 - decrypter(decrypter),  
46 - context(context),  
47 - description(make_description(input.getName(), object_description)),  
48 - parse_pdf(parse_pdf)  
49 - {  
50 - } 38 + QPDF& context);
51 39
52 - // Used by parseContentStream_data only  
53 - QPDFParser(  
54 - InputSource& input,  
55 - std::shared_ptr<QPDFObject::Description> sp_description,  
56 - std::string const& object_description, 40 + static std::pair<QPDFObjectHandle, bool> parse(
  41 + BufferInputSource& input,
  42 + int stream_id,
  43 + int obj_id,
57 qpdf::Tokenizer& tokenizer, 44 qpdf::Tokenizer& tokenizer,
58 - QPDF* context) :  
59 - input(input),  
60 - object_description(object_description),  
61 - tokenizer(tokenizer),  
62 - decrypter(nullptr),  
63 - context(context),  
64 - description(std::move(sp_description)),  
65 - parse_pdf(true) 45 + QPDF& context);
  46 +
  47 + static std::shared_ptr<QPDFObject::Description>
  48 + make_description(std::string const& input_name, std::string const& object_description)
66 { 49 {
  50 + using namespace std::literals;
  51 + return std::make_shared<QPDFObject::Description>(
  52 + input_name + ", " + object_description + " at offset $PO");
67 } 53 }
68 54
69 - // Used by readObjectInStream only 55 + private:
70 QPDFParser( 56 QPDFParser(
71 InputSource& input, 57 InputSource& input,
72 - int stream_id,  
73 - int obj_id, 58 + std::shared_ptr<QPDFObject::Description> sp_description,
74 std::string const& object_description, 59 std::string const& object_description,
75 qpdf::Tokenizer& tokenizer, 60 qpdf::Tokenizer& tokenizer,
76 - QPDF* context) : 61 + QPDFObjectHandle::StringDecrypter* decrypter,
  62 + QPDF* context,
  63 + bool parse_pdf,
  64 + int stream_id = 0,
  65 + int obj_id = 0) :
77 input(input), 66 input(input),
78 object_description(object_description), 67 object_description(object_description),
79 tokenizer(tokenizer), 68 tokenizer(tokenizer),
80 - decrypter(nullptr), 69 + decrypter(decrypter),
81 context(context), 70 context(context),
82 - description(  
83 - std::make_shared<QPDFObject::Description>(  
84 - QPDFObject::ObjStreamDescr(stream_id, obj_id))),  
85 - parse_pdf(true), 71 + description(std::move(sp_description)),
  72 + parse_pdf(parse_pdf),
86 stream_id(stream_id), 73 stream_id(stream_id),
87 obj_id(obj_id) 74 obj_id(obj_id)
88 { 75 {
89 } 76 }
90 77
91 - ~QPDFParser() = default;  
92 -  
93 - QPDFObjectHandle parse(bool& empty, bool content_stream);  
94 -  
95 - static std::shared_ptr<QPDFObject::Description>  
96 - make_description(std::string const& input_name, std::string const& object_description)  
97 - {  
98 - using namespace std::literals;  
99 - return std::make_shared<QPDFObject::Description>(  
100 - input_name + ", " + object_description + " at offset $PO");  
101 - }  
102 -  
103 - private:  
104 // Parser state. Note: 78 // Parser state. Note:
105 // state <= st_dictionary_value == (state = st_dictionary_key || state = st_dictionary_value) 79 // state <= st_dictionary_value == (state = st_dictionary_key || state = st_dictionary_value)
106 enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; 80 enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array };
@@ -123,6 +97,7 @@ class QPDFParser @@ -123,6 +97,7 @@ class QPDFParser
123 int null_count{0}; 97 int null_count{0};
124 }; 98 };
125 99
  100 + QPDFObjectHandle parse(bool& empty, bool content_stream);
126 QPDFObjectHandle parseRemainder(bool content_stream); 101 QPDFObjectHandle parseRemainder(bool content_stream);
127 void add(std::shared_ptr<QPDFObject>&& obj); 102 void add(std::shared_ptr<QPDFObject>&& obj);
128 void addNull(); 103 void addNull();
@@ -146,7 +121,7 @@ class QPDFParser @@ -146,7 +121,7 @@ class QPDFParser
146 QPDFObjectHandle::StringDecrypter* decrypter; 121 QPDFObjectHandle::StringDecrypter* decrypter;
147 QPDF* context; 122 QPDF* context;
148 std::shared_ptr<QPDFObject::Description> description; 123 std::shared_ptr<QPDFObject::Description> description;
149 - bool parse_pdf; 124 + bool parse_pdf{false};
150 int stream_id{0}; 125 int stream_id{0};
151 int obj_id{0}; 126 int obj_id{0};
152 127