Commit 5eb9a18a863e9c2c462aee80af45ba2ea982585b

Authored by m-holger
Committed by GitHub
2 parents be25fc30 8b0eaaf7

Merge pull request #1399 from m-holger/parse

Refactor calls to QPDFParser::parse
libqpdf/QPDFObjectHandle.cc
... ... @@ -1498,10 +1498,7 @@ QPDFObjectHandle::parse(
1498 1498 // the string.
1499 1499 Buffer buf(const_cast<std::string&>(object_str));
1500 1500 auto input = BufferInputSource("parsed object", &buf);
1501   - qpdf::Tokenizer tokenizer;
1502   - bool empty = false;
1503   - auto result = QPDFParser(input, object_description, tokenizer, nullptr, context, false)
1504   - .parse(empty, false);
  1501 + auto result = QPDFParser::parse(input, object_description, context);
1505 1502 size_t offset = QIntC::to_size(input.tell());
1506 1503 while (offset < object_str.length()) {
1507 1504 if (!isspace(object_str.at(offset))) {
... ... @@ -1621,7 +1618,6 @@ QPDFObjectHandle::parseContentStream_data(
1621 1618 Tokenizer tokenizer;
1622 1619 tokenizer.allowEOF();
1623 1620 auto sp_description = QPDFParser::make_description(description, "content");
1624   - bool empty = false;
1625 1621 while (QIntC::to_size(input.tell()) < stream_length) {
1626 1622 // Read a token and seek to the beginning. The offset we get from this process is the
1627 1623 // beginning of the next non-ignorable (space, comment) token. This way, the offset and
... ... @@ -1629,8 +1625,7 @@ QPDFObjectHandle::parseContentStream_data(
1629 1625 tokenizer.nextToken(input, "content", true);
1630 1626 qpdf_offset_t offset = input.getLastOffset();
1631 1627 input.seek(offset, SEEK_SET);
1632   - auto obj =
1633   - QPDFParser(input, sp_description, "content", tokenizer, context).parse(empty, true);
  1628 + auto obj = QPDFParser::parse_content(input, sp_description, tokenizer, context);
1634 1629 if (!obj) {
1635 1630 // EOF
1636 1631 break;
... ... @@ -1690,8 +1685,7 @@ QPDFObjectHandle::parse(
1690 1685 StringDecrypter* decrypter,
1691 1686 QPDF* context)
1692 1687 {
1693   - return QPDFParser(*input, object_description, tokenizer, decrypter, context, false)
1694   - .parse(empty, false);
  1688 + return QPDFParser::parse(*input, object_description, tokenizer, empty, decrypter, context);
1695 1689 }
1696 1690  
1697 1691 qpdf_offset_t
... ...
libqpdf/QPDFParser.cc
1 1 #include <qpdf/QPDFParser.hh>
2 2  
  3 +#include <qpdf/BufferInputSource.hh>
3 4 #include <qpdf/QPDF.hh>
4 5 #include <qpdf/QPDFObjGen.hh>
5 6 #include <qpdf/QPDFObjectHandle.hh>
... ... @@ -15,6 +16,96 @@ using namespace std::literals;
15 16 using ObjectPtr = std::shared_ptr<QPDFObject>;
16 17  
17 18 QPDFObjectHandle
  19 +QPDFParser::parse(InputSource& input, std::string const& object_description, QPDF* context)
  20 +{
  21 + qpdf::Tokenizer tokenizer;
  22 + bool empty = false;
  23 + return QPDFParser(
  24 + input,
  25 + make_description(input.getName(), object_description),
  26 + object_description,
  27 + tokenizer,
  28 + nullptr,
  29 + context,
  30 + false)
  31 + .parse(empty, false);
  32 +}
  33 +
  34 +QPDFObjectHandle
  35 +QPDFParser::parse_content(
  36 + InputSource& input,
  37 + std::shared_ptr<QPDFObject::Description> sp_description,
  38 + qpdf::Tokenizer& tokenizer,
  39 + QPDF* context)
  40 +{
  41 + bool empty = false;
  42 + return QPDFParser(
  43 + input, std::move(sp_description), "content", tokenizer, nullptr, context, true)
  44 + .parse(empty, true);
  45 +}
  46 +
  47 +QPDFObjectHandle
  48 +QPDFParser::parse(
  49 + InputSource& input,
  50 + std::string const& object_description,
  51 + QPDFTokenizer& tokenizer,
  52 + bool& empty,
  53 + QPDFObjectHandle::StringDecrypter* decrypter,
  54 + QPDF* context)
  55 +{
  56 + return QPDFParser(
  57 + input,
  58 + make_description(input.getName(), object_description),
  59 + object_description,
  60 + *tokenizer.m,
  61 + decrypter,
  62 + context,
  63 + false)
  64 + .parse(empty, false);
  65 +}
  66 +
  67 +std::pair<QPDFObjectHandle, bool>
  68 +QPDFParser::parse(
  69 + InputSource& input,
  70 + std::string const& object_description,
  71 + qpdf::Tokenizer& tokenizer,
  72 + QPDFObjectHandle::StringDecrypter* decrypter,
  73 + QPDF& context)
  74 +{
  75 + bool empty{false};
  76 + auto result = QPDFParser(
  77 + input,
  78 + make_description(input.getName(), object_description),
  79 + object_description,
  80 + tokenizer,
  81 + decrypter,
  82 + &context,
  83 + true)
  84 + .parse(empty, false);
  85 + return {result, empty};
  86 +}
  87 +
  88 +std::pair<QPDFObjectHandle, bool>
  89 +QPDFParser::parse(
  90 + BufferInputSource& input, int stream_id, int obj_id, qpdf::Tokenizer& tokenizer, QPDF& context)
  91 +{
  92 + bool empty{false};
  93 + auto result = QPDFParser(
  94 + input,
  95 + std::make_shared<QPDFObject::Description>(
  96 + QPDFObject::ObjStreamDescr(stream_id, obj_id)),
  97 + "",
  98 + tokenizer,
  99 + nullptr,
  100 + &context,
  101 + true,
  102 + stream_id,
  103 + obj_id)
  104 + .parse(empty, false);
  105 + return {result, empty};
  106 +}
  107 +
  108 +QPDFObjectHandle
18 109 QPDFParser::parse(bool& empty, bool content_stream)
19 110 {
20 111 // This method must take care not to resolve any objects. Don't check the type of any object
... ...
libqpdf/QPDF_objects.cc
... ... @@ -1154,9 +1154,7 @@ QPDFObjectHandle
1154 1154 QPDF::readTrailer()
1155 1155 {
1156 1156 qpdf_offset_t offset = m->file->tell();
1157   - bool empty = false;
1158   - auto object =
1159   - QPDFParser(*m->file, "trailer", m->tokenizer, nullptr, this, true).parse(empty, false);
  1157 + auto [object, empty] = QPDFParser::parse(*m->file, "trailer", m->tokenizer, nullptr, *this);
1160 1158 if (empty) {
1161 1159 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1162 1160 // actual PDF files and Adobe Reader appears to ignore them.
... ... @@ -1174,13 +1172,11 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
1174 1172 {
1175 1173 setLastObjectDescription(description, og);
1176 1174 qpdf_offset_t offset = m->file->tell();
1177   - bool empty = false;
1178 1175  
1179 1176 StringDecrypter decrypter{this, og};
1180 1177 StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr;
1181   - auto object =
1182   - QPDFParser(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, this, true)
1183   - .parse(empty, false);
  1178 + auto [object, empty] =
  1179 + QPDFParser::parse(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, *this);
1184 1180 if (empty) {
1185 1181 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1186 1182 // actual PDF files and Adobe Reader appears to ignore them.
... ... @@ -1294,10 +1290,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset
1294 1290 QPDFObjectHandle
1295 1291 QPDF::readObjectInStream(BufferInputSource& input, int stream_id, int obj_id)
1296 1292 {
1297   - bool empty = false;
1298   - auto object =
1299   - QPDFParser(input, stream_id, obj_id, m->last_object_description, m->tokenizer, this)
1300   - .parse(empty, false);
  1293 + auto [object, empty] = QPDFParser::parse(input, stream_id, obj_id, m->tokenizer, *this);
1301 1294 if (empty) {
1302 1295 // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
1303 1296 // actual PDF files and Adobe Reader appears to ignore them.
... ...
libqpdf/qpdf/QPDFParser.hh
... ... @@ -11,96 +11,70 @@
11 11 class QPDFParser
12 12 {
13 13 public:
14   - QPDFParser() = delete;
  14 + static QPDFObjectHandle
  15 + parse(InputSource& input, std::string const& object_description, QPDF* context);
15 16  
16   - // This constructor is only used by QPDFObjectHandle::parse overload taking a QPDFTokenizer.
17   - // ABI: remove when removing QPDFObjectHandle::parse overload.
18   - QPDFParser(
  17 + static QPDFObjectHandle parse_content(
  18 + InputSource& input,
  19 + std::shared_ptr<QPDFObject::Description> sp_description,
  20 + qpdf::Tokenizer& tokenizer,
  21 + QPDF* context);
  22 +
  23 + // For use by deprecated QPDFObjectHandle::parse.
  24 + static QPDFObjectHandle parse(
19 25 InputSource& input,
20 26 std::string const& object_description,
21 27 QPDFTokenizer& tokenizer,
  28 + bool& empty,
22 29 QPDFObjectHandle::StringDecrypter* decrypter,
23   - QPDF* context,
24   - bool parse_pdf) :
25   - input(input),
26   - object_description(object_description),
27   - tokenizer(*tokenizer.m),
28   - decrypter(decrypter),
29   - context(context),
30   - description(make_description(input.getName(), object_description)),
31   - parse_pdf(parse_pdf)
32   - {
33   - }
  30 + QPDF* context);
34 31  
35   - QPDFParser(
  32 + // For use by QPDF. Return parsed object and whether it is empty.
  33 + static std::pair<QPDFObjectHandle, bool> parse(
36 34 InputSource& input,
37 35 std::string const& object_description,
38 36 qpdf::Tokenizer& tokenizer,
39 37 QPDFObjectHandle::StringDecrypter* decrypter,
40   - QPDF* context,
41   - bool parse_pdf) :
42   - input(input),
43   - object_description(object_description),
44   - tokenizer(tokenizer),
45   - decrypter(decrypter),
46   - context(context),
47   - description(make_description(input.getName(), object_description)),
48   - parse_pdf(parse_pdf)
49   - {
50   - }
  38 + QPDF& context);
51 39  
52   - // Used by parseContentStream_data only
53   - QPDFParser(
54   - InputSource& input,
55   - std::shared_ptr<QPDFObject::Description> sp_description,
56   - std::string const& object_description,
  40 + static std::pair<QPDFObjectHandle, bool> parse(
  41 + BufferInputSource& input,
  42 + int stream_id,
  43 + int obj_id,
57 44 qpdf::Tokenizer& tokenizer,
58   - QPDF* context) :
59   - input(input),
60   - object_description(object_description),
61   - tokenizer(tokenizer),
62   - decrypter(nullptr),
63   - context(context),
64   - description(std::move(sp_description)),
65   - parse_pdf(true)
  45 + QPDF& context);
  46 +
  47 + static std::shared_ptr<QPDFObject::Description>
  48 + make_description(std::string const& input_name, std::string const& object_description)
66 49 {
  50 + using namespace std::literals;
  51 + return std::make_shared<QPDFObject::Description>(
  52 + input_name + ", " + object_description + " at offset $PO");
67 53 }
68 54  
69   - // Used by readObjectInStream only
  55 + private:
70 56 QPDFParser(
71 57 InputSource& input,
72   - int stream_id,
73   - int obj_id,
  58 + std::shared_ptr<QPDFObject::Description> sp_description,
74 59 std::string const& object_description,
75 60 qpdf::Tokenizer& tokenizer,
76   - QPDF* context) :
  61 + QPDFObjectHandle::StringDecrypter* decrypter,
  62 + QPDF* context,
  63 + bool parse_pdf,
  64 + int stream_id = 0,
  65 + int obj_id = 0) :
77 66 input(input),
78 67 object_description(object_description),
79 68 tokenizer(tokenizer),
80   - decrypter(nullptr),
  69 + decrypter(decrypter),
81 70 context(context),
82   - description(
83   - std::make_shared<QPDFObject::Description>(
84   - QPDFObject::ObjStreamDescr(stream_id, obj_id))),
85   - parse_pdf(true),
  71 + description(std::move(sp_description)),
  72 + parse_pdf(parse_pdf),
86 73 stream_id(stream_id),
87 74 obj_id(obj_id)
88 75 {
89 76 }
90 77  
91   - ~QPDFParser() = default;
92   -
93   - QPDFObjectHandle parse(bool& empty, bool content_stream);
94   -
95   - static std::shared_ptr<QPDFObject::Description>
96   - make_description(std::string const& input_name, std::string const& object_description)
97   - {
98   - using namespace std::literals;
99   - return std::make_shared<QPDFObject::Description>(
100   - input_name + ", " + object_description + " at offset $PO");
101   - }
102   -
103   - private:
104 78 // Parser state. Note:
105 79 // state <= st_dictionary_value == (state = st_dictionary_key || state = st_dictionary_value)
106 80 enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array };
... ... @@ -123,6 +97,7 @@ class QPDFParser
123 97 int null_count{0};
124 98 };
125 99  
  100 + QPDFObjectHandle parse(bool& empty, bool content_stream);
126 101 QPDFObjectHandle parseRemainder(bool content_stream);
127 102 void add(std::shared_ptr<QPDFObject>&& obj);
128 103 void addNull();
... ... @@ -146,7 +121,7 @@ class QPDFParser
146 121 QPDFObjectHandle::StringDecrypter* decrypter;
147 122 QPDF* context;
148 123 std::shared_ptr<QPDFObject::Description> description;
149   - bool parse_pdf;
  124 + bool parse_pdf{false};
150 125 int stream_id{0};
151 126 int obj_id{0};
152 127  
... ...