// QPDFParser.hh
#ifndef QPDFPARSER_HH
#define QPDFPARSER_HH
#include <qpdf/InputSource_private.hh>
#include <qpdf/QPDFObjectHandle_private.hh>
#include <qpdf/QPDFObject_private.hh>
#include <qpdf/QPDFTokenizer_private.hh>
#include <memory>
#include <string>
// Parser for PDF objects read from an InputSource. Every public entry point is a static function;
// the constructor is private, so parser instances are only created from inside the class.
class QPDFParser
{
  public:
    // Parse an object from input. object_description names the object being parsed.
    static QPDFObjectHandle
    parse(InputSource& input, std::string const& object_description, QPDF* context);

    // Parse from input using the supplied tokenizer and an already constructed shared description.
    // NOTE(review): judging by the name this is the content stream entry point -- confirm against
    // the implementation.
    static QPDFObjectHandle parse_content(
        InputSource& input,
        std::shared_ptr<QPDFObject::Description> sp_description,
        qpdf::Tokenizer& tokenizer,
        QPDF* context);

    // For use by deprecated QPDFObjectHandle::parse.
    // NOTE(review): empty is a non-const reference; presumably it receives the same "is empty"
    // flag that the pair-returning overloads return -- confirm against the implementation.
    static QPDFObjectHandle parse(
        InputSource& input,
        std::string const& object_description,
        QPDFTokenizer& tokenizer,
        bool& empty,
        QPDFObjectHandle::StringDecrypter* decrypter,
        QPDF* context);

    // For use by QPDF. Return parsed object and whether it is empty.
    static std::pair<QPDFObjectHandle, bool> parse(
        InputSource& input,
        std::string const& object_description,
        qpdf::Tokenizer& tokenizer,
        QPDFObjectHandle::StringDecrypter* decrypter,
        QPDF& context,
        bool sanity_checks);

    // Overload taking an OffsetBuffer plus a stream id and an object id instead of a textual
    // object description.
    // NOTE(review): presumably used for objects held in object streams, with the returned bool
    // again meaning "empty" -- confirm against the implementation.
    static std::pair<QPDFObjectHandle, bool> parse(
        qpdf::is::OffsetBuffer& input,
        int stream_id,
        int obj_id,
        qpdf::Tokenizer& tokenizer,
        QPDF& context);

    // Build a shared description of the form "<input_name>, <object_description> at offset $PO".
    // The "$PO" text is emitted literally here.
    // NOTE(review): presumably it is a placeholder that is replaced by the parsed offset when the
    // description is rendered -- confirm where QPDFObject::Description is consumed.
    static std::shared_ptr<QPDFObject::Description>
    make_description(std::string const& input_name, std::string const& object_description)
    {
        return std::make_shared<QPDFObject::Description>(
            input_name + ", " + object_description + " at offset $PO");
    }

  private:
    // Store the supplied references, pointers and flags; nothing is parsed here.
    // object_description is kept as a reference (see the data member below), so the string passed
    // in must outlive the parser.
    QPDFParser(
        InputSource& input,
        std::shared_ptr<QPDFObject::Description> sp_description,
        std::string const& object_description,
        qpdf::Tokenizer& tokenizer,
        QPDFObjectHandle::StringDecrypter* decrypter,
        QPDF* context,
        bool parse_pdf,
        int stream_id = 0,
        int obj_id = 0,
        bool sanity_checks = false) :
        input(input),
        object_description(object_description),
        tokenizer(tokenizer),
        decrypter(decrypter),
        context(context),
        description(std::move(sp_description)),
        parse_pdf(parse_pdf),
        stream_id(stream_id),
        obj_id(obj_id),
        sanity_checks(sanity_checks)
    {
    }

    // Parser state. The order of the enumerators is significant. Note:
    // state <= st_dictionary_value == (state == st_dictionary_key || state == st_dictionary_value)
    enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array };

    // Per-container parsing state, kept on the parser's stack.
    struct StackFrame
    {
        // Start a frame in the given state, recording the current position of input as its offset.
        StackFrame(InputSource& input, parser_state_e state) :
            state(state),
            offset(input.tell())
        {
        }

        // NOTE(review): olist and dict presumably collect the items of the array or dictionary
        // being built, and key the pending dictionary key -- confirm against the implementation.
        std::vector<QPDFObjectHandle> olist;
        std::map<std::string, QPDFObjectHandle> dict;
        parser_state_e state;
        std::string key;
        // Position of the input when the frame was created.
        qpdf_offset_t offset;
        std::string contents_string;
        // Starts at -1.
        // NOTE(review): presumably -1 means "no contents string recorded" -- confirm.
        qpdf_offset_t contents_offset{-1};
        int null_count{0};
    };

    QPDFObjectHandle parse(bool& empty, bool content_stream);
    QPDFObjectHandle parseRemainder(bool content_stream);
    void add(std::shared_ptr<QPDFObject>&& obj);
    void addNull();
    void addInt(int count);
    template <typename T, typename... Args>
    void addScalar(Args&&... args);
    bool tooManyBadTokens();
    void warnDuplicateKey();
    void fixMissingKeys();

    // Warning helpers; the overloads differ only in how the warning is specified.
    void warn(qpdf_offset_t offset, std::string const& msg) const;
    void warn(std::string const& msg) const;
    void warn(QPDFExc const&) const;

    // Create a new scalar object complete with parsed offset and description.
    // NB the offset includes any leading whitespace.
    template <typename T, typename... Args>
    QPDFObjectHandle withDescription(Args&&... args);
    void setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parsed_offset);

    InputSource& input;
    // Held by reference, not copied: the referenced string must outlive this parser.
    std::string const& object_description;
    qpdf::Tokenizer& tokenizer;
    QPDFObjectHandle::StringDecrypter* decrypter;
    QPDF* context;
    std::shared_ptr<QPDFObject::Description> description;
    bool parse_pdf{false};
    int stream_id{0};
    int obj_id{0};
    bool sanity_checks{false};

    std::vector<StackFrame> stack;
    // NOTE(review): presumably points at the current element of stack; if so it must be refreshed
    // whenever stack reallocates -- confirm against the implementation.
    StackFrame* frame{nullptr};
    // Number of recent bad tokens. This will always be > 0 once a bad token has been encountered as
    // it only gets incremented or reset when a bad token is encountered.
    int bad_count{0};
    // Number of bad tokens (remaining) before giving up.
    int max_bad_count{15};
    // Number of good tokens since last bad token. Irrelevant if bad_count == 0.
    int good_count{0};
    // Start offset including any leading whitespace.
    qpdf_offset_t start{0};
    // Number of successive integer tokens.
    int int_count{0};
    // NOTE(review): presumably the values and offsets of the most recent integer tokens counted by
    // int_count -- confirm against the implementation.
    long long int_buffer[2]{0, 0};
    qpdf_offset_t last_offset_buffer[2]{0, 0};
};
#endif // QPDFPARSER_HH