// QPDFTokenizer_private.hh
//
// Declares qpdf::Tokenizer, the class whose methods support QPDFTokenizer.
#ifndef QPDFTOKENIZER_PRIVATE_HH
#define QPDFTOKENIZER_PRIVATE_HH
#include <qpdf/QPDFTokenizer.hh>
namespace qpdf
{
class Tokenizer
{
public:
Tokenizer();
Tokenizer(Tokenizer const&) = delete;
Tokenizer& operator=(Tokenizer const&) = delete;
// Methods to support QPDFTokenizer. See QPDFTokenizer.hh for detail. Some of these are used
// by Tokenizer internally but are not accessed directly by the rest of qpdf.
void allowEOF();
void includeIgnorable();
void presentCharacter(char ch);
void presentEOF();
bool betweenTokens();
// If a token is available, return true and initialize token with the token, unread_char
// with whether or not we have to unread the last character, and if unread_char, ch with the
// character to unread.
bool getToken(QPDFTokenizer::Token& token, bool& unread_char, char& ch);
// Read a token from an input source. Context describes the context in which the token is
// being read and is used in the exception thrown if there is an error. After a token is
// read, the position of the input source returned by input->tell() points to just after the
// token, and the input source's "last offset" as returned by input->getLastOffset() points
// to the beginning of the token.
QPDFTokenizer::Token readToken(
InputSource& input,
std::string const& context,
bool allow_bad = false,
size_t max_len = 0);
// Calling this method puts the tokenizer in a state for reading inline images. You should
// call this method after reading the character following the ID operator. In that state, it
// will return all data up to BUT NOT INCLUDING the next EI token. After you call this
// method, the next call to readToken (or the token created next time getToken returns true)
// will either be tt_inline_image or tt_bad. This is the only way readToken returns a
// tt_inline_image token.
void expectInlineImage(InputSource& input);
// Read a token from an input source. Context describes the context in which the token is
// being read and is used in the exception thrown if there is an error. After a token is
// read, the position of the input source returned by input->tell() points to just after the
// token, and the input source's "last offset" as returned by input->getLastOffset() points
// to the beginning of the token. Returns false if the token is bad or if scanning produced
// an error message for any reason.
bool nextToken(InputSource& input, std::string const& context, size_t max_len = 0);
// The following methods are only valid after nextToken has been called and until another
// QPDFTokenizer method is called. They allow the results of calling nextToken to be
// accessed without creating a Token, thus avoiding copying information that may not be
// needed.
inline QPDFTokenizer::token_type_e
getType() const
{
return this->type;
}
inline std::string const&
getValue() const
{
return (this->type == QPDFTokenizer::tt_name || this->type == QPDFTokenizer::tt_string)
? this->val
: this->raw_val;
}
inline std::string const&
getRawValue() const
{
return this->raw_val;
}
inline std::string const&
getErrorMessage() const
{
return this->error_message;
}
private:
bool isSpace(char);
bool isDelimiter(char);
void findEI(InputSource& input);
enum state_e {
st_top,
st_in_hexstring,
st_in_string,
st_in_hexstring_2nd,
st_name,
st_literal,
st_in_space,
st_in_comment,
st_string_escape,
st_char_code,
st_string_after_cr,
st_lt,
st_gt,
st_inline_image,
st_sign,
st_number,
st_real,
st_decimal,
st_name_hex1,
st_name_hex2,
st_before_token,
st_token_ready
};
void handleCharacter(char);
void inBeforeToken(char);
void inTop(char);
void inSpace(char);
void inComment(char);
void inString(char);
void inName(char);
void inLt(char);
void inGt(char);
void inStringAfterCR(char);
void inStringEscape(char);
void inLiteral(char);
void inCharCode(char);
void inHexstring(char);
void inHexstring2nd(char);
void inInlineImage(char);
void inTokenReady(char);
void inNameHex1(char);
void inNameHex2(char);
void inSign(char);
void inDecimal(char);
void inNumber(char);
void inReal(char);
void reset();
// Lexer state
state_e state;
bool allow_eof{false};
bool include_ignorable{false};
// Current token accumulation
QPDFTokenizer::token_type_e type;
std::string val;
std::string raw_val;
std::string error_message;
bool before_token;
bool in_token;
char char_to_unread;
size_t inline_image_bytes;
bool bad;
// State for strings
int string_depth;
int char_code;
char hex_char;
int digit_count;
};
} // namespace qpdf
#endif // QPDFTOKENIZER_PRIVATE_HH