Commit 39bc2eb4d9c41ef9707db224ffacc1ec008fb441
1 parent: a4b7907e
For QPDFTokenizer add private implementation class qpdf::Tokenizer
Showing 4 changed files with 350 additions and 165 deletions
include/qpdf/QPDFTokenizer.hh
| @@ -29,6 +29,11 @@ | @@ -29,6 +29,11 @@ | ||
| 29 | #include <memory> | 29 | #include <memory> |
| 30 | #include <string> | 30 | #include <string> |
| 31 | 31 | ||
| 32 | +namespace qpdf | ||
| 33 | +{ | ||
| 34 | + class Tokenizer; | ||
| 35 | +} // namespace qpdf | ||
| 36 | + | ||
| 32 | class QPDFTokenizer | 37 | class QPDFTokenizer |
| 33 | { | 38 | { |
| 34 | public: | 39 | public: |
| @@ -129,6 +134,9 @@ class QPDFTokenizer | @@ -129,6 +134,9 @@ class QPDFTokenizer | ||
| 129 | QPDF_DLL | 134 | QPDF_DLL |
| 130 | QPDFTokenizer(); | 135 | QPDFTokenizer(); |
| 131 | 136 | ||
| 137 | + QPDF_DLL | ||
| 138 | + ~QPDFTokenizer(); | ||
| 139 | + | ||
| 132 | // If called, treat EOF as a separate token type instead of an error. This was introduced in | 140 | // If called, treat EOF as a separate token type instead of an error. This was introduced in |
| 133 | // QPDF 4.1 to facilitate tokenizing content streams. | 141 | // QPDF 4.1 to facilitate tokenizing content streams. |
| 134 | QPDF_DLL | 142 | QPDF_DLL |
| @@ -218,103 +226,7 @@ class QPDFTokenizer | @@ -218,103 +226,7 @@ class QPDFTokenizer | ||
| 218 | QPDFTokenizer(QPDFTokenizer const&) = delete; | 226 | QPDFTokenizer(QPDFTokenizer const&) = delete; |
| 219 | QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; | 227 | QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; |
| 220 | 228 | ||
| 221 | - bool isSpace(char); | ||
| 222 | - bool isDelimiter(char); | ||
| 223 | - void findEI(InputSource& input); | ||
| 224 | - | ||
| 225 | - enum state_e { | ||
| 226 | - st_top, | ||
| 227 | - st_in_hexstring, | ||
| 228 | - st_in_string, | ||
| 229 | - st_in_hexstring_2nd, | ||
| 230 | - st_name, | ||
| 231 | - st_literal, | ||
| 232 | - st_in_space, | ||
| 233 | - st_in_comment, | ||
| 234 | - st_string_escape, | ||
| 235 | - st_char_code, | ||
| 236 | - st_string_after_cr, | ||
| 237 | - st_lt, | ||
| 238 | - st_gt, | ||
| 239 | - st_inline_image, | ||
| 240 | - st_sign, | ||
| 241 | - st_number, | ||
| 242 | - st_real, | ||
| 243 | - st_decimal, | ||
| 244 | - st_name_hex1, | ||
| 245 | - st_name_hex2, | ||
| 246 | - st_before_token, | ||
| 247 | - st_token_ready | ||
| 248 | - }; | ||
| 249 | - | ||
| 250 | - void handleCharacter(char); | ||
| 251 | - void inBeforeToken(char); | ||
| 252 | - void inTop(char); | ||
| 253 | - void inSpace(char); | ||
| 254 | - void inComment(char); | ||
| 255 | - void inString(char); | ||
| 256 | - void inName(char); | ||
| 257 | - void inLt(char); | ||
| 258 | - void inGt(char); | ||
| 259 | - void inStringAfterCR(char); | ||
| 260 | - void inStringEscape(char); | ||
| 261 | - void inLiteral(char); | ||
| 262 | - void inCharCode(char); | ||
| 263 | - void inHexstring(char); | ||
| 264 | - void inHexstring2nd(char); | ||
| 265 | - void inInlineImage(char); | ||
| 266 | - void inTokenReady(char); | ||
| 267 | - void inNameHex1(char); | ||
| 268 | - void inNameHex2(char); | ||
| 269 | - void inSign(char); | ||
| 270 | - void inDecimal(char); | ||
| 271 | - void inNumber(char); | ||
| 272 | - void inReal(char); | ||
| 273 | - void reset(); | ||
| 274 | - | ||
| 275 | - // Lexer state | ||
| 276 | - state_e state; | ||
| 277 | - | ||
| 278 | - bool allow_eof; | ||
| 279 | - bool include_ignorable; | ||
| 280 | - | ||
| 281 | - // Current token accumulation | ||
| 282 | - token_type_e type; | ||
| 283 | - std::string val; | ||
| 284 | - std::string raw_val; | ||
| 285 | - std::string error_message; | ||
| 286 | - bool before_token; | ||
| 287 | - bool in_token; | ||
| 288 | - char char_to_unread; | ||
| 289 | - size_t inline_image_bytes; | ||
| 290 | - bool bad; | ||
| 291 | - | ||
| 292 | - // State for strings | ||
| 293 | - int string_depth; | ||
| 294 | - int char_code; | ||
| 295 | - char hex_char; | ||
| 296 | - int digit_count; | 229 | + std::unique_ptr<qpdf::Tokenizer> m; |
| 297 | }; | 230 | }; |
| 298 | 231 | ||
| 299 | -inline QPDFTokenizer::token_type_e | ||
| 300 | -QPDFTokenizer::getType() const noexcept | ||
| 301 | -{ | ||
| 302 | - return this->type; | ||
| 303 | -} | ||
| 304 | -inline std::string const& | ||
| 305 | -QPDFTokenizer::getValue() const noexcept | ||
| 306 | -{ | ||
| 307 | - return (this->type == tt_name || this->type == tt_string) ? this->val : this->raw_val; | ||
| 308 | -} | ||
| 309 | -inline std::string const& | ||
| 310 | -QPDFTokenizer::getRawValue() const noexcept | ||
| 311 | -{ | ||
| 312 | - return this->raw_val; | ||
| 313 | -} | ||
| 314 | -inline std::string const& | ||
| 315 | -QPDFTokenizer::getErrorMessage() const noexcept | ||
| 316 | -{ | ||
| 317 | - return this->error_message; | ||
| 318 | -} | ||
| 319 | - | ||
| 320 | #endif // QPDFTOKENIZER_HH | 232 | #endif // QPDFTOKENIZER_HH |
libqpdf/QPDFParser.cc
| @@ -4,6 +4,7 @@ | @@ -4,6 +4,7 @@ | ||
| 4 | #include <qpdf/QPDFObjGen.hh> | 4 | #include <qpdf/QPDFObjGen.hh> |
| 5 | #include <qpdf/QPDFObjectHandle.hh> | 5 | #include <qpdf/QPDFObjectHandle.hh> |
| 6 | #include <qpdf/QPDFObject_private.hh> | 6 | #include <qpdf/QPDFObject_private.hh> |
| 7 | +#include <qpdf/QPDFTokenizer_private.hh> | ||
| 7 | #include <qpdf/QTC.hh> | 8 | #include <qpdf/QTC.hh> |
| 8 | #include <qpdf/QUtil.hh> | 9 | #include <qpdf/QUtil.hh> |
| 9 | 10 |
libqpdf/QPDFTokenizer.cc
| 1 | -#include <qpdf/QPDFTokenizer.hh> | 1 | +#include <qpdf/QPDFTokenizer_private.hh> |
| 2 | 2 | ||
| 3 | // DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of | 3 | // DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of |
| 4 | // including it in case it may accidentally be used. | 4 | // including it in case it may accidentally be used. |
| @@ -16,6 +16,9 @@ | @@ -16,6 +16,9 @@ | ||
| 16 | 16 | ||
| 17 | using namespace qpdf; | 17 | using namespace qpdf; |
| 18 | 18 | ||
| 19 | +using Token = QPDFTokenizer::Token; | ||
| 20 | +using tt = QPDFTokenizer::token_type_e; | ||
| 21 | + | ||
| 19 | static inline bool | 22 | static inline bool |
| 20 | is_delimiter(char ch) | 23 | is_delimiter(char ch) |
| 21 | { | 24 | { |
| @@ -77,10 +80,10 @@ QPDFWordTokenFinder::check() | @@ -77,10 +80,10 @@ QPDFWordTokenFinder::check() | ||
| 77 | } | 80 | } |
| 78 | 81 | ||
| 79 | void | 82 | void |
| 80 | -QPDFTokenizer::reset() | 83 | +Tokenizer::reset() |
| 81 | { | 84 | { |
| 82 | state = st_before_token; | 85 | state = st_before_token; |
| 83 | - type = tt_bad; | 86 | + type = tt::tt_bad; |
| 84 | val.clear(); | 87 | val.clear(); |
| 85 | raw_val.clear(); | 88 | raw_val.clear(); |
| 86 | error_message = ""; | 89 | error_message = ""; |
| @@ -105,8 +108,13 @@ QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : | @@ -105,8 +108,13 @@ QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : | ||
| 105 | } | 108 | } |
| 106 | 109 | ||
| 107 | QPDFTokenizer::QPDFTokenizer() : | 110 | QPDFTokenizer::QPDFTokenizer() : |
| 108 | - allow_eof(false), | ||
| 109 | - include_ignorable(false) | 111 | + m(std::make_unique<qpdf::Tokenizer>()) |
| 112 | +{ | ||
| 113 | +} | ||
| 114 | + | ||
| 115 | +QPDFTokenizer::~QPDFTokenizer() = default; | ||
| 116 | + | ||
| 117 | +Tokenizer::Tokenizer() | ||
| 110 | { | 118 | { |
| 111 | reset(); | 119 | reset(); |
| 112 | } | 120 | } |
| @@ -114,23 +122,35 @@ QPDFTokenizer::QPDFTokenizer() : | @@ -114,23 +122,35 @@ QPDFTokenizer::QPDFTokenizer() : | ||
| 114 | void | 122 | void |
| 115 | QPDFTokenizer::allowEOF() | 123 | QPDFTokenizer::allowEOF() |
| 116 | { | 124 | { |
| 117 | - this->allow_eof = true; | 125 | + m->allowEOF(); |
| 126 | +} | ||
| 127 | + | ||
| 128 | +void | ||
| 129 | +Tokenizer::allowEOF() | ||
| 130 | +{ | ||
| 131 | + allow_eof = true; | ||
| 118 | } | 132 | } |
| 119 | 133 | ||
| 120 | void | 134 | void |
| 121 | QPDFTokenizer::includeIgnorable() | 135 | QPDFTokenizer::includeIgnorable() |
| 122 | { | 136 | { |
| 123 | - this->include_ignorable = true; | 137 | + m->includeIgnorable(); |
| 138 | +} | ||
| 139 | + | ||
| 140 | +void | ||
| 141 | +Tokenizer::includeIgnorable() | ||
| 142 | +{ | ||
| 143 | + include_ignorable = true; | ||
| 124 | } | 144 | } |
| 125 | 145 | ||
| 126 | bool | 146 | bool |
| 127 | -QPDFTokenizer::isSpace(char ch) | 147 | +Tokenizer::isSpace(char ch) |
| 128 | { | 148 | { |
| 129 | return (ch == '\0' || util::is_space(ch)); | 149 | return (ch == '\0' || util::is_space(ch)); |
| 130 | } | 150 | } |
| 131 | 151 | ||
| 132 | bool | 152 | bool |
| 133 | -QPDFTokenizer::isDelimiter(char ch) | 153 | +Tokenizer::isDelimiter(char ch) |
| 134 | { | 154 | { |
| 135 | return is_delimiter(ch); | 155 | return is_delimiter(ch); |
| 136 | } | 156 | } |
| @@ -138,6 +158,12 @@ QPDFTokenizer::isDelimiter(char ch) | @@ -138,6 +158,12 @@ QPDFTokenizer::isDelimiter(char ch) | ||
| 138 | void | 158 | void |
| 139 | QPDFTokenizer::presentCharacter(char ch) | 159 | QPDFTokenizer::presentCharacter(char ch) |
| 140 | { | 160 | { |
| 161 | + m->presentCharacter(ch); | ||
| 162 | +} | ||
| 163 | + | ||
| 164 | +void | ||
| 165 | +Tokenizer::presentCharacter(char ch) | ||
| 166 | +{ | ||
| 141 | handleCharacter(ch); | 167 | handleCharacter(ch); |
| 142 | 168 | ||
| 143 | if (this->in_token) { | 169 | if (this->in_token) { |
| @@ -146,7 +172,7 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -146,7 +172,7 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 146 | } | 172 | } |
| 147 | 173 | ||
| 148 | void | 174 | void |
| 149 | -QPDFTokenizer::handleCharacter(char ch) | 175 | +Tokenizer::handleCharacter(char ch) |
| 150 | { | 176 | { |
| 151 | // In some cases, functions called below may call a second handler. This happens whenever you | 177 | // In some cases, functions called below may call a second handler. This happens whenever you |
| 152 | // have to use a character from the next token to detect the end of the current token. | 178 | // have to use a character from the next token to detect the end of the current token. |
| @@ -246,14 +272,14 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -246,14 +272,14 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 246 | } | 272 | } |
| 247 | 273 | ||
| 248 | void | 274 | void |
| 249 | -QPDFTokenizer::inTokenReady(char ch) | 275 | +Tokenizer::inTokenReady(char ch) |
| 250 | { | 276 | { |
| 251 | throw std::logic_error( | 277 | throw std::logic_error( |
| 252 | "INTERNAL ERROR: QPDF tokenizer presented character while token is waiting"); | 278 | "INTERNAL ERROR: QPDF tokenizer presented character while token is waiting"); |
| 253 | } | 279 | } |
| 254 | 280 | ||
| 255 | void | 281 | void |
| 256 | -QPDFTokenizer::inBeforeToken(char ch) | 282 | +Tokenizer::inBeforeToken(char ch) |
| 257 | { | 283 | { |
| 258 | // Note: we specifically do not use ctype here. It is locale-dependent. | 284 | // Note: we specifically do not use ctype here. It is locale-dependent. |
| 259 | if (isSpace(ch)) { | 285 | if (isSpace(ch)) { |
| @@ -274,7 +300,7 @@ QPDFTokenizer::inBeforeToken(char ch) | @@ -274,7 +300,7 @@ QPDFTokenizer::inBeforeToken(char ch) | ||
| 274 | } | 300 | } |
| 275 | 301 | ||
| 276 | void | 302 | void |
| 277 | -QPDFTokenizer::inTop(char ch) | 303 | +Tokenizer::inTop(char ch) |
| 278 | { | 304 | { |
| 279 | switch (ch) { | 305 | switch (ch) { |
| 280 | case '(': | 306 | case '(': |
| @@ -291,29 +317,29 @@ QPDFTokenizer::inTop(char ch) | @@ -291,29 +317,29 @@ QPDFTokenizer::inTop(char ch) | ||
| 291 | return; | 317 | return; |
| 292 | 318 | ||
| 293 | case (')'): | 319 | case (')'): |
| 294 | - this->type = tt_bad; | 320 | + this->type = tt::tt_bad; |
| 295 | QTC::TC("qpdf", "QPDFTokenizer bad )"); | 321 | QTC::TC("qpdf", "QPDFTokenizer bad )"); |
| 296 | this->error_message = "unexpected )"; | 322 | this->error_message = "unexpected )"; |
| 297 | this->state = st_token_ready; | 323 | this->state = st_token_ready; |
| 298 | return; | 324 | return; |
| 299 | 325 | ||
| 300 | case '[': | 326 | case '[': |
| 301 | - this->type = tt_array_open; | 327 | + this->type = tt::tt_array_open; |
| 302 | this->state = st_token_ready; | 328 | this->state = st_token_ready; |
| 303 | return; | 329 | return; |
| 304 | 330 | ||
| 305 | case ']': | 331 | case ']': |
| 306 | - this->type = tt_array_close; | 332 | + this->type = tt::tt_array_close; |
| 307 | this->state = st_token_ready; | 333 | this->state = st_token_ready; |
| 308 | return; | 334 | return; |
| 309 | 335 | ||
| 310 | case '{': | 336 | case '{': |
| 311 | - this->type = tt_brace_open; | 337 | + this->type = tt::tt_brace_open; |
| 312 | this->state = st_token_ready; | 338 | this->state = st_token_ready; |
| 313 | return; | 339 | return; |
| 314 | 340 | ||
| 315 | case '}': | 341 | case '}': |
| 316 | - this->type = tt_brace_close; | 342 | + this->type = tt::tt_brace_close; |
| 317 | this->state = st_token_ready; | 343 | this->state = st_token_ready; |
| 318 | return; | 344 | return; |
| 319 | 345 | ||
| @@ -351,11 +377,11 @@ QPDFTokenizer::inTop(char ch) | @@ -351,11 +377,11 @@ QPDFTokenizer::inTop(char ch) | ||
| 351 | } | 377 | } |
| 352 | 378 | ||
| 353 | void | 379 | void |
| 354 | -QPDFTokenizer::inSpace(char ch) | 380 | +Tokenizer::inSpace(char ch) |
| 355 | { | 381 | { |
| 356 | // We only enter this state if include_ignorable is true. | 382 | // We only enter this state if include_ignorable is true. |
| 357 | if (!isSpace(ch)) { | 383 | if (!isSpace(ch)) { |
| 358 | - this->type = tt_space; | 384 | + this->type = tt::tt_space; |
| 359 | this->in_token = false; | 385 | this->in_token = false; |
| 360 | this->char_to_unread = ch; | 386 | this->char_to_unread = ch; |
| 361 | this->state = st_token_ready; | 387 | this->state = st_token_ready; |
| @@ -363,11 +389,11 @@ QPDFTokenizer::inSpace(char ch) | @@ -363,11 +389,11 @@ QPDFTokenizer::inSpace(char ch) | ||
| 363 | } | 389 | } |
| 364 | 390 | ||
| 365 | void | 391 | void |
| 366 | -QPDFTokenizer::inComment(char ch) | 392 | +Tokenizer::inComment(char ch) |
| 367 | { | 393 | { |
| 368 | if ((ch == '\r') || (ch == '\n')) { | 394 | if ((ch == '\r') || (ch == '\n')) { |
| 369 | if (this->include_ignorable) { | 395 | if (this->include_ignorable) { |
| 370 | - this->type = tt_comment; | 396 | + this->type = tt::tt_comment; |
| 371 | this->in_token = false; | 397 | this->in_token = false; |
| 372 | this->char_to_unread = ch; | 398 | this->char_to_unread = ch; |
| 373 | this->state = st_token_ready; | 399 | this->state = st_token_ready; |
| @@ -378,7 +404,7 @@ QPDFTokenizer::inComment(char ch) | @@ -378,7 +404,7 @@ QPDFTokenizer::inComment(char ch) | ||
| 378 | } | 404 | } |
| 379 | 405 | ||
| 380 | void | 406 | void |
| 381 | -QPDFTokenizer::inString(char ch) | 407 | +Tokenizer::inString(char ch) |
| 382 | { | 408 | { |
| 383 | switch (ch) { | 409 | switch (ch) { |
| 384 | case '\\': | 410 | case '\\': |
| @@ -392,7 +418,7 @@ QPDFTokenizer::inString(char ch) | @@ -392,7 +418,7 @@ QPDFTokenizer::inString(char ch) | ||
| 392 | 418 | ||
| 393 | case ')': | 419 | case ')': |
| 394 | if (--this->string_depth == 0) { | 420 | if (--this->string_depth == 0) { |
| 395 | - this->type = tt_string; | 421 | + this->type = tt::tt_string; |
| 396 | this->state = st_token_ready; | 422 | this->state = st_token_ready; |
| 397 | return; | 423 | return; |
| 398 | } | 424 | } |
| @@ -417,7 +443,7 @@ QPDFTokenizer::inString(char ch) | @@ -417,7 +443,7 @@ QPDFTokenizer::inString(char ch) | ||
| 417 | } | 443 | } |
| 418 | 444 | ||
| 419 | void | 445 | void |
| 420 | -QPDFTokenizer::inName(char ch) | 446 | +Tokenizer::inName(char ch) |
| 421 | { | 447 | { |
| 422 | if (isDelimiter(ch)) { | 448 | if (isDelimiter(ch)) { |
| 423 | // A C-locale whitespace character or delimiter terminates token. It is important to unread | 449 | // A C-locale whitespace character or delimiter terminates token. It is important to unread |
| @@ -426,7 +452,7 @@ QPDFTokenizer::inName(char ch) | @@ -426,7 +452,7 @@ QPDFTokenizer::inName(char ch) | ||
| 426 | // though not on any files in the test suite as of this | 452 | // though not on any files in the test suite as of this |
| 427 | // writing. | 453 | // writing. |
| 428 | 454 | ||
| 429 | - this->type = this->bad ? tt_bad : tt_name; | 455 | + this->type = this->bad ? tt::tt_bad : tt::tt_name; |
| 430 | this->in_token = false; | 456 | this->in_token = false; |
| 431 | this->char_to_unread = ch; | 457 | this->char_to_unread = ch; |
| 432 | this->state = st_token_ready; | 458 | this->state = st_token_ready; |
| @@ -439,7 +465,7 @@ QPDFTokenizer::inName(char ch) | @@ -439,7 +465,7 @@ QPDFTokenizer::inName(char ch) | ||
| 439 | } | 465 | } |
| 440 | 466 | ||
| 441 | void | 467 | void |
| 442 | -QPDFTokenizer::inNameHex1(char ch) | 468 | +Tokenizer::inNameHex1(char ch) |
| 443 | { | 469 | { |
| 444 | this->hex_char = ch; | 470 | this->hex_char = ch; |
| 445 | 471 | ||
| @@ -457,7 +483,7 @@ QPDFTokenizer::inNameHex1(char ch) | @@ -457,7 +483,7 @@ QPDFTokenizer::inNameHex1(char ch) | ||
| 457 | } | 483 | } |
| 458 | 484 | ||
| 459 | void | 485 | void |
| 460 | -QPDFTokenizer::inNameHex2(char ch) | 486 | +Tokenizer::inNameHex2(char ch) |
| 461 | { | 487 | { |
| 462 | if (char hval = util::hex_decode_char(ch); hval < '\20') { | 488 | if (char hval = util::hex_decode_char(ch); hval < '\20') { |
| 463 | this->char_code |= int(hval); | 489 | this->char_code |= int(hval); |
| @@ -484,7 +510,7 @@ QPDFTokenizer::inNameHex2(char ch) | @@ -484,7 +510,7 @@ QPDFTokenizer::inNameHex2(char ch) | ||
| 484 | } | 510 | } |
| 485 | 511 | ||
| 486 | void | 512 | void |
| 487 | -QPDFTokenizer::inSign(char ch) | 513 | +Tokenizer::inSign(char ch) |
| 488 | { | 514 | { |
| 489 | if (util::is_digit(ch)) { | 515 | if (util::is_digit(ch)) { |
| 490 | this->state = st_number; | 516 | this->state = st_number; |
| @@ -497,7 +523,7 @@ QPDFTokenizer::inSign(char ch) | @@ -497,7 +523,7 @@ QPDFTokenizer::inSign(char ch) | ||
| 497 | } | 523 | } |
| 498 | 524 | ||
| 499 | void | 525 | void |
| 500 | -QPDFTokenizer::inDecimal(char ch) | 526 | +Tokenizer::inDecimal(char ch) |
| 501 | { | 527 | { |
| 502 | if (util::is_digit(ch)) { | 528 | if (util::is_digit(ch)) { |
| 503 | this->state = st_real; | 529 | this->state = st_real; |
| @@ -508,13 +534,13 @@ QPDFTokenizer::inDecimal(char ch) | @@ -508,13 +534,13 @@ QPDFTokenizer::inDecimal(char ch) | ||
| 508 | } | 534 | } |
| 509 | 535 | ||
| 510 | void | 536 | void |
| 511 | -QPDFTokenizer::inNumber(char ch) | 537 | +Tokenizer::inNumber(char ch) |
| 512 | { | 538 | { |
| 513 | if (util::is_digit(ch)) { | 539 | if (util::is_digit(ch)) { |
| 514 | } else if (ch == '.') { | 540 | } else if (ch == '.') { |
| 515 | this->state = st_real; | 541 | this->state = st_real; |
| 516 | } else if (isDelimiter(ch)) { | 542 | } else if (isDelimiter(ch)) { |
| 517 | - this->type = tt_integer; | 543 | + this->type = tt::tt_integer; |
| 518 | this->state = st_token_ready; | 544 | this->state = st_token_ready; |
| 519 | this->in_token = false; | 545 | this->in_token = false; |
| 520 | this->char_to_unread = ch; | 546 | this->char_to_unread = ch; |
| @@ -524,11 +550,11 @@ QPDFTokenizer::inNumber(char ch) | @@ -524,11 +550,11 @@ QPDFTokenizer::inNumber(char ch) | ||
| 524 | } | 550 | } |
| 525 | 551 | ||
| 526 | void | 552 | void |
| 527 | -QPDFTokenizer::inReal(char ch) | 553 | +Tokenizer::inReal(char ch) |
| 528 | { | 554 | { |
| 529 | if (util::is_digit(ch)) { | 555 | if (util::is_digit(ch)) { |
| 530 | } else if (isDelimiter(ch)) { | 556 | } else if (isDelimiter(ch)) { |
| 531 | - this->type = tt_real; | 557 | + this->type = tt::tt_real; |
| 532 | this->state = st_token_ready; | 558 | this->state = st_token_ready; |
| 533 | this->in_token = false; | 559 | this->in_token = false; |
| 534 | this->char_to_unread = ch; | 560 | this->char_to_unread = ch; |
| @@ -537,7 +563,7 @@ QPDFTokenizer::inReal(char ch) | @@ -537,7 +563,7 @@ QPDFTokenizer::inReal(char ch) | ||
| 537 | } | 563 | } |
| 538 | } | 564 | } |
| 539 | void | 565 | void |
| 540 | -QPDFTokenizer::inStringEscape(char ch) | 566 | +Tokenizer::inStringEscape(char ch) |
| 541 | { | 567 | { |
| 542 | this->state = st_in_string; | 568 | this->state = st_in_string; |
| 543 | switch (ch) { | 569 | switch (ch) { |
| @@ -590,7 +616,7 @@ QPDFTokenizer::inStringEscape(char ch) | @@ -590,7 +616,7 @@ QPDFTokenizer::inStringEscape(char ch) | ||
| 590 | } | 616 | } |
| 591 | 617 | ||
| 592 | void | 618 | void |
| 593 | -QPDFTokenizer::inStringAfterCR(char ch) | 619 | +Tokenizer::inStringAfterCR(char ch) |
| 594 | { | 620 | { |
| 595 | this->state = st_in_string; | 621 | this->state = st_in_string; |
| 596 | if (ch != '\n') { | 622 | if (ch != '\n') { |
| @@ -599,10 +625,10 @@ QPDFTokenizer::inStringAfterCR(char ch) | @@ -599,10 +625,10 @@ QPDFTokenizer::inStringAfterCR(char ch) | ||
| 599 | } | 625 | } |
| 600 | 626 | ||
| 601 | void | 627 | void |
| 602 | -QPDFTokenizer::inLt(char ch) | 628 | +Tokenizer::inLt(char ch) |
| 603 | { | 629 | { |
| 604 | if (ch == '<') { | 630 | if (ch == '<') { |
| 605 | - this->type = tt_dict_open; | 631 | + this->type = tt::tt_dict_open; |
| 606 | this->state = st_token_ready; | 632 | this->state = st_token_ready; |
| 607 | return; | 633 | return; |
| 608 | } | 634 | } |
| @@ -612,13 +638,13 @@ QPDFTokenizer::inLt(char ch) | @@ -612,13 +638,13 @@ QPDFTokenizer::inLt(char ch) | ||
| 612 | } | 638 | } |
| 613 | 639 | ||
| 614 | void | 640 | void |
| 615 | -QPDFTokenizer::inGt(char ch) | 641 | +Tokenizer::inGt(char ch) |
| 616 | { | 642 | { |
| 617 | if (ch == '>') { | 643 | if (ch == '>') { |
| 618 | - this->type = tt_dict_close; | 644 | + this->type = tt::tt_dict_close; |
| 619 | this->state = st_token_ready; | 645 | this->state = st_token_ready; |
| 620 | } else { | 646 | } else { |
| 621 | - this->type = tt_bad; | 647 | + this->type = tt::tt_bad; |
| 622 | QTC::TC("qpdf", "QPDFTokenizer bad >"); | 648 | QTC::TC("qpdf", "QPDFTokenizer bad >"); |
| 623 | this->error_message = "unexpected >"; | 649 | this->error_message = "unexpected >"; |
| 624 | this->in_token = false; | 650 | this->in_token = false; |
| @@ -628,7 +654,7 @@ QPDFTokenizer::inGt(char ch) | @@ -628,7 +654,7 @@ QPDFTokenizer::inGt(char ch) | ||
| 628 | } | 654 | } |
| 629 | 655 | ||
| 630 | void | 656 | void |
| 631 | -QPDFTokenizer::inLiteral(char ch) | 657 | +Tokenizer::inLiteral(char ch) |
| 632 | { | 658 | { |
| 633 | if (isDelimiter(ch)) { | 659 | if (isDelimiter(ch)) { |
| 634 | // A C-locale whitespace character or delimiter terminates token. It is important to unread | 660 | // A C-locale whitespace character or delimiter terminates token. It is important to unread |
| @@ -640,27 +666,27 @@ QPDFTokenizer::inLiteral(char ch) | @@ -640,27 +666,27 @@ QPDFTokenizer::inLiteral(char ch) | ||
| 640 | this->char_to_unread = ch; | 666 | this->char_to_unread = ch; |
| 641 | this->state = st_token_ready; | 667 | this->state = st_token_ready; |
| 642 | this->type = (this->raw_val == "true") || (this->raw_val == "false") | 668 | this->type = (this->raw_val == "true") || (this->raw_val == "false") |
| 643 | - ? tt_bool | ||
| 644 | - : (this->raw_val == "null" ? tt_null : tt_word); | 669 | + ? tt::tt_bool |
| 670 | + : (this->raw_val == "null" ? tt::tt_null : tt::tt_word); | ||
| 645 | } | 671 | } |
| 646 | } | 672 | } |
| 647 | 673 | ||
| 648 | void | 674 | void |
| 649 | -QPDFTokenizer::inHexstring(char ch) | 675 | +Tokenizer::inHexstring(char ch) |
| 650 | { | 676 | { |
| 651 | if (char hval = util::hex_decode_char(ch); hval < '\20') { | 677 | if (char hval = util::hex_decode_char(ch); hval < '\20') { |
| 652 | this->char_code = int(hval) << 4; | 678 | this->char_code = int(hval) << 4; |
| 653 | this->state = st_in_hexstring_2nd; | 679 | this->state = st_in_hexstring_2nd; |
| 654 | 680 | ||
| 655 | } else if (ch == '>') { | 681 | } else if (ch == '>') { |
| 656 | - this->type = tt_string; | 682 | + this->type = tt::tt_string; |
| 657 | this->state = st_token_ready; | 683 | this->state = st_token_ready; |
| 658 | 684 | ||
| 659 | } else if (isSpace(ch)) { | 685 | } else if (isSpace(ch)) { |
| 660 | // ignore | 686 | // ignore |
| 661 | 687 | ||
| 662 | } else { | 688 | } else { |
| 663 | - this->type = tt_bad; | 689 | + this->type = tt::tt_bad; |
| 664 | QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); | 690 | QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); |
| 665 | this->error_message = std::string("invalid character (") + ch + ") in hexstring"; | 691 | this->error_message = std::string("invalid character (") + ch + ") in hexstring"; |
| 666 | this->state = st_token_ready; | 692 | this->state = st_token_ready; |
| @@ -668,7 +694,7 @@ QPDFTokenizer::inHexstring(char ch) | @@ -668,7 +694,7 @@ QPDFTokenizer::inHexstring(char ch) | ||
| 668 | } | 694 | } |
| 669 | 695 | ||
| 670 | void | 696 | void |
| 671 | -QPDFTokenizer::inHexstring2nd(char ch) | 697 | +Tokenizer::inHexstring2nd(char ch) |
| 672 | { | 698 | { |
| 673 | if (char hval = util::hex_decode_char(ch); hval < '\20') { | 699 | if (char hval = util::hex_decode_char(ch); hval < '\20') { |
| 674 | this->val += char(this->char_code) | hval; | 700 | this->val += char(this->char_code) | hval; |
| @@ -677,14 +703,14 @@ QPDFTokenizer::inHexstring2nd(char ch) | @@ -677,14 +703,14 @@ QPDFTokenizer::inHexstring2nd(char ch) | ||
| 677 | } else if (ch == '>') { | 703 | } else if (ch == '>') { |
| 678 | // PDF spec says odd hexstrings have implicit trailing 0. | 704 | // PDF spec says odd hexstrings have implicit trailing 0. |
| 679 | this->val += char(this->char_code); | 705 | this->val += char(this->char_code); |
| 680 | - this->type = tt_string; | 706 | + this->type = tt::tt_string; |
| 681 | this->state = st_token_ready; | 707 | this->state = st_token_ready; |
| 682 | 708 | ||
| 683 | } else if (isSpace(ch)) { | 709 | } else if (isSpace(ch)) { |
| 684 | // ignore | 710 | // ignore |
| 685 | 711 | ||
| 686 | } else { | 712 | } else { |
| 687 | - this->type = tt_bad; | 713 | + this->type = tt::tt_bad; |
| 688 | QTC::TC("qpdf", "QPDFTokenizer bad hexstring 2nd character"); | 714 | QTC::TC("qpdf", "QPDFTokenizer bad hexstring 2nd character"); |
| 689 | this->error_message = std::string("invalid character (") + ch + ") in hexstring"; | 715 | this->error_message = std::string("invalid character (") + ch + ") in hexstring"; |
| 690 | this->state = st_token_ready; | 716 | this->state = st_token_ready; |
| @@ -692,7 +718,7 @@ QPDFTokenizer::inHexstring2nd(char ch) | @@ -692,7 +718,7 @@ QPDFTokenizer::inHexstring2nd(char ch) | ||
| 692 | } | 718 | } |
| 693 | 719 | ||
| 694 | void | 720 | void |
| 695 | -QPDFTokenizer::inCharCode(char ch) | 721 | +Tokenizer::inCharCode(char ch) |
| 696 | { | 722 | { |
| 697 | bool handled = false; | 723 | bool handled = false; |
| 698 | if (('0' <= ch) && (ch <= '7')) { | 724 | if (('0' <= ch) && (ch <= '7')) { |
| @@ -712,11 +738,11 @@ QPDFTokenizer::inCharCode(char ch) | @@ -712,11 +738,11 @@ QPDFTokenizer::inCharCode(char ch) | ||
| 712 | } | 738 | } |
| 713 | 739 | ||
| 714 | void | 740 | void |
| 715 | -QPDFTokenizer::inInlineImage(char ch) | 741 | +Tokenizer::inInlineImage(char ch) |
| 716 | { | 742 | { |
| 717 | if ((this->raw_val.length() + 1) == this->inline_image_bytes) { | 743 | if ((this->raw_val.length() + 1) == this->inline_image_bytes) { |
| 718 | QTC::TC("qpdf", "QPDFTokenizer found EI by byte count"); | 744 | QTC::TC("qpdf", "QPDFTokenizer found EI by byte count"); |
| 719 | - this->type = tt_inline_image; | 745 | + this->type = tt::tt_inline_image; |
| 720 | this->inline_image_bytes = 0; | 746 | this->inline_image_bytes = 0; |
| 721 | this->state = st_token_ready; | 747 | this->state = st_token_ready; |
| 722 | } | 748 | } |
| @@ -725,6 +751,12 @@ QPDFTokenizer::inInlineImage(char ch) | @@ -725,6 +751,12 @@ QPDFTokenizer::inInlineImage(char ch) | ||
| 725 | void | 751 | void |
| 726 | QPDFTokenizer::presentEOF() | 752 | QPDFTokenizer::presentEOF() |
| 727 | { | 753 | { |
| 754 | + m->presentEOF(); | ||
| 755 | +} | ||
| 756 | + | ||
| 757 | +void | ||
| 758 | +Tokenizer::presentEOF() | ||
| 759 | +{ | ||
| 728 | switch (this->state) { | 760 | switch (this->state) { |
| 729 | case st_name: | 761 | case st_name: |
| 730 | case st_name_hex1: | 762 | case st_name_hex1: |
| @@ -742,15 +774,15 @@ QPDFTokenizer::presentEOF() | @@ -742,15 +774,15 @@ QPDFTokenizer::presentEOF() | ||
| 742 | 774 | ||
| 743 | case st_top: | 775 | case st_top: |
| 744 | case st_before_token: | 776 | case st_before_token: |
| 745 | - this->type = tt_eof; | 777 | + this->type = tt::tt_eof; |
| 746 | break; | 778 | break; |
| 747 | 779 | ||
| 748 | case st_in_space: | 780 | case st_in_space: |
| 749 | - this->type = this->include_ignorable ? tt_space : tt_eof; | 781 | + this->type = this->include_ignorable ? tt::tt_space : tt::tt_eof; |
| 750 | break; | 782 | break; |
| 751 | 783 | ||
| 752 | case st_in_comment: | 784 | case st_in_comment: |
| 753 | - this->type = this->include_ignorable ? tt_comment : tt_bad; | 785 | + this->type = this->include_ignorable ? tt::tt_comment : tt::tt_bad; |
| 754 | break; | 786 | break; |
| 755 | 787 | ||
| 756 | case st_token_ready: | 788 | case st_token_ready: |
| @@ -758,7 +790,7 @@ QPDFTokenizer::presentEOF() | @@ -758,7 +790,7 @@ QPDFTokenizer::presentEOF() | ||
| 758 | 790 | ||
| 759 | default: | 791 | default: |
| 760 | QTC::TC("qpdf", "QPDFTokenizer EOF reading token"); | 792 | QTC::TC("qpdf", "QPDFTokenizer EOF reading token"); |
| 761 | - this->type = tt_bad; | 793 | + this->type = tt::tt_bad; |
| 762 | this->error_message = "EOF while reading token"; | 794 | this->error_message = "EOF while reading token"; |
| 763 | } | 795 | } |
| 764 | this->state = st_token_ready; | 796 | this->state = st_token_ready; |
| @@ -767,12 +799,24 @@ QPDFTokenizer::presentEOF() | @@ -767,12 +799,24 @@ QPDFTokenizer::presentEOF() | ||
| 767 | void | 799 | void |
| 768 | QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) | 800 | QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) |
| 769 | { | 801 | { |
| 802 | + m->expectInlineImage(input); | ||
| 803 | +} | ||
| 804 | + | ||
| 805 | +void | ||
| 806 | +Tokenizer::expectInlineImage(std::shared_ptr<InputSource> input) | ||
| 807 | +{ | ||
| 770 | expectInlineImage(*input); | 808 | expectInlineImage(*input); |
| 771 | } | 809 | } |
| 772 | 810 | ||
| 773 | void | 811 | void |
| 774 | QPDFTokenizer::expectInlineImage(InputSource& input) | 812 | QPDFTokenizer::expectInlineImage(InputSource& input) |
| 775 | { | 813 | { |
| 814 | + m->expectInlineImage(input); | ||
| 815 | +} | ||
| 816 | + | ||
| 817 | +void | ||
| 818 | +Tokenizer::expectInlineImage(InputSource& input) | ||
| 819 | +{ | ||
| 776 | if (this->state == st_token_ready) { | 820 | if (this->state == st_token_ready) { |
| 777 | reset(); | 821 | reset(); |
| 778 | } else if (this->state != st_before_token) { | 822 | } else if (this->state != st_before_token) { |
| @@ -786,7 +830,7 @@ QPDFTokenizer::expectInlineImage(InputSource& input) | @@ -786,7 +830,7 @@ QPDFTokenizer::expectInlineImage(InputSource& input) | ||
| 786 | } | 830 | } |
| 787 | 831 | ||
| 788 | void | 832 | void |
| 789 | -QPDFTokenizer::findEI(InputSource& input) | 833 | +Tokenizer::findEI(InputSource& input) |
| 790 | { | 834 | { |
| 791 | qpdf_offset_t last_offset = input.getLastOffset(); | 835 | qpdf_offset_t last_offset = input.getLastOffset(); |
| 792 | qpdf_offset_t pos = input.tell(); | 836 | qpdf_offset_t pos = input.tell(); |
| @@ -816,10 +860,10 @@ QPDFTokenizer::findEI(InputSource& input) | @@ -816,10 +860,10 @@ QPDFTokenizer::findEI(InputSource& input) | ||
| 816 | // be pretty sure we've found the actual EI. | 860 | // be pretty sure we've found the actual EI. |
| 817 | for (int i = 0; i < 10; ++i) { | 861 | for (int i = 0; i < 10; ++i) { |
| 818 | QPDFTokenizer::Token t = check.readToken(input, "checker", true); | 862 | QPDFTokenizer::Token t = check.readToken(input, "checker", true); |
| 819 | - token_type_e type = t.getType(); | ||
| 820 | - if (type == tt_eof) { | 863 | + QPDFTokenizer::token_type_e type = t.getType(); |
| 864 | + if (type == tt::tt_eof) { | ||
| 821 | okay = true; | 865 | okay = true; |
| 822 | - } else if (type == tt_bad) { | 866 | + } else if (type == tt::tt_bad) { |
| 823 | found_bad = true; | 867 | found_bad = true; |
| 824 | } else if (t.isWord()) { | 868 | } else if (t.isWord()) { |
| 825 | // The qpdf tokenizer lumps alphabetic and otherwise uncategorized characters into | 869 | // The qpdf tokenizer lumps alphabetic and otherwise uncategorized characters into |
| @@ -870,11 +914,17 @@ QPDFTokenizer::findEI(InputSource& input) | @@ -870,11 +914,17 @@ QPDFTokenizer::findEI(InputSource& input) | ||
| 870 | bool | 914 | bool |
| 871 | QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) | 915 | QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) |
| 872 | { | 916 | { |
| 917 | + return m->getToken(token, unread_char, ch); | ||
| 918 | +} | ||
| 919 | + | ||
| 920 | +bool | ||
| 921 | +Tokenizer::getToken(Token& token, bool& unread_char, char& ch) | ||
| 922 | +{ | ||
| 873 | bool ready = (this->state == st_token_ready); | 923 | bool ready = (this->state == st_token_ready); |
| 874 | unread_char = !this->in_token && !this->before_token; | 924 | unread_char = !this->in_token && !this->before_token; |
| 875 | ch = this->char_to_unread; | 925 | ch = this->char_to_unread; |
| 876 | if (ready) { | 926 | if (ready) { |
| 877 | - token = (!(this->type == tt_name || this->type == tt_string)) | 927 | + token = (!(this->type == tt::tt_name || this->type == tt::tt_string)) |
| 878 | ? Token(this->type, this->raw_val, this->raw_val, this->error_message) | 928 | ? Token(this->type, this->raw_val, this->raw_val, this->error_message) |
| 879 | : Token(this->type, this->val, this->raw_val, this->error_message); | 929 | : Token(this->type, this->val, this->raw_val, this->error_message); |
| 880 | 930 | ||
| @@ -886,13 +936,19 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) | @@ -886,13 +936,19 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) | ||
| 886 | bool | 936 | bool |
| 887 | QPDFTokenizer::betweenTokens() | 937 | QPDFTokenizer::betweenTokens() |
| 888 | { | 938 | { |
| 889 | - return this->before_token; | 939 | + return m->before_token; |
| 890 | } | 940 | } |
| 891 | 941 | ||
| 892 | QPDFTokenizer::Token | 942 | QPDFTokenizer::Token |
| 893 | QPDFTokenizer::readToken( | 943 | QPDFTokenizer::readToken( |
| 894 | InputSource& input, std::string const& context, bool allow_bad, size_t max_len) | 944 | InputSource& input, std::string const& context, bool allow_bad, size_t max_len) |
| 895 | { | 945 | { |
| 946 | + return m->readToken(input, context, allow_bad, max_len); | ||
| 947 | +} | ||
| 948 | + | ||
| 949 | +QPDFTokenizer::Token | ||
| 950 | +Tokenizer::readToken(InputSource& input, std::string const& context, bool allow_bad, size_t max_len) | ||
| 951 | +{ | ||
| 896 | nextToken(input, context, max_len); | 952 | nextToken(input, context, max_len); |
| 897 | 953 | ||
| 898 | Token token; | 954 | Token token; |
| @@ -900,7 +956,7 @@ QPDFTokenizer::readToken( | @@ -900,7 +956,7 @@ QPDFTokenizer::readToken( | ||
| 900 | char char_to_unread; | 956 | char char_to_unread; |
| 901 | getToken(token, unread_char, char_to_unread); | 957 | getToken(token, unread_char, char_to_unread); |
| 902 | 958 | ||
| 903 | - if (token.getType() == tt_bad) { | 959 | + if (token.getType() == tt::tt_bad) { |
| 904 | if (allow_bad) { | 960 | if (allow_bad) { |
| 905 | QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); | 961 | QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); |
| 906 | } else { | 962 | } else { |
| @@ -919,12 +975,25 @@ QPDFTokenizer::Token | @@ -919,12 +975,25 @@ QPDFTokenizer::Token | ||
| 919 | QPDFTokenizer::readToken( | 975 | QPDFTokenizer::readToken( |
| 920 | std::shared_ptr<InputSource> input, std::string const& context, bool allow_bad, size_t max_len) | 976 | std::shared_ptr<InputSource> input, std::string const& context, bool allow_bad, size_t max_len) |
| 921 | { | 977 | { |
| 978 | + return m->readToken(*input, context, allow_bad, max_len); | ||
| 979 | +} | ||
| 980 | + | ||
| 981 | +QPDFTokenizer::Token | ||
| 982 | +Tokenizer::readToken( | ||
| 983 | + std::shared_ptr<InputSource> input, std::string const& context, bool allow_bad, size_t max_len) | ||
| 984 | +{ | ||
| 922 | return readToken(*input, context, allow_bad, max_len); | 985 | return readToken(*input, context, allow_bad, max_len); |
| 923 | } | 986 | } |
| 924 | 987 | ||
| 925 | bool | 988 | bool |
| 926 | QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t max_len) | 989 | QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t max_len) |
| 927 | { | 990 | { |
| 991 | + return m->nextToken(input, context, max_len); | ||
| 992 | +} | ||
| 993 | + | ||
| 994 | +bool | ||
| 995 | +Tokenizer::nextToken(InputSource& input, std::string const& context, size_t max_len) | ||
| 996 | +{ | ||
| 928 | if (this->state != st_inline_image) { | 997 | if (this->state != st_inline_image) { |
| 929 | reset(); | 998 | reset(); |
| 930 | } | 999 | } |
| @@ -935,10 +1004,10 @@ QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t | @@ -935,10 +1004,10 @@ QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t | ||
| 935 | if (!input.fastRead(ch)) { | 1004 | if (!input.fastRead(ch)) { |
| 936 | presentEOF(); | 1005 | presentEOF(); |
| 937 | 1006 | ||
| 938 | - if ((this->type == tt_eof) && (!this->allow_eof)) { | 1007 | + if ((this->type == tt::tt_eof) && (!this->allow_eof)) { |
| 939 | // Nothing in the qpdf library calls readToken without allowEOF anymore, so this | 1008 | // Nothing in the qpdf library calls readToken without allowEOF anymore, so this |
| 940 | // case is not exercised. | 1009 | // case is not exercised. |
| 941 | - this->type = tt_bad; | 1010 | + this->type = tt::tt_bad; |
| 942 | this->error_message = "unexpected EOF"; | 1011 | this->error_message = "unexpected EOF"; |
| 943 | offset = input.getLastOffset(); | 1012 | offset = input.getLastOffset(); |
| 944 | } | 1013 | } |
| @@ -953,7 +1022,7 @@ QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t | @@ -953,7 +1022,7 @@ QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t | ||
| 953 | if (max_len && (this->raw_val.length() >= max_len) && (this->state != st_token_ready)) { | 1022 | if (max_len && (this->raw_val.length() >= max_len) && (this->state != st_token_ready)) { |
| 954 | // terminate this token now | 1023 | // terminate this token now |
| 955 | QTC::TC("qpdf", "QPDFTokenizer block long token"); | 1024 | QTC::TC("qpdf", "QPDFTokenizer block long token"); |
| 956 | - this->type = tt_bad; | 1025 | + this->type = tt::tt_bad; |
| 957 | this->state = st_token_ready; | 1026 | this->state = st_token_ready; |
| 958 | this->error_message = "exceeded allowable length while reading token"; | 1027 | this->error_message = "exceeded allowable length while reading token"; |
| 959 | } | 1028 | } |
| @@ -962,7 +1031,7 @@ QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t | @@ -962,7 +1031,7 @@ QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t | ||
| 962 | 1031 | ||
| 963 | input.fastUnread(!this->in_token && !this->before_token); | 1032 | input.fastUnread(!this->in_token && !this->before_token); |
| 964 | 1033 | ||
| 965 | - if (this->type != tt_eof) { | 1034 | + if (this->type != tt::tt_eof) { |
| 966 | input.setLastOffset(offset); | 1035 | input.setLastOffset(offset); |
| 967 | } | 1036 | } |
| 968 | 1037 |
libqpdf/qpdf/QPDFTokenizer_private.hh
0 → 100644
| 1 | +#ifndef QPDFTOKENIZER_PRIVATE_HH | ||
| 2 | +#define QPDFTOKENIZER_PRIVATE_HH | ||
| 3 | + | ||
| 4 | +#include <qpdf/QPDFTokenizer.hh> | ||
| 5 | + | ||
| 6 | +namespace qpdf | ||
| 7 | +{ | ||
| 8 | + | ||
| 9 | + class Tokenizer | ||
| 10 | + { | ||
| 11 | + friend class ::QPDFTokenizer; | ||
| 12 | + | ||
| 13 | + public: | ||
| 14 | + Tokenizer(); | ||
| 15 | + | ||
| 16 | + // Methods to support QPDFTokenizer. See QPDFTokenizer.hh for detail. Some of these are used | ||
| 17 | + // by Tokenizer internally but are not accessed directly by the rest of qpdf. | ||
| 18 | + | ||
| 19 | + void allowEOF(); | ||
| 20 | + void includeIgnorable(); | ||
| 21 | + void presentCharacter(char ch); | ||
| 22 | + void presentEOF(); | ||
| 23 | + | ||
| 24 | + // If a token is available, return true and initialize token with the token, unread_char | ||
| 25 | + // with whether or not we have to unread the last character, and if unread_char, ch with the | ||
| 26 | + // character to unread. | ||
| 27 | + bool getToken(QPDFTokenizer::Token& token, bool& unread_char, char& ch); | ||
| 28 | + | ||
| 29 | + // Pull mode: | ||
| 30 | + | ||
| 31 | + // Read a token from an input source. Context describes the context in which the token is | ||
| 32 | + // being read and is used in the exception thrown if there is an error. After a token is | ||
| 33 | + // read, the position of the input source returned by input->tell() points to just after the | ||
| 34 | + // token, and the input source's "last offset" as returned by input->getLastOffset() points | ||
| 35 | + // to the beginning of the token. | ||
| 36 | + QPDFTokenizer::Token readToken( | ||
| 37 | + InputSource& input, | ||
| 38 | + std::string const& context, | ||
| 39 | + bool allow_bad = false, | ||
| 40 | + size_t max_len = 0); | ||
| 41 | + | ||
| 42 | + QPDFTokenizer::Token readToken( | ||
| 43 | + std::shared_ptr<InputSource> input, | ||
| 44 | + std::string const& context, | ||
| 45 | + bool allow_bad = false, | ||
| 46 | + size_t max_len = 0); | ||
| 47 | + | ||
| 48 | + // Calling this method puts the tokenizer in a state for reading inline images. You should | ||
| 49 | + // call this method after reading the character following the ID operator. In that state, it | ||
| 50 | + // will return all data up to BUT NOT INCLUDING the next EI token. After you call this | ||
| 51 | + // method, the next call to readToken (or the token created next time getToken returns true) | ||
| 52 | + // will either be tt_inline_image or tt_bad. This is the only way readToken returns a | ||
| 53 | + // tt_inline_image token. | ||
| 54 | + void expectInlineImage(std::shared_ptr<InputSource> input); | ||
| 55 | + | ||
| 56 | + void expectInlineImage(InputSource& input); | ||
| 57 | + | ||
| 58 | + private: | ||
| 59 | + // Read a token from an input source. Context describes the context in which the token is | ||
| 60 | + // being read and is used in the exception thrown if there is an error. After a token is | ||
| 61 | + // read, the position of the input source returned by input->tell() points to just after the | ||
| 62 | + // token, and the input source's "last offset" as returned by input->getLastOffset() points | ||
| 63 | + // to the beginning of the token. Returns false if the token is bad or if scanning produced | ||
| 64 | + // an error message for any reason. | ||
| 65 | + bool nextToken(InputSource& input, std::string const& context, size_t max_len = 0); | ||
| 66 | + | ||
| 67 | + // The following methods are only valid after nextToken has been called and until another | ||
| 68 | + // QPDFTokenizer method is called. They allow the results of calling nextToken to be | ||
| 69 | + // accessed without creating a Token, thus avoiding copying information that may not be | ||
| 70 | + // needed. | ||
| 71 | + inline QPDFTokenizer::token_type_e getType() const; | ||
| 72 | + inline std::string const& getValue() const; | ||
| 73 | + inline std::string const& getRawValue() const; | ||
| 74 | + inline std::string const& getErrorMessage() const; | ||
| 75 | + | ||
| 76 | + Tokenizer(Tokenizer const&) = delete; | ||
| 77 | + Tokenizer& operator=(Tokenizer const&) = delete; | ||
| 78 | + | ||
| 79 | + bool isSpace(char); | ||
| 80 | + bool isDelimiter(char); | ||
| 81 | + void findEI(InputSource& input); | ||
| 82 | + | ||
| 83 | + enum state_e { | ||
| 84 | + st_top, | ||
| 85 | + st_in_hexstring, | ||
| 86 | + st_in_string, | ||
| 87 | + st_in_hexstring_2nd, | ||
| 88 | + st_name, | ||
| 89 | + st_literal, | ||
| 90 | + st_in_space, | ||
| 91 | + st_in_comment, | ||
| 92 | + st_string_escape, | ||
| 93 | + st_char_code, | ||
| 94 | + st_string_after_cr, | ||
| 95 | + st_lt, | ||
| 96 | + st_gt, | ||
| 97 | + st_inline_image, | ||
| 98 | + st_sign, | ||
| 99 | + st_number, | ||
| 100 | + st_real, | ||
| 101 | + st_decimal, | ||
| 102 | + st_name_hex1, | ||
| 103 | + st_name_hex2, | ||
| 104 | + st_before_token, | ||
| 105 | + st_token_ready | ||
| 106 | + }; | ||
| 107 | + | ||
| 108 | + void handleCharacter(char); | ||
| 109 | + void inBeforeToken(char); | ||
| 110 | + void inTop(char); | ||
| 111 | + void inSpace(char); | ||
| 112 | + void inComment(char); | ||
| 113 | + void inString(char); | ||
| 114 | + void inName(char); | ||
| 115 | + void inLt(char); | ||
| 116 | + void inGt(char); | ||
| 117 | + void inStringAfterCR(char); | ||
| 118 | + void inStringEscape(char); | ||
| 119 | + void inLiteral(char); | ||
| 120 | + void inCharCode(char); | ||
| 121 | + void inHexstring(char); | ||
| 122 | + void inHexstring2nd(char); | ||
| 123 | + void inInlineImage(char); | ||
| 124 | + void inTokenReady(char); | ||
| 125 | + void inNameHex1(char); | ||
| 126 | + void inNameHex2(char); | ||
| 127 | + void inSign(char); | ||
| 128 | + void inDecimal(char); | ||
| 129 | + void inNumber(char); | ||
| 130 | + void inReal(char); | ||
| 131 | + void reset(); | ||
| 132 | + | ||
| 133 | + // Lexer state | ||
| 134 | + state_e state; | ||
| 135 | + | ||
| 136 | + bool allow_eof{false}; | ||
| 137 | + bool include_ignorable{false}; | ||
| 138 | + | ||
| 139 | + // Current token accumulation | ||
| 140 | + QPDFTokenizer::token_type_e type; | ||
| 141 | + std::string val; | ||
| 142 | + std::string raw_val; | ||
| 143 | + std::string error_message; | ||
| 144 | + bool before_token; | ||
| 145 | + bool in_token; | ||
| 146 | + char char_to_unread; | ||
| 147 | + size_t inline_image_bytes; | ||
| 148 | + bool bad; | ||
| 149 | + | ||
| 150 | + // State for strings | ||
| 151 | + int string_depth; | ||
| 152 | + int char_code; | ||
| 153 | + char hex_char; | ||
| 154 | + int digit_count; | ||
| 155 | + }; | ||
| 156 | + | ||
| 157 | + inline QPDFTokenizer::token_type_e | ||
| 158 | + Tokenizer::getType() const | ||
| 159 | + { | ||
| 160 | + return this->type; | ||
| 161 | + } | ||
| 162 | + inline std::string const& | ||
| 163 | + Tokenizer::getValue() const | ||
| 164 | + { | ||
| 165 | + return (this->type == QPDFTokenizer::tt_name || this->type == QPDFTokenizer::tt_string) | ||
| 166 | + ? this->val | ||
| 167 | + : this->raw_val; | ||
| 168 | + } | ||
| 169 | + inline std::string const& | ||
| 170 | + Tokenizer::getRawValue() const | ||
| 171 | + { | ||
| 172 | + return this->raw_val; | ||
| 173 | + } | ||
| 174 | + inline std::string const& | ||
| 175 | + Tokenizer::getErrorMessage() const | ||
| 176 | + { | ||
| 177 | + return this->error_message; | ||
| 178 | + } | ||
| 179 | + | ||
| 180 | +} // namespace qpdf | ||
| 181 | + | ||
| 182 | +inline QPDFTokenizer::token_type_e | ||
| 183 | +QPDFTokenizer::getType() const noexcept | ||
| 184 | +{ | ||
| 185 | + return m->type; | ||
| 186 | +} | ||
| 187 | +inline std::string const& | ||
| 188 | +QPDFTokenizer::getValue() const noexcept | ||
| 189 | +{ | ||
| 190 | + return (m->type == tt_name || m->type == tt_string) ? m->val : m->raw_val; | ||
| 191 | +} | ||
| 192 | +inline std::string const& | ||
| 193 | +QPDFTokenizer::getRawValue() const noexcept | ||
| 194 | +{ | ||
| 195 | + return m->raw_val; | ||
| 196 | +} | ||
| 197 | +inline std::string const& | ||
| 198 | +QPDFTokenizer::getErrorMessage() const noexcept | ||
| 199 | +{ | ||
| 200 | + return m->error_message; | ||
| 201 | +} | ||
| 202 | + | ||
| 203 | +#endif // QPDFTOKENIZER_PRIVATE_HH |