Commit f29d0a63121f1243b80c74d90c59a8a88f0a9223
1 parent: d26b537a
Add state st_char_code in QPDFTokenizer
Showing 2 changed files with 35 additions and 23 deletions
include/qpdf/QPDFTokenizer.hh
| @@ -203,6 +203,7 @@ class QPDFTokenizer | @@ -203,6 +203,7 @@ class QPDFTokenizer | ||
| 203 | st_in_space, | 203 | st_in_space, |
| 204 | st_in_comment, | 204 | st_in_comment, |
| 205 | st_in_string, | 205 | st_in_string, |
| 206 | + st_char_code, | ||
| 206 | st_lt, | 207 | st_lt, |
| 207 | st_gt, | 208 | st_gt, |
| 208 | st_literal, | 209 | st_literal, |
| @@ -212,8 +213,10 @@ class QPDFTokenizer | @@ -212,8 +213,10 @@ class QPDFTokenizer | ||
| 212 | }; | 213 | }; |
| 213 | 214 | ||
| 214 | void handleCharacter(char); | 215 | void handleCharacter(char); |
| 216 | + void inCharCode(char); | ||
| 215 | void inHexstring(char); | 217 | void inHexstring(char); |
| 216 | - void inString(char, size_t); | 218 | + void inString(char); |
| 219 | + | ||
| 217 | void reset(); | 220 | void reset(); |
| 218 | 221 | ||
| 219 | // Lexer state | 222 | // Lexer state |
libqpdf/QPDFTokenizer.cc
| @@ -330,6 +330,7 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -330,6 +330,7 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 330 | this->state = st_token_ready; | 330 | this->state = st_token_ready; |
| 331 | return; | 331 | return; |
| 332 | } | 332 | } |
| 333 | + | ||
| 333 | this->state = st_in_hexstring; | 334 | this->state = st_in_hexstring; |
| 334 | inHexstring(ch); | 335 | inHexstring(ch); |
| 335 | return; | 336 | return; |
| @@ -355,20 +356,7 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -355,20 +356,7 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 355 | if (this->string_ignoring_newline && (ch != '\n')) { | 356 | if (this->string_ignoring_newline && (ch != '\n')) { |
| 356 | this->string_ignoring_newline = false; | 357 | this->string_ignoring_newline = false; |
| 357 | } | 358 | } |
| 358 | - | ||
| 359 | - size_t bs_num_count = strlen(this->bs_num_register); | ||
| 360 | - bool ch_is_octal = ((ch >= '0') && (ch <= '7')); | ||
| 361 | - if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { | ||
| 362 | - // We've accumulated \ddd. PDF Spec says to ignore | ||
| 363 | - // high-order overflow. | ||
| 364 | - this->val += static_cast<char>( | ||
| 365 | - strtol(this->bs_num_register, nullptr, 8)); | ||
| 366 | - memset( | ||
| 367 | - this->bs_num_register, '\0', sizeof(this->bs_num_register)); | ||
| 368 | - bs_num_count = 0; | ||
| 369 | - } | ||
| 370 | - | ||
| 371 | - inString(ch, bs_num_count); | 359 | + inString(ch); |
| 372 | 360 | ||
| 373 | this->last_char_was_cr = | 361 | this->last_char_was_cr = |
| 374 | ((!this->string_ignoring_newline) && (ch == '\r')); | 362 | ((!this->string_ignoring_newline) && (ch == '\r')); |
| @@ -377,6 +365,10 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -377,6 +365,10 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 377 | } | 365 | } |
| 378 | return; | 366 | return; |
| 379 | 367 | ||
| 368 | + case (st_char_code): | ||
| 369 | + inCharCode(ch); | ||
| 370 | + return; | ||
| 371 | + | ||
| 380 | case st_literal: | 372 | case st_literal: |
| 381 | if (isDelimiter(ch)) { | 373 | if (isDelimiter(ch)) { |
| 382 | // A C-locale whitespace character or delimiter terminates | 374 | // A C-locale whitespace character or delimiter terminates |
| @@ -452,15 +444,16 @@ QPDFTokenizer::inHexstring(char ch) | @@ -452,15 +444,16 @@ QPDFTokenizer::inHexstring(char ch) | ||
| 452 | } | 444 | } |
| 453 | 445 | ||
| 454 | void | 446 | void |
| 455 | -QPDFTokenizer::inString(char ch, size_t bs_num_count) | 447 | +QPDFTokenizer::inString(char ch) |
| 456 | { | 448 | { |
| 457 | bool ch_is_octal = ((ch >= '0') && (ch <= '7')); | 449 | bool ch_is_octal = ((ch >= '0') && (ch <= '7')); |
| 458 | if (this->string_ignoring_newline && (ch == '\n')) { | 450 | if (this->string_ignoring_newline && (ch == '\n')) { |
| 459 | // ignore | 451 | // ignore |
| 460 | this->string_ignoring_newline = false; | 452 | this->string_ignoring_newline = false; |
| 461 | return; | 453 | return; |
| 462 | - } else if (ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) { | ||
| 463 | - this->bs_num_register[bs_num_count++] = ch; | 454 | + } else if (ch_is_octal && this->last_char_was_bs) { |
| 455 | + this->state = st_char_code; | ||
| 456 | + inCharCode(ch); | ||
| 464 | return; | 457 | return; |
| 465 | } else if (this->last_char_was_bs) { | 458 | } else if (this->last_char_was_bs) { |
| 466 | switch (ch) { | 459 | switch (ch) { |
| @@ -498,11 +491,6 @@ QPDFTokenizer::inString(char ch, size_t bs_num_count) | @@ -498,11 +491,6 @@ QPDFTokenizer::inString(char ch, size_t bs_num_count) | ||
| 498 | } | 491 | } |
| 499 | } else if (ch == '\\') { | 492 | } else if (ch == '\\') { |
| 500 | // last_char_was_bs is set/cleared below as appropriate | 493 | // last_char_was_bs is set/cleared below as appropriate |
| 501 | - if (bs_num_count) { | ||
| 502 | - throw std::logic_error( | ||
| 503 | - "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 " | ||
| 504 | - "when ch == '\\'"); | ||
| 505 | - } | ||
| 506 | } else if (ch == '(') { | 494 | } else if (ch == '(') { |
| 507 | this->val += ch; | 495 | this->val += ch; |
| 508 | ++this->string_depth; | 496 | ++this->string_depth; |
| @@ -528,6 +516,27 @@ QPDFTokenizer::inString(char ch, size_t bs_num_count) | @@ -528,6 +516,27 @@ QPDFTokenizer::inString(char ch, size_t bs_num_count) | ||
| 528 | } | 516 | } |
| 529 | 517 | ||
| 530 | void | 518 | void |
| 519 | +QPDFTokenizer::inCharCode(char ch) | ||
| 520 | +{ | ||
| 521 | + size_t bs_num_count = strlen(this->bs_num_register); | ||
| 522 | + bool ch_is_octal = ((ch >= '0') && (ch <= '7')); | ||
| 523 | + if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { | ||
| 524 | + // We've accumulated \ddd. PDF Spec says to ignore | ||
| 525 | + // high-order overflow. | ||
| 526 | + this->val += | ||
| 527 | + static_cast<char>(strtol(this->bs_num_register, nullptr, 8)); | ||
| 528 | + memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); | ||
| 529 | + bs_num_count = 0; | ||
| 530 | + this->state = st_in_string; | ||
| 531 | + handleCharacter(ch); | ||
| 532 | + return; | ||
| 533 | + } else if (ch_is_octal) { | ||
| 534 | + this->bs_num_register[bs_num_count++] = ch; | ||
| 535 | + return; | ||
| 536 | + } | ||
| 537 | +} | ||
| 538 | + | ||
| 539 | +void | ||
| 531 | QPDFTokenizer::presentEOF() | 540 | QPDFTokenizer::presentEOF() |
| 532 | { | 541 | { |
| 533 | if (this->state == st_literal) { | 542 | if (this->state == st_literal) { |