Commit f29d0a63121f1243b80c74d90c59a8a88f0a9223
1 parent: d26b537a
Add state st_char_code in QPDFTokenizer
Showing 2 changed files with 35 additions and 23 deletions
include/qpdf/QPDFTokenizer.hh
| ... | ... | @@ -203,6 +203,7 @@ class QPDFTokenizer |
| 203 | 203 | st_in_space, |
| 204 | 204 | st_in_comment, |
| 205 | 205 | st_in_string, |
| 206 | + st_char_code, | |
| 206 | 207 | st_lt, |
| 207 | 208 | st_gt, |
| 208 | 209 | st_literal, |
| ... | ... | @@ -212,8 +213,10 @@ class QPDFTokenizer |
| 212 | 213 | }; |
| 213 | 214 | |
| 214 | 215 | void handleCharacter(char); |
| 216 | + void inCharCode(char); | |
| 215 | 217 | void inHexstring(char); |
| 216 | - void inString(char, size_t); | |
| 218 | + void inString(char); | |
| 219 | + | |
| 217 | 220 | void reset(); |
| 218 | 221 | |
| 219 | 222 | // Lexer state | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -330,6 +330,7 @@ QPDFTokenizer::handleCharacter(char ch) |
| 330 | 330 | this->state = st_token_ready; |
| 331 | 331 | return; |
| 332 | 332 | } |
| 333 | + | |
| 333 | 334 | this->state = st_in_hexstring; |
| 334 | 335 | inHexstring(ch); |
| 335 | 336 | return; |
| ... | ... | @@ -355,20 +356,7 @@ QPDFTokenizer::handleCharacter(char ch) |
| 355 | 356 | if (this->string_ignoring_newline && (ch != '\n')) { |
| 356 | 357 | this->string_ignoring_newline = false; |
| 357 | 358 | } |
| 358 | - | |
| 359 | - size_t bs_num_count = strlen(this->bs_num_register); | |
| 360 | - bool ch_is_octal = ((ch >= '0') && (ch <= '7')); | |
| 361 | - if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { | |
| 362 | - // We've accumulated \ddd. PDF Spec says to ignore | |
| 363 | - // high-order overflow. | |
| 364 | - this->val += static_cast<char>( | |
| 365 | - strtol(this->bs_num_register, nullptr, 8)); | |
| 366 | - memset( | |
| 367 | - this->bs_num_register, '\0', sizeof(this->bs_num_register)); | |
| 368 | - bs_num_count = 0; | |
| 369 | - } | |
| 370 | - | |
| 371 | - inString(ch, bs_num_count); | |
| 359 | + inString(ch); | |
| 372 | 360 | |
| 373 | 361 | this->last_char_was_cr = |
| 374 | 362 | ((!this->string_ignoring_newline) && (ch == '\r')); |
| ... | ... | @@ -377,6 +365,10 @@ QPDFTokenizer::handleCharacter(char ch) |
| 377 | 365 | } |
| 378 | 366 | return; |
| 379 | 367 | |
| 368 | + case (st_char_code): | |
| 369 | + inCharCode(ch); | |
| 370 | + return; | |
| 371 | + | |
| 380 | 372 | case st_literal: |
| 381 | 373 | if (isDelimiter(ch)) { |
| 382 | 374 | // A C-locale whitespace character or delimiter terminates |
| ... | ... | @@ -452,15 +444,16 @@ QPDFTokenizer::inHexstring(char ch) |
| 452 | 444 | } |
| 453 | 445 | |
| 454 | 446 | void |
| 455 | -QPDFTokenizer::inString(char ch, size_t bs_num_count) | |
| 447 | +QPDFTokenizer::inString(char ch) | |
| 456 | 448 | { |
| 457 | 449 | bool ch_is_octal = ((ch >= '0') && (ch <= '7')); |
| 458 | 450 | if (this->string_ignoring_newline && (ch == '\n')) { |
| 459 | 451 | // ignore |
| 460 | 452 | this->string_ignoring_newline = false; |
| 461 | 453 | return; |
| 462 | - } else if (ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) { | |
| 463 | - this->bs_num_register[bs_num_count++] = ch; | |
| 454 | + } else if (ch_is_octal && this->last_char_was_bs) { | |
| 455 | + this->state = st_char_code; | |
| 456 | + inCharCode(ch); | |
| 464 | 457 | return; |
| 465 | 458 | } else if (this->last_char_was_bs) { |
| 466 | 459 | switch (ch) { |
| ... | ... | @@ -498,11 +491,6 @@ QPDFTokenizer::inString(char ch, size_t bs_num_count) |
| 498 | 491 | } |
| 499 | 492 | } else if (ch == '\\') { |
| 500 | 493 | // last_char_was_bs is set/cleared below as appropriate |
| 501 | - if (bs_num_count) { | |
| 502 | - throw std::logic_error( | |
| 503 | - "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 " | |
| 504 | - "when ch == '\\'"); | |
| 505 | - } | |
| 506 | 494 | } else if (ch == '(') { |
| 507 | 495 | this->val += ch; |
| 508 | 496 | ++this->string_depth; |
| ... | ... | @@ -528,6 +516,27 @@ QPDFTokenizer::inString(char ch, size_t bs_num_count) |
| 528 | 516 | } |
| 529 | 517 | |
| 530 | 518 | void |
| 519 | +QPDFTokenizer::inCharCode(char ch) | |
| 520 | +{ | |
| 521 | + size_t bs_num_count = strlen(this->bs_num_register); | |
| 522 | + bool ch_is_octal = ((ch >= '0') && (ch <= '7')); | |
| 523 | + if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { | |
| 524 | + // We've accumulated \ddd. PDF Spec says to ignore | |
| 525 | + // high-order overflow. | |
| 526 | + this->val += | |
| 527 | + static_cast<char>(strtol(this->bs_num_register, nullptr, 8)); | |
| 528 | + memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); | |
| 529 | + bs_num_count = 0; | |
| 530 | + this->state = st_in_string; | |
| 531 | + handleCharacter(ch); | |
| 532 | + return; | |
| 533 | + } else if (ch_is_octal) { | |
| 534 | + this->bs_num_register[bs_num_count++] = ch; | |
| 535 | + return; | |
| 536 | + } | |
| 537 | +} | |
| 538 | + | |
| 539 | +void | |
| 531 | 540 | QPDFTokenizer::presentEOF() |
| 532 | 541 | { |
| 533 | 542 | if (this->state == st_literal) { | ... | ... |