Commit d26b537a7c65b4aa9ed4c632bfb9eaf921fbbd2d (1 parent: 2697ba49)
Add private method QPDFTokenizer::inString
Showing 2 changed files with 78 additions and 64 deletions
include/qpdf/QPDFTokenizer.hh
| @@ -213,6 +213,7 @@ class QPDFTokenizer | @@ -213,6 +213,7 @@ class QPDFTokenizer | ||
| 213 | 213 | ||
| 214 | void handleCharacter(char); | 214 | void handleCharacter(char); |
| 215 | void inHexstring(char); | 215 | void inHexstring(char); |
| 216 | + void inString(char, size_t); | ||
| 216 | void reset(); | 217 | void reset(); |
| 217 | 218 | ||
| 218 | // Lexer state | 219 | // Lexer state |
libqpdf/QPDFTokenizer.cc
| @@ -368,70 +368,7 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -368,70 +368,7 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 368 | bs_num_count = 0; | 368 | bs_num_count = 0; |
| 369 | } | 369 | } |
| 370 | 370 | ||
| 371 | - if (this->string_ignoring_newline && (ch == '\n')) { | ||
| 372 | - // ignore | ||
| 373 | - this->string_ignoring_newline = false; | ||
| 374 | - } else if ( | ||
| 375 | - ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) { | ||
| 376 | - this->bs_num_register[bs_num_count++] = ch; | ||
| 377 | - } else if (this->last_char_was_bs) { | ||
| 378 | - switch (ch) { | ||
| 379 | - case 'n': | ||
| 380 | - this->val += '\n'; | ||
| 381 | - break; | ||
| 382 | - | ||
| 383 | - case 'r': | ||
| 384 | - this->val += '\r'; | ||
| 385 | - break; | ||
| 386 | - | ||
| 387 | - case 't': | ||
| 388 | - this->val += '\t'; | ||
| 389 | - break; | ||
| 390 | - | ||
| 391 | - case 'b': | ||
| 392 | - this->val += '\b'; | ||
| 393 | - break; | ||
| 394 | - | ||
| 395 | - case 'f': | ||
| 396 | - this->val += '\f'; | ||
| 397 | - break; | ||
| 398 | - | ||
| 399 | - case '\n': | ||
| 400 | - break; | ||
| 401 | - | ||
| 402 | - case '\r': | ||
| 403 | - this->string_ignoring_newline = true; | ||
| 404 | - break; | ||
| 405 | - | ||
| 406 | - default: | ||
| 407 | - // PDF spec says backslash is ignored before anything else | ||
| 408 | - this->val += ch; | ||
| 409 | - break; | ||
| 410 | - } | ||
| 411 | - } else if (ch == '\\') { | ||
| 412 | - // last_char_was_bs is set/cleared below as appropriate | ||
| 413 | - if (bs_num_count) { | ||
| 414 | - throw std::logic_error( | ||
| 415 | - "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 " | ||
| 416 | - "when ch == '\\'"); | ||
| 417 | - } | ||
| 418 | - } else if (ch == '(') { | ||
| 419 | - this->val += ch; | ||
| 420 | - ++this->string_depth; | ||
| 421 | - } else if ((ch == ')') && (--this->string_depth == 0)) { | ||
| 422 | - this->type = tt_string; | ||
| 423 | - this->state = st_token_ready; | ||
| 424 | - } else if (ch == '\r') { | ||
| 425 | - // CR by itself is converted to LF | ||
| 426 | - this->val += '\n'; | ||
| 427 | - } else if (ch == '\n') { | ||
| 428 | - // CR LF is converted to LF | ||
| 429 | - if (!this->last_char_was_cr) { | ||
| 430 | - this->val += ch; | ||
| 431 | - } | ||
| 432 | - } else { | ||
| 433 | - this->val += ch; | ||
| 434 | - } | 371 | + inString(ch, bs_num_count); |
| 435 | 372 | ||
| 436 | this->last_char_was_cr = | 373 | this->last_char_was_cr = |
| 437 | ((!this->string_ignoring_newline) && (ch == '\r')); | 374 | ((!this->string_ignoring_newline) && (ch == '\r')); |
| @@ -515,6 +452,82 @@ QPDFTokenizer::inHexstring(char ch) | @@ -515,6 +452,82 @@ QPDFTokenizer::inHexstring(char ch) | ||
| 515 | } | 452 | } |
| 516 | 453 | ||
| 517 | void | 454 | void |
| | 455 | +QPDFTokenizer::inString(char ch, size_t bs_num_count) | ||
| | 456 | +{ | ||
| | 457 | + bool ch_is_octal = ((ch >= '0') && (ch <= '7')); | ||
| | 458 | + if (this->string_ignoring_newline && (ch == '\n')) { | ||
| | 459 | + // string_ignoring_newline is set below when a backslash is followed by CR; swallow the LF that comes right after it | ||
| | 460 | + this->string_ignoring_newline = false; | ||
| | 461 | + return; | ||
| | 462 | + } else if (ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) { | ||
| | 463 | + this->bs_num_register[bs_num_count++] = ch; | ||
| | 464 | + return; | ||
| | 465 | + } else if (this->last_char_was_bs) { | ||
| | 466 | + switch (ch) { | ||
| | 467 | + case 'n': | ||
| | 468 | + this->val += '\n'; | ||
| | 469 | + return; | ||
| | 470 | + | ||
| | 471 | + case 'r': | ||
| | 472 | + this->val += '\r'; | ||
| | 473 | + return; | ||
| | 474 | + | ||
| | 475 | + case 't': | ||
| | 476 | + this->val += '\t'; | ||
| | 477 | + return; | ||
| | 478 | + | ||
| | 479 | + case 'b': | ||
| | 480 | + this->val += '\b'; | ||
| | 481 | + return; | ||
| | 482 | + | ||
| | 483 | + case 'f': | ||
| | 484 | + this->val += '\f'; | ||
| | 485 | + return; | ||
| | 486 | + | ||
| | 487 | + case '\n': | ||
| | 488 | + return; | ||
| | 489 | + | ||
| | 490 | + case '\r': | ||
| | 491 | + this->string_ignoring_newline = true; | ||
| | 492 | + return; | ||
| | 493 | + | ||
| | 494 | + default: | ||
| | 495 | + // PDF spec says a backslash before any other character is ignored, so the character itself is kept unchanged | ||
| | 496 | + this->val += ch; | ||
| | 497 | + return; | ||
| | 498 | + } | ||
| | 499 | + } else if (ch == '\\') { | ||
| | 500 | + // last_char_was_bs is not assigned anywhere in this function; presumably the caller sets/clears it after inString returns (TODO confirm) | ||
| | 501 | + if (bs_num_count) { | ||
| | 502 | + throw std::logic_error( | ||
| | 503 | + "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 " | ||
| | 504 | + "when ch == '\\'"); | ||
| | 505 | + } | ||
| | 506 | + } else if (ch == '(') { | ||
| | 507 | + this->val += ch; | ||
| | 508 | + ++this->string_depth; | ||
| | 509 | + return; | ||
| | 510 | + } else if ((ch == ')') && (--this->string_depth == 0)) { | ||
| | 511 | + this->type = tt_string; | ||
| | 512 | + this->state = st_token_ready; | ||
| | 513 | + return; | ||
| | 514 | + } else if (ch == '\r') { | ||
| | 515 | + // CR by itself is converted to LF | ||
| | 516 | + this->val += '\n'; | ||
| | 517 | + return; | ||
| | 518 | + } else if (ch == '\n') { | ||
| | 519 | + // CR LF is converted to LF: the CR already appended an LF, so this LF is dropped when last_char_was_cr is set | ||
| | 520 | + if (!this->last_char_was_cr) { | ||
| | 521 | + this->val += ch; | ||
| | 522 | + } | ||
| | 523 | + return; | ||
| | 524 | + } else { | ||
| | 525 | + this->val += ch; | ||
| | 526 | + return; | ||
| | 527 | + } | ||
| | 528 | +} | ||
| 529 | + | ||
| 530 | +void | ||
| 518 | QPDFTokenizer::presentEOF() | 531 | QPDFTokenizer::presentEOF() |
| 519 | { | 532 | { |
| 520 | if (this->state == st_literal) { | 533 | if (this->state == st_literal) { |