Commit 7c32f6cc2e90058b8a1fbaec48e07bf21bd66afa
1 parent
7c5778f9
Add state st_string_escape in QPDFTokenizer
Showing 2 changed files with 55 additions and 51 deletions
include/qpdf/QPDFTokenizer.hh
| ... | ... | @@ -203,6 +203,7 @@ class QPDFTokenizer |
| 203 | 203 | st_in_space, |
| 204 | 204 | st_in_comment, |
| 205 | 205 | st_in_string, |
| 206 | + st_string_escape, | |
| 206 | 207 | st_char_code, |
| 207 | 208 | st_string_after_cr, |
| 208 | 209 | st_lt, |
| ... | ... | @@ -238,7 +239,6 @@ class QPDFTokenizer |
| 238 | 239 | // State for strings |
| 239 | 240 | int string_depth; |
| 240 | 241 | char bs_num_register[4]; |
| 241 | - bool last_char_was_bs; | |
| 242 | 242 | }; |
| 243 | 243 | |
| 244 | 244 | #endif // QPDFTOKENIZER_HH | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -85,7 +85,6 @@ QPDFTokenizer::reset() |
| 85 | 85 | char_to_unread = '\0'; |
| 86 | 86 | inline_image_bytes = 0; |
| 87 | 87 | string_depth = 0; |
| 88 | - last_char_was_bs = false; | |
| 89 | 88 | } |
| 90 | 89 | |
| 91 | 90 | QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : |
| ... | ... | @@ -244,7 +243,6 @@ QPDFTokenizer::handleCharacter(char ch) |
| 244 | 243 | case '(': |
| 245 | 244 | this->string_depth = 1; |
| 246 | 245 | memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); |
| 247 | - this->last_char_was_bs = false; | |
| 248 | 246 | this->state = st_in_string; |
| 249 | 247 | return; |
| 250 | 248 | |
| ... | ... | @@ -348,22 +346,66 @@ QPDFTokenizer::handleCharacter(char ch) |
| 348 | 346 | return; |
| 349 | 347 | |
| 350 | 348 | case st_in_string: |
| 351 | - { | |
| 352 | - inString(ch); | |
| 353 | - this->last_char_was_bs = | |
| 354 | - ((!this->last_char_was_bs) && (ch == '\\')); | |
| 355 | - } | |
| 349 | + inString(ch); | |
| 356 | 350 | return; |
| 357 | 351 | |
| 358 | - case (st_string_after_cr): | |
| 352 | + case st_string_after_cr: | |
| 359 | 353 | // CR LF in strings are either ignored or normalized to CR |
| 360 | 354 | this->state = st_in_string; |
| 361 | 355 | if (ch != '\n') { |
| 362 | - handleCharacter(ch); | |
| 356 | + inString(ch); | |
| 363 | 357 | } |
| 364 | 358 | return; |
| 365 | 359 | |
| 366 | - case (st_char_code): | |
| 360 | + case st_string_escape: | |
| 361 | + this->state = st_in_string; | |
| 362 | + switch (ch) { | |
| 363 | + case '0': | |
| 364 | + case '1': | |
| 365 | + case '2': | |
| 366 | + case '3': | |
| 367 | + case '4': | |
| 368 | + case '5': | |
| 369 | + case '6': | |
| 370 | + case '7': | |
| 371 | + this->state = st_char_code; | |
| 372 | + inCharCode(ch); | |
| 373 | + return; | |
| 374 | + | |
| 375 | + case 'n': | |
| 376 | + this->val += '\n'; | |
| 377 | + return; | |
| 378 | + | |
| 379 | + case 'r': | |
| 380 | + this->val += '\r'; | |
| 381 | + return; | |
| 382 | + | |
| 383 | + case 't': | |
| 384 | + this->val += '\t'; | |
| 385 | + return; | |
| 386 | + | |
| 387 | + case 'b': | |
| 388 | + this->val += '\b'; | |
| 389 | + return; | |
| 390 | + | |
| 391 | + case 'f': | |
| 392 | + this->val += '\f'; | |
| 393 | + return; | |
| 394 | + | |
| 395 | + case '\n': | |
| 396 | + return; | |
| 397 | + | |
| 398 | + case '\r': | |
| 399 | + this->state = st_string_after_cr; | |
| 400 | + return; | |
| 401 | + | |
| 402 | + default: | |
| 403 | + // PDF spec says backslash is ignored before anything else | |
| 404 | + this->val += ch; | |
| 405 | + return; | |
| 406 | + } | |
| 407 | + | |
| 408 | + case st_char_code: | |
| 367 | 409 | inCharCode(ch); |
| 368 | 410 | return; |
| 369 | 411 | |
| ... | ... | @@ -444,47 +486,9 @@ QPDFTokenizer::inHexstring(char ch) |
| 444 | 486 | void |
| 445 | 487 | QPDFTokenizer::inString(char ch) |
| 446 | 488 | { |
| 447 | - bool ch_is_octal = ((ch >= '0') && (ch <= '7')); | |
| 448 | - if (ch_is_octal && this->last_char_was_bs) { | |
| 449 | - this->state = st_char_code; | |
| 450 | - inCharCode(ch); | |
| 489 | + if (ch == '\\') { | |
| 490 | + this->state = st_string_escape; | |
| 451 | 491 | return; |
| 452 | - } else if (this->last_char_was_bs) { | |
| 453 | - switch (ch) { | |
| 454 | - case 'n': | |
| 455 | - this->val += '\n'; | |
| 456 | - return; | |
| 457 | - | |
| 458 | - case 'r': | |
| 459 | - this->val += '\r'; | |
| 460 | - return; | |
| 461 | - | |
| 462 | - case 't': | |
| 463 | - this->val += '\t'; | |
| 464 | - return; | |
| 465 | - | |
| 466 | - case 'b': | |
| 467 | - this->val += '\b'; | |
| 468 | - return; | |
| 469 | - | |
| 470 | - case 'f': | |
| 471 | - this->val += '\f'; | |
| 472 | - return; | |
| 473 | - | |
| 474 | - case '\n': | |
| 475 | - return; | |
| 476 | - | |
| 477 | - case '\r': | |
| 478 | - this->state = st_string_after_cr; | |
| 479 | - return; | |
| 480 | - | |
| 481 | - default: | |
| 482 | - // PDF spec says backslash is ignored before anything else | |
| 483 | - this->val += ch; | |
| 484 | - return; | |
| 485 | - } | |
| 486 | - } else if (ch == '\\') { | |
| 487 | - // last_char_was_bs is set/cleared below as appropriate | |
| 488 | 492 | } else if (ch == '(') { |
| 489 | 493 | this->val += ch; |
| 490 | 494 | ++this->string_depth; | ... | ... |