Commit 7c32f6cc2e90058b8a1fbaec48e07bf21bd66afa
1 parent
7c5778f9
Add state st_string_escape in QPDFTokenizer
Showing 2 changed files with 55 additions and 51 deletions
include/qpdf/QPDFTokenizer.hh
| @@ -203,6 +203,7 @@ class QPDFTokenizer | @@ -203,6 +203,7 @@ class QPDFTokenizer | ||
| 203 | st_in_space, | 203 | st_in_space, |
| 204 | st_in_comment, | 204 | st_in_comment, |
| 205 | st_in_string, | 205 | st_in_string, |
| 206 | + st_string_escape, | ||
| 206 | st_char_code, | 207 | st_char_code, |
| 207 | st_string_after_cr, | 208 | st_string_after_cr, |
| 208 | st_lt, | 209 | st_lt, |
| @@ -238,7 +239,6 @@ class QPDFTokenizer | @@ -238,7 +239,6 @@ class QPDFTokenizer | ||
| 238 | // State for strings | 239 | // State for strings |
| 239 | int string_depth; | 240 | int string_depth; |
| 240 | char bs_num_register[4]; | 241 | char bs_num_register[4]; |
| 241 | - bool last_char_was_bs; | ||
| 242 | }; | 242 | }; |
| 243 | 243 | ||
| 244 | #endif // QPDFTOKENIZER_HH | 244 | #endif // QPDFTOKENIZER_HH |
libqpdf/QPDFTokenizer.cc
| @@ -85,7 +85,6 @@ QPDFTokenizer::reset() | @@ -85,7 +85,6 @@ QPDFTokenizer::reset() | ||
| 85 | char_to_unread = '\0'; | 85 | char_to_unread = '\0'; |
| 86 | inline_image_bytes = 0; | 86 | inline_image_bytes = 0; |
| 87 | string_depth = 0; | 87 | string_depth = 0; |
| 88 | - last_char_was_bs = false; | ||
| 89 | } | 88 | } |
| 90 | 89 | ||
| 91 | QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : | 90 | QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : |
| @@ -244,7 +243,6 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -244,7 +243,6 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 244 | case '(': | 243 | case '(': |
| 245 | this->string_depth = 1; | 244 | this->string_depth = 1; |
| 246 | memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); | 245 | memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); |
| 247 | - this->last_char_was_bs = false; | ||
| 248 | this->state = st_in_string; | 246 | this->state = st_in_string; |
| 249 | return; | 247 | return; |
| 250 | 248 | ||
| @@ -348,22 +346,66 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -348,22 +346,66 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 348 | return; | 346 | return; |
| 349 | 347 | ||
| 350 | case st_in_string: | 348 | case st_in_string: |
| 351 | - { | ||
| 352 | - inString(ch); | ||
| 353 | - this->last_char_was_bs = | ||
| 354 | - ((!this->last_char_was_bs) && (ch == '\\')); | ||
| 355 | - } | 349 | + inString(ch); |
| 356 | return; | 350 | return; |
| 357 | 351 | ||
| 358 | - case (st_string_after_cr): | 352 | + case st_string_after_cr: |
| 359 | // CR LF in strings are either ignored or normalized to CR | 353 | // CR LF in strings are either ignored or normalized to CR |
| 360 | this->state = st_in_string; | 354 | this->state = st_in_string; |
| 361 | if (ch != '\n') { | 355 | if (ch != '\n') { |
| 362 | - handleCharacter(ch); | 356 | + inString(ch); |
| 363 | } | 357 | } |
| 364 | return; | 358 | return; |
| 365 | 359 | ||
| 366 | - case (st_char_code): | 360 | + case st_string_escape: |
| 361 | + this->state = st_in_string; | ||
| 362 | + switch (ch) { | ||
| 363 | + case '0': | ||
| 364 | + case '1': | ||
| 365 | + case '2': | ||
| 366 | + case '3': | ||
| 367 | + case '4': | ||
| 368 | + case '5': | ||
| 369 | + case '6': | ||
| 370 | + case '7': | ||
| 371 | + this->state = st_char_code; | ||
| 372 | + inCharCode(ch); | ||
| 373 | + return; | ||
| 374 | + | ||
| 375 | + case 'n': | ||
| 376 | + this->val += '\n'; | ||
| 377 | + return; | ||
| 378 | + | ||
| 379 | + case 'r': | ||
| 380 | + this->val += '\r'; | ||
| 381 | + return; | ||
| 382 | + | ||
| 383 | + case 't': | ||
| 384 | + this->val += '\t'; | ||
| 385 | + return; | ||
| 386 | + | ||
| 387 | + case 'b': | ||
| 388 | + this->val += '\b'; | ||
| 389 | + return; | ||
| 390 | + | ||
| 391 | + case 'f': | ||
| 392 | + this->val += '\f'; | ||
| 393 | + return; | ||
| 394 | + | ||
| 395 | + case '\n': | ||
| 396 | + return; | ||
| 397 | + | ||
| 398 | + case '\r': | ||
| 399 | + this->state = st_string_after_cr; | ||
| 400 | + return; | ||
| 401 | + | ||
| 402 | + default: | ||
| 403 | + // PDF spec says backslash is ignored before anything else | ||
| 404 | + this->val += ch; | ||
| 405 | + return; | ||
| 406 | + } | ||
| 407 | + | ||
| 408 | + case st_char_code: | ||
| 367 | inCharCode(ch); | 409 | inCharCode(ch); |
| 368 | return; | 410 | return; |
| 369 | 411 | ||
| @@ -444,47 +486,9 @@ QPDFTokenizer::inHexstring(char ch) | @@ -444,47 +486,9 @@ QPDFTokenizer::inHexstring(char ch) | ||
| 444 | void | 486 | void |
| 445 | QPDFTokenizer::inString(char ch) | 487 | QPDFTokenizer::inString(char ch) |
| 446 | { | 488 | { |
| 447 | - bool ch_is_octal = ((ch >= '0') && (ch <= '7')); | ||
| 448 | - if (ch_is_octal && this->last_char_was_bs) { | ||
| 449 | - this->state = st_char_code; | ||
| 450 | - inCharCode(ch); | 489 | + if (ch == '\\') { |
| 490 | + this->state = st_string_escape; | ||
| 451 | return; | 491 | return; |
| 452 | - } else if (this->last_char_was_bs) { | ||
| 453 | - switch (ch) { | ||
| 454 | - case 'n': | ||
| 455 | - this->val += '\n'; | ||
| 456 | - return; | ||
| 457 | - | ||
| 458 | - case 'r': | ||
| 459 | - this->val += '\r'; | ||
| 460 | - return; | ||
| 461 | - | ||
| 462 | - case 't': | ||
| 463 | - this->val += '\t'; | ||
| 464 | - return; | ||
| 465 | - | ||
| 466 | - case 'b': | ||
| 467 | - this->val += '\b'; | ||
| 468 | - return; | ||
| 469 | - | ||
| 470 | - case 'f': | ||
| 471 | - this->val += '\f'; | ||
| 472 | - return; | ||
| 473 | - | ||
| 474 | - case '\n': | ||
| 475 | - return; | ||
| 476 | - | ||
| 477 | - case '\r': | ||
| 478 | - this->state = st_string_after_cr; | ||
| 479 | - return; | ||
| 480 | - | ||
| 481 | - default: | ||
| 482 | - // PDF spec says backslash is ignored before anything else | ||
| 483 | - this->val += ch; | ||
| 484 | - return; | ||
| 485 | - } | ||
| 486 | - } else if (ch == '\\') { | ||
| 487 | - // last_char_was_bs is set/cleared below as appropriate | ||
| 488 | } else if (ch == '(') { | 492 | } else if (ch == '(') { |
| 489 | this->val += ch; | 493 | this->val += ch; |
| 490 | ++this->string_depth; | 494 | ++this->string_depth; |