Commit 7c5778f999e15cc1af6360710f8055c2fa234d03
1 parent
f29d0a63
Add state st_string_after_cr in QPDFTokenizer
Showing
2 changed files
with
13 additions
and
22 deletions
include/qpdf/QPDFTokenizer.hh
| @@ -204,6 +204,7 @@ class QPDFTokenizer | @@ -204,6 +204,7 @@ class QPDFTokenizer | ||
| 204 | st_in_comment, | 204 | st_in_comment, |
| 205 | st_in_string, | 205 | st_in_string, |
| 206 | st_char_code, | 206 | st_char_code, |
| 207 | + st_string_after_cr, | ||
| 207 | st_lt, | 208 | st_lt, |
| 208 | st_gt, | 209 | st_gt, |
| 209 | st_literal, | 210 | st_literal, |
| @@ -236,10 +237,8 @@ class QPDFTokenizer | @@ -236,10 +237,8 @@ class QPDFTokenizer | ||
| 236 | 237 | ||
| 237 | // State for strings | 238 | // State for strings |
| 238 | int string_depth; | 239 | int string_depth; |
| 239 | - bool string_ignoring_newline; | ||
| 240 | char bs_num_register[4]; | 240 | char bs_num_register[4]; |
| 241 | bool last_char_was_bs; | 241 | bool last_char_was_bs; |
| 242 | - bool last_char_was_cr; | ||
| 243 | }; | 242 | }; |
| 244 | 243 | ||
| 245 | #endif // QPDFTOKENIZER_HH | 244 | #endif // QPDFTOKENIZER_HH |
libqpdf/QPDFTokenizer.cc
| @@ -85,9 +85,7 @@ QPDFTokenizer::reset() | @@ -85,9 +85,7 @@ QPDFTokenizer::reset() | ||
| 85 | char_to_unread = '\0'; | 85 | char_to_unread = '\0'; |
| 86 | inline_image_bytes = 0; | 86 | inline_image_bytes = 0; |
| 87 | string_depth = 0; | 87 | string_depth = 0; |
| 88 | - string_ignoring_newline = false; | ||
| 89 | last_char_was_bs = false; | 88 | last_char_was_bs = false; |
| 90 | - last_char_was_cr = false; | ||
| 91 | } | 89 | } |
| 92 | 90 | ||
| 93 | QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : | 91 | QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : |
| @@ -245,10 +243,8 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -245,10 +243,8 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 245 | 243 | ||
| 246 | case '(': | 244 | case '(': |
| 247 | this->string_depth = 1; | 245 | this->string_depth = 1; |
| 248 | - this->string_ignoring_newline = false; | ||
| 249 | memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); | 246 | memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); |
| 250 | this->last_char_was_bs = false; | 247 | this->last_char_was_bs = false; |
| 251 | - this->last_char_was_cr = false; | ||
| 252 | this->state = st_in_string; | 248 | this->state = st_in_string; |
| 253 | return; | 249 | return; |
| 254 | 250 | ||
| @@ -353,18 +349,20 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -353,18 +349,20 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 353 | 349 | ||
| 354 | case st_in_string: | 350 | case st_in_string: |
| 355 | { | 351 | { |
| 356 | - if (this->string_ignoring_newline && (ch != '\n')) { | ||
| 357 | - this->string_ignoring_newline = false; | ||
| 358 | - } | ||
| 359 | inString(ch); | 352 | inString(ch); |
| 360 | - | ||
| 361 | - this->last_char_was_cr = | ||
| 362 | - ((!this->string_ignoring_newline) && (ch == '\r')); | ||
| 363 | this->last_char_was_bs = | 353 | this->last_char_was_bs = |
| 364 | ((!this->last_char_was_bs) && (ch == '\\')); | 354 | ((!this->last_char_was_bs) && (ch == '\\')); |
| 365 | } | 355 | } |
| 366 | return; | 356 | return; |
| 367 | 357 | ||
| 358 | + case (st_string_after_cr): | ||
| 359 | + // CR LF in strings are either ignored or normalized to LF | ||
| 360 | + this->state = st_in_string; | ||
| 361 | + if (ch != '\n') { | ||
| 362 | + handleCharacter(ch); | ||
| 363 | + } | ||
| 364 | + return; | ||
| 365 | + | ||
| 368 | case (st_char_code): | 366 | case (st_char_code): |
| 369 | inCharCode(ch); | 367 | inCharCode(ch); |
| 370 | return; | 368 | return; |
| @@ -447,11 +445,7 @@ void | @@ -447,11 +445,7 @@ void | ||
| 447 | QPDFTokenizer::inString(char ch) | 445 | QPDFTokenizer::inString(char ch) |
| 448 | { | 446 | { |
| 449 | bool ch_is_octal = ((ch >= '0') && (ch <= '7')); | 447 | bool ch_is_octal = ((ch >= '0') && (ch <= '7')); |
| 450 | - if (this->string_ignoring_newline && (ch == '\n')) { | ||
| 451 | - // ignore | ||
| 452 | - this->string_ignoring_newline = false; | ||
| 453 | - return; | ||
| 454 | - } else if (ch_is_octal && this->last_char_was_bs) { | 448 | + if (ch_is_octal && this->last_char_was_bs) { |
| 455 | this->state = st_char_code; | 449 | this->state = st_char_code; |
| 456 | inCharCode(ch); | 450 | inCharCode(ch); |
| 457 | return; | 451 | return; |
| @@ -481,7 +475,7 @@ QPDFTokenizer::inString(char ch) | @@ -481,7 +475,7 @@ QPDFTokenizer::inString(char ch) | ||
| 481 | return; | 475 | return; |
| 482 | 476 | ||
| 483 | case '\r': | 477 | case '\r': |
| 484 | - this->string_ignoring_newline = true; | 478 | + this->state = st_string_after_cr; |
| 485 | return; | 479 | return; |
| 486 | 480 | ||
| 487 | default: | 481 | default: |
| @@ -502,12 +496,10 @@ QPDFTokenizer::inString(char ch) | @@ -502,12 +496,10 @@ QPDFTokenizer::inString(char ch) | ||
| 502 | } else if (ch == '\r') { | 496 | } else if (ch == '\r') { |
| 503 | // CR by itself is converted to LF | 497 | // CR by itself is converted to LF |
| 504 | this->val += '\n'; | 498 | this->val += '\n'; |
| 499 | + this->state = st_string_after_cr; | ||
| 505 | return; | 500 | return; |
| 506 | } else if (ch == '\n') { | 501 | } else if (ch == '\n') { |
| 507 | - // CR LF is converted to LF | ||
| 508 | - if (!this->last_char_was_cr) { | ||
| 509 | - this->val += ch; | ||
| 510 | - } | 502 | + this->val += ch; |
| 511 | return; | 503 | return; |
| 512 | } else { | 504 | } else { |
| 513 | this->val += ch; | 505 | this->val += ch; |