Commit 7c5778f999e15cc1af6360710f8055c2fa234d03

Authored by m-holger
1 parent f29d0a63

Add state st_string_after_cr in QPDFTokenizer

include/qpdf/QPDFTokenizer.hh
... ... @@ -204,6 +204,7 @@ class QPDFTokenizer
204 204 st_in_comment,
205 205 st_in_string,
206 206 st_char_code,
  207 + st_string_after_cr,
207 208 st_lt,
208 209 st_gt,
209 210 st_literal,
... ... @@ -236,10 +237,8 @@ class QPDFTokenizer
236 237  
237 238 // State for strings
238 239 int string_depth;
239   - bool string_ignoring_newline;
240 240 char bs_num_register[4];
241 241 bool last_char_was_bs;
242   - bool last_char_was_cr;
243 242 };
244 243  
245 244 #endif // QPDFTOKENIZER_HH
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -85,9 +85,7 @@ QPDFTokenizer::reset()
85 85 char_to_unread = '\0';
86 86 inline_image_bytes = 0;
87 87 string_depth = 0;
88   - string_ignoring_newline = false;
89 88 last_char_was_bs = false;
90   - last_char_was_cr = false;
91 89 }
92 90  
93 91 QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) :
... ... @@ -245,10 +243,8 @@ QPDFTokenizer::handleCharacter(char ch)
245 243  
246 244 case '(':
247 245 this->string_depth = 1;
248   - this->string_ignoring_newline = false;
249 246 memset(this->bs_num_register, '\0', sizeof(this->bs_num_register));
250 247 this->last_char_was_bs = false;
251   - this->last_char_was_cr = false;
252 248 this->state = st_in_string;
253 249 return;
254 250  
... ... @@ -353,18 +349,20 @@ QPDFTokenizer::handleCharacter(char ch)
353 349  
354 350 case st_in_string:
355 351 {
356   - if (this->string_ignoring_newline && (ch != '\n')) {
357   - this->string_ignoring_newline = false;
358   - }
359 352 inString(ch);
360   -
361   - this->last_char_was_cr =
362   - ((!this->string_ignoring_newline) && (ch == '\r'));
363 353 this->last_char_was_bs =
364 354 ((!this->last_char_was_bs) && (ch == '\\'));
365 355 }
366 356 return;
367 357  
  358 + case (st_string_after_cr):
  359 + // CR LF in strings are either ignored or normalized to LF
  360 + this->state = st_in_string;
  361 + if (ch != '\n') {
  362 + handleCharacter(ch);
  363 + }
  364 + return;
  365 +
368 366 case (st_char_code):
369 367 inCharCode(ch);
370 368 return;
... ... @@ -447,11 +445,7 @@ void
447 445 QPDFTokenizer::inString(char ch)
448 446 {
449 447 bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
450   - if (this->string_ignoring_newline && (ch == '\n')) {
451   - // ignore
452   - this->string_ignoring_newline = false;
453   - return;
454   - } else if (ch_is_octal && this->last_char_was_bs) {
  448 + if (ch_is_octal && this->last_char_was_bs) {
455 449 this->state = st_char_code;
456 450 inCharCode(ch);
457 451 return;
... ... @@ -481,7 +475,7 @@ QPDFTokenizer::inString(char ch)
481 475 return;
482 476  
483 477 case '\r':
484   - this->string_ignoring_newline = true;
  478 + this->state = st_string_after_cr;
485 479 return;
486 480  
487 481 default:
... ... @@ -502,12 +496,10 @@ QPDFTokenizer::inString(char ch)
502 496 } else if (ch == '\r') {
503 497 // CR by itself is converted to LF
504 498 this->val += '\n';
  499 + this->state = st_string_after_cr;
505 500 return;
506 501 } else if (ch == '\n') {
507   - // CR LF is converted to LF
508   - if (!this->last_char_was_cr) {
509   - this->val += ch;
510   - }
  502 + this->val += ch;
511 503 return;
512 504 } else {
513 505 this->val += ch;
... ...