Commit 7c5778f999e15cc1af6360710f8055c2fa234d03

Authored by m-holger
1 parent f29d0a63

Add state st_string_after_cr in QPDFTokenizer

include/qpdf/QPDFTokenizer.hh
@@ -204,6 +204,7 @@ class QPDFTokenizer @@ -204,6 +204,7 @@ class QPDFTokenizer
204 st_in_comment, 204 st_in_comment,
205 st_in_string, 205 st_in_string,
206 st_char_code, 206 st_char_code,
  207 + st_string_after_cr,
207 st_lt, 208 st_lt,
208 st_gt, 209 st_gt,
209 st_literal, 210 st_literal,
@@ -236,10 +237,8 @@ class QPDFTokenizer @@ -236,10 +237,8 @@ class QPDFTokenizer
236 237
237 // State for strings 238 // State for strings
238 int string_depth; 239 int string_depth;
239 - bool string_ignoring_newline;  
240 char bs_num_register[4]; 240 char bs_num_register[4];
241 bool last_char_was_bs; 241 bool last_char_was_bs;
242 - bool last_char_was_cr;  
243 }; 242 };
244 243
245 #endif // QPDFTOKENIZER_HH 244 #endif // QPDFTOKENIZER_HH
libqpdf/QPDFTokenizer.cc
@@ -85,9 +85,7 @@ QPDFTokenizer::reset() @@ -85,9 +85,7 @@ QPDFTokenizer::reset()
85 char_to_unread = '\0'; 85 char_to_unread = '\0';
86 inline_image_bytes = 0; 86 inline_image_bytes = 0;
87 string_depth = 0; 87 string_depth = 0;
88 - string_ignoring_newline = false;  
89 last_char_was_bs = false; 88 last_char_was_bs = false;
90 - last_char_was_cr = false;  
91 } 89 }
92 90
93 QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : 91 QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) :
@@ -245,10 +243,8 @@ QPDFTokenizer::handleCharacter(char ch) @@ -245,10 +243,8 @@ QPDFTokenizer::handleCharacter(char ch)
245 243
246 case '(': 244 case '(':
247 this->string_depth = 1; 245 this->string_depth = 1;
248 - this->string_ignoring_newline = false;  
249 memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); 246 memset(this->bs_num_register, '\0', sizeof(this->bs_num_register));
250 this->last_char_was_bs = false; 247 this->last_char_was_bs = false;
251 - this->last_char_was_cr = false;  
252 this->state = st_in_string; 248 this->state = st_in_string;
253 return; 249 return;
254 250
@@ -353,18 +349,20 @@ QPDFTokenizer::handleCharacter(char ch) @@ -353,18 +349,20 @@ QPDFTokenizer::handleCharacter(char ch)
353 349
354 case st_in_string: 350 case st_in_string:
355 { 351 {
356 - if (this->string_ignoring_newline && (ch != '\n')) {  
357 - this->string_ignoring_newline = false;  
358 - }  
359 inString(ch); 352 inString(ch);
360 -  
361 - this->last_char_was_cr =  
362 - ((!this->string_ignoring_newline) && (ch == '\r'));  
363 this->last_char_was_bs = 353 this->last_char_was_bs =
364 ((!this->last_char_was_bs) && (ch == '\\')); 354 ((!this->last_char_was_bs) && (ch == '\\'));
365 } 355 }
366 return; 356 return;
367 357
  358 + case (st_string_after_cr):
  359 + // CR LF in strings are either ignored or normalized to LF
  360 + this->state = st_in_string;
  361 + if (ch != '\n') {
  362 + handleCharacter(ch);
  363 + }
  364 + return;
  365 +
368 case (st_char_code): 366 case (st_char_code):
369 inCharCode(ch); 367 inCharCode(ch);
370 return; 368 return;
@@ -447,11 +445,7 @@ void @@ -447,11 +445,7 @@ void
447 QPDFTokenizer::inString(char ch) 445 QPDFTokenizer::inString(char ch)
448 { 446 {
449 bool ch_is_octal = ((ch >= '0') && (ch <= '7')); 447 bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
450 - if (this->string_ignoring_newline && (ch == '\n')) {  
451 - // ignore  
452 - this->string_ignoring_newline = false;  
453 - return;  
454 - } else if (ch_is_octal && this->last_char_was_bs) { 448 + if (ch_is_octal && this->last_char_was_bs) {
455 this->state = st_char_code; 449 this->state = st_char_code;
456 inCharCode(ch); 450 inCharCode(ch);
457 return; 451 return;
@@ -481,7 +475,7 @@ QPDFTokenizer::inString(char ch) @@ -481,7 +475,7 @@ QPDFTokenizer::inString(char ch)
481 return; 475 return;
482 476
483 case '\r': 477 case '\r':
484 - this->string_ignoring_newline = true; 478 + this->state = st_string_after_cr;
485 return; 479 return;
486 480
487 default: 481 default:
@@ -502,12 +496,10 @@ QPDFTokenizer::inString(char ch) @@ -502,12 +496,10 @@ QPDFTokenizer::inString(char ch)
502 } else if (ch == '\r') { 496 } else if (ch == '\r') {
503 // CR by itself is converted to LF 497 // CR by itself is converted to LF
504 this->val += '\n'; 498 this->val += '\n';
  499 + this->state = st_string_after_cr;
505 return; 500 return;
506 } else if (ch == '\n') { 501 } else if (ch == '\n') {
507 - // CR LF is converted to LF  
508 - if (!this->last_char_was_cr) {  
509 - this->val += ch;  
510 - } 502 + this->val += ch;
511 return; 503 return;
512 } else { 504 } else {
513 this->val += ch; 505 this->val += ch;