Commit 7c32f6cc2e90058b8a1fbaec48e07bf21bd66afa

Authored by m-holger
1 parent 7c5778f9

Add state st_string_escape in QPDFTokenizer

include/qpdf/QPDFTokenizer.hh
@@ -203,6 +203,7 @@ class QPDFTokenizer @@ -203,6 +203,7 @@ class QPDFTokenizer
203 st_in_space, 203 st_in_space,
204 st_in_comment, 204 st_in_comment,
205 st_in_string, 205 st_in_string,
  206 + st_string_escape,
206 st_char_code, 207 st_char_code,
207 st_string_after_cr, 208 st_string_after_cr,
208 st_lt, 209 st_lt,
@@ -238,7 +239,6 @@ class QPDFTokenizer @@ -238,7 +239,6 @@ class QPDFTokenizer
238 // State for strings 239 // State for strings
239 int string_depth; 240 int string_depth;
240 char bs_num_register[4]; 241 char bs_num_register[4];
241 - bool last_char_was_bs;  
242 }; 242 };
243 243
244 #endif // QPDFTOKENIZER_HH 244 #endif // QPDFTOKENIZER_HH
libqpdf/QPDFTokenizer.cc
@@ -85,7 +85,6 @@ QPDFTokenizer::reset() @@ -85,7 +85,6 @@ QPDFTokenizer::reset()
85 char_to_unread = '\0'; 85 char_to_unread = '\0';
86 inline_image_bytes = 0; 86 inline_image_bytes = 0;
87 string_depth = 0; 87 string_depth = 0;
88 - last_char_was_bs = false;  
89 } 88 }
90 89
91 QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : 90 QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) :
@@ -244,7 +243,6 @@ QPDFTokenizer::handleCharacter(char ch) @@ -244,7 +243,6 @@ QPDFTokenizer::handleCharacter(char ch)
244 case '(': 243 case '(':
245 this->string_depth = 1; 244 this->string_depth = 1;
246 memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); 245 memset(this->bs_num_register, '\0', sizeof(this->bs_num_register));
247 - this->last_char_was_bs = false;  
248 this->state = st_in_string; 246 this->state = st_in_string;
249 return; 247 return;
250 248
@@ -348,22 +346,66 @@ QPDFTokenizer::handleCharacter(char ch) @@ -348,22 +346,66 @@ QPDFTokenizer::handleCharacter(char ch)
348 return; 346 return;
349 347
350 case st_in_string: 348 case st_in_string:
351 - {  
352 - inString(ch);  
353 - this->last_char_was_bs =  
354 - ((!this->last_char_was_bs) && (ch == '\\'));  
355 - } 349 + inString(ch);
356 return; 350 return;
357 351
358 - case (st_string_after_cr): 352 + case st_string_after_cr:
359 // CR LF in strings are either ignored or normalized to CR 353 // CR LF in strings are either ignored or normalized to CR
360 this->state = st_in_string; 354 this->state = st_in_string;
361 if (ch != '\n') { 355 if (ch != '\n') {
362 - handleCharacter(ch); 356 + inString(ch);
363 } 357 }
364 return; 358 return;
365 359
366 - case (st_char_code): 360 + case st_string_escape:
  361 + this->state = st_in_string;
  362 + switch (ch) {
  363 + case '0':
  364 + case '1':
  365 + case '2':
  366 + case '3':
  367 + case '4':
  368 + case '5':
  369 + case '6':
  370 + case '7':
  371 + this->state = st_char_code;
  372 + inCharCode(ch);
  373 + return;
  374 +
  375 + case 'n':
  376 + this->val += '\n';
  377 + return;
  378 +
  379 + case 'r':
  380 + this->val += '\r';
  381 + return;
  382 +
  383 + case 't':
  384 + this->val += '\t';
  385 + return;
  386 +
  387 + case 'b':
  388 + this->val += '\b';
  389 + return;
  390 +
  391 + case 'f':
  392 + this->val += '\f';
  393 + return;
  394 +
  395 + case '\n':
  396 + return;
  397 +
  398 + case '\r':
  399 + this->state = st_string_after_cr;
  400 + return;
  401 +
  402 + default:
  403 + // PDF spec says backslash is ignored before anything else
  404 + this->val += ch;
  405 + return;
  406 + }
  407 +
  408 + case st_char_code:
367 inCharCode(ch); 409 inCharCode(ch);
368 return; 410 return;
369 411
@@ -444,47 +486,9 @@ QPDFTokenizer::inHexstring(char ch) @@ -444,47 +486,9 @@ QPDFTokenizer::inHexstring(char ch)
444 void 486 void
445 QPDFTokenizer::inString(char ch) 487 QPDFTokenizer::inString(char ch)
446 { 488 {
447 - bool ch_is_octal = ((ch >= '0') && (ch <= '7'));  
448 - if (ch_is_octal && this->last_char_was_bs) {  
449 - this->state = st_char_code;  
450 - inCharCode(ch); 489 + if (ch == '\\') {
  490 + this->state = st_string_escape;
451 return; 491 return;
452 - } else if (this->last_char_was_bs) {  
453 - switch (ch) {  
454 - case 'n':  
455 - this->val += '\n';  
456 - return;  
457 -  
458 - case 'r':  
459 - this->val += '\r';  
460 - return;  
461 -  
462 - case 't':  
463 - this->val += '\t';  
464 - return;  
465 -  
466 - case 'b':  
467 - this->val += '\b';  
468 - return;  
469 -  
470 - case 'f':  
471 - this->val += '\f';  
472 - return;  
473 -  
474 - case '\n':  
475 - return;  
476 -  
477 - case '\r':  
478 - this->state = st_string_after_cr;  
479 - return;  
480 -  
481 - default:  
482 - // PDF spec says backslash is ignored before anything else  
483 - this->val += ch;  
484 - return;  
485 - }  
486 - } else if (ch == '\\') {  
487 - // last_char_was_bs is set/cleared below as appropriate  
488 } else if (ch == '(') { 492 } else if (ch == '(') {
489 this->val += ch; 493 this->val += ch;
490 ++this->string_depth; 494 ++this->string_depth;