Commit 7c32f6cc2e90058b8a1fbaec48e07bf21bd66afa

Authored by m-holger
1 parent 7c5778f9

Add state st_string_escape to QPDFTokenizer (replaces the last_char_was_bs flag)

include/qpdf/QPDFTokenizer.hh
... ... @@ -203,6 +203,7 @@ class QPDFTokenizer
203 203 st_in_space,
204 204 st_in_comment,
205 205 st_in_string,
  206 + st_string_escape,
206 207 st_char_code,
207 208 st_string_after_cr,
208 209 st_lt,
... ... @@ -238,7 +239,6 @@ class QPDFTokenizer
238 239 // State for strings
239 240 int string_depth;
240 241 char bs_num_register[4];
241   - bool last_char_was_bs;
242 242 };
243 243  
244 244 #endif // QPDFTOKENIZER_HH
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -85,7 +85,6 @@ QPDFTokenizer::reset()
85 85 char_to_unread = '\0';
86 86 inline_image_bytes = 0;
87 87 string_depth = 0;
88   - last_char_was_bs = false;
89 88 }
90 89  
91 90 QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) :
... ... @@ -244,7 +243,6 @@ QPDFTokenizer::handleCharacter(char ch)
244 243 case '(':
245 244 this->string_depth = 1;
246 245 memset(this->bs_num_register, '\0', sizeof(this->bs_num_register));
247   - this->last_char_was_bs = false;
248 246 this->state = st_in_string;
249 247 return;
250 248  
... ... @@ -348,22 +346,66 @@ QPDFTokenizer::handleCharacter(char ch)
348 346 return;
349 347  
350 348 case st_in_string:
351   - {
352   - inString(ch);
353   - this->last_char_was_bs =
354   - ((!this->last_char_was_bs) && (ch == '\\'));
355   - }
  349 + inString(ch);
356 350 return;
357 351  
358   - case (st_string_after_cr):
  352 + case st_string_after_cr:
359 353 // CR LF in strings are either ignored or normalized to CR
360 354 this->state = st_in_string;
361 355 if (ch != '\n') {
362   - handleCharacter(ch);
  356 + inString(ch);
363 357 }
364 358 return;
365 359  
366   - case (st_char_code):
  360 + case st_string_escape:
  361 + this->state = st_in_string;
  362 + switch (ch) {
  363 + case '0':
  364 + case '1':
  365 + case '2':
  366 + case '3':
  367 + case '4':
  368 + case '5':
  369 + case '6':
  370 + case '7':
  371 + this->state = st_char_code;
  372 + inCharCode(ch);
  373 + return;
  374 +
  375 + case 'n':
  376 + this->val += '\n';
  377 + return;
  378 +
  379 + case 'r':
  380 + this->val += '\r';
  381 + return;
  382 +
  383 + case 't':
  384 + this->val += '\t';
  385 + return;
  386 +
  387 + case 'b':
  388 + this->val += '\b';
  389 + return;
  390 +
  391 + case 'f':
  392 + this->val += '\f';
  393 + return;
  394 +
  395 + case '\n':
  396 + return;
  397 +
  398 + case '\r':
  399 + this->state = st_string_after_cr;
  400 + return;
  401 +
  402 + default:
  403 + // PDF spec says backslash is ignored before anything else
  404 + this->val += ch;
  405 + return;
  406 + }
  407 +
  408 + case st_char_code:
367 409 inCharCode(ch);
368 410 return;
369 411  
... ... @@ -444,47 +486,9 @@ QPDFTokenizer::inHexstring(char ch)
444 486 void
445 487 QPDFTokenizer::inString(char ch)
446 488 {
447   - bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
448   - if (ch_is_octal && this->last_char_was_bs) {
449   - this->state = st_char_code;
450   - inCharCode(ch);
  489 + if (ch == '\\') {
  490 + this->state = st_string_escape;
451 491 return;
452   - } else if (this->last_char_was_bs) {
453   - switch (ch) {
454   - case 'n':
455   - this->val += '\n';
456   - return;
457   -
458   - case 'r':
459   - this->val += '\r';
460   - return;
461   -
462   - case 't':
463   - this->val += '\t';
464   - return;
465   -
466   - case 'b':
467   - this->val += '\b';
468   - return;
469   -
470   - case 'f':
471   - this->val += '\f';
472   - return;
473   -
474   - case '\n':
475   - return;
476   -
477   - case '\r':
478   - this->state = st_string_after_cr;
479   - return;
480   -
481   - default:
482   - // PDF spec says backslash is ignored before anything else
483   - this->val += ch;
484   - return;
485   - }
486   - } else if (ch == '\\') {
487   - // last_char_was_bs is set/cleared below as appropriate
488 492 } else if (ch == '(') {
489 493 this->val += ch;
490 494 ++this->string_depth;
... ...