Commit d26b537a7c65b4aa9ed4c632bfb9eaf921fbbd2d

Authored by m-holger
1 parent 2697ba49

Add private method QPDFTokenizer::inString

include/qpdf/QPDFTokenizer.hh
@@ -213,6 +213,7 @@ class QPDFTokenizer @@ -213,6 +213,7 @@ class QPDFTokenizer
213 213
214 void handleCharacter(char); 214 void handleCharacter(char);
215 void inHexstring(char); 215 void inHexstring(char);
  216 + void inString(char, size_t);
216 void reset(); 217 void reset();
217 218
218 // Lexer state 219 // Lexer state
libqpdf/QPDFTokenizer.cc
@@ -368,70 +368,7 @@ QPDFTokenizer::handleCharacter(char ch) @@ -368,70 +368,7 @@ QPDFTokenizer::handleCharacter(char ch)
368 bs_num_count = 0; 368 bs_num_count = 0;
369 } 369 }
370 370
371 - if (this->string_ignoring_newline && (ch == '\n')) {  
372 - // ignore  
373 - this->string_ignoring_newline = false;  
374 - } else if (  
375 - ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) {  
376 - this->bs_num_register[bs_num_count++] = ch;  
377 - } else if (this->last_char_was_bs) {  
378 - switch (ch) {  
379 - case 'n':  
380 - this->val += '\n';  
381 - break;  
382 -  
383 - case 'r':  
384 - this->val += '\r';  
385 - break;  
386 -  
387 - case 't':  
388 - this->val += '\t';  
389 - break;  
390 -  
391 - case 'b':  
392 - this->val += '\b';  
393 - break;  
394 -  
395 - case 'f':  
396 - this->val += '\f';  
397 - break;  
398 -  
399 - case '\n':  
400 - break;  
401 -  
402 - case '\r':  
403 - this->string_ignoring_newline = true;  
404 - break;  
405 -  
406 - default:  
407 - // PDF spec says backslash is ignored before anything else  
408 - this->val += ch;  
409 - break;  
410 - }  
411 - } else if (ch == '\\') {  
412 - // last_char_was_bs is set/cleared below as appropriate  
413 - if (bs_num_count) {  
414 - throw std::logic_error(  
415 - "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 "  
416 - "when ch == '\\'");  
417 - }  
418 - } else if (ch == '(') {  
419 - this->val += ch;  
420 - ++this->string_depth;  
421 - } else if ((ch == ')') && (--this->string_depth == 0)) {  
422 - this->type = tt_string;  
423 - this->state = st_token_ready;  
424 - } else if (ch == '\r') {  
425 - // CR by itself is converted to LF  
426 - this->val += '\n';  
427 - } else if (ch == '\n') {  
428 - // CR LF is converted to LF  
429 - if (!this->last_char_was_cr) {  
430 - this->val += ch;  
431 - }  
432 - } else {  
433 - this->val += ch;  
434 - } 371 + inString(ch, bs_num_count);
435 372
436 this->last_char_was_cr = 373 this->last_char_was_cr =
437 ((!this->string_ignoring_newline) && (ch == '\r')); 374 ((!this->string_ignoring_newline) && (ch == '\r'));
@@ -515,6 +452,82 @@ QPDFTokenizer::inHexstring(char ch) @@ -515,6 +452,82 @@ QPDFTokenizer::inHexstring(char ch)
515 } 452 }
516 453
// Consume one character of a parenthesized string token.
//
// Characters following a backslash are translated (n, r, t, b, f) or
// appended as-is; octal digits following a backslash are stashed in
// bs_num_register. Nested parentheses are tracked through string_depth.
// A bare CR is stored as LF and a CR LF pair is stored as a single LF.
// When a ')' brings string_depth down to zero, the token is marked as
// tt_string and the state becomes st_token_ready.
//
// ch: the character to process.
// bs_num_count: index of the next free slot in bs_num_register
//   (presumably the number of octal escape digits collected so far --
//   confirm against handleCharacter). It is received by value, so the
//   post-increment performed in this function is not seen by the caller.
//
// Throws std::logic_error if a backslash arrives while bs_num_count is
// non-zero.
517 void 454 void
  455 +QPDFTokenizer::inString(char ch, size_t bs_num_count)
  456 +{
  457 + bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
  458 + if (this->string_ignoring_newline && (ch == '\n')) {
  459 + // Drop the LF of a backslash CR LF sequence (the flag is set by the '\r' case below)
  460 + this->string_ignoring_newline = false;
  461 + return;
  462 + } else if (ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) {
  463 + this->bs_num_register[bs_num_count++] = ch; // increment only affects the local copy
  464 + return;
  465 + } else if (this->last_char_was_bs) {
  466 + switch (ch) {
  467 + case 'n':
  468 + this->val += '\n';
  469 + return;
  470 +
  471 + case 'r':
  472 + this->val += '\r';
  473 + return;
  474 +
  475 + case 't':
  476 + this->val += '\t';
  477 + return;
  478 +
  479 + case 'b':
  480 + this->val += '\b';
  481 + return;
  482 +
  483 + case 'f':
  484 + this->val += '\f';
  485 + return;
  486 +
  487 + case '\n': // backslash followed by LF: nothing is appended
  488 + return;
  489 +
  490 + case '\r': // backslash followed by CR: nothing is appended; a directly following LF is dropped too
  491 + this->string_ignoring_newline = true;
  492 + return;
  493 +
  494 + default:
  495 + // PDF spec says backslash is ignored before anything else
  496 + this->val += ch;
  497 + return;
  498 + }
  499 + } else if (ch == '\\') {
  500 + // Nothing is appended for the backslash itself. last_char_was_bs is not modified in this function (presumably the caller updates it -- confirm).
  501 + if (bs_num_count) {
  502 + throw std::logic_error(
  503 + "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 "
  504 + "when ch == '\\'");
  505 + }
  506 + } else if (ch == '(') {
  507 + this->val += ch;
  508 + ++this->string_depth;
  509 + return;
// string_depth is decremented for every ')'. If it is still non-zero the
// condition is false, and the ')' reaches the final else and is appended.
  510 + } else if ((ch == ')') && (--this->string_depth == 0)) {
  511 + this->type = tt_string;
  512 + this->state = st_token_ready;
  513 + return;
  514 + } else if (ch == '\r') {
  515 + // CR by itself is converted to LF
  516 + this->val += '\n';
  517 + return;
  518 + } else if (ch == '\n') {
  519 + // CR LF is converted to LF: the CR already produced the LF, so skip this one
  520 + if (!this->last_char_was_cr) {
  521 + this->val += ch;
  522 + }
  523 + return;
  524 + } else {
  525 + this->val += ch;
  526 + return;
  527 + }
  528 +}
  529 +
  530 +void
518 QPDFTokenizer::presentEOF() 531 QPDFTokenizer::presentEOF()
519 { 532 {
520 if (this->state == st_literal) { 533 if (this->state == st_literal) {