Commit d26b537a7c65b4aa9ed4c632bfb9eaf921fbbd2d

Authored by m-holger
1 parent 2697ba49

Add private method QPDFTokenizer::inString

include/qpdf/QPDFTokenizer.hh
... ... @@ -213,6 +213,7 @@ class QPDFTokenizer
213 213  
214 214 void handleCharacter(char);
215 215 void inHexstring(char);
  216 + void inString(char, size_t);
216 217 void reset();
217 218  
218 219 // Lexer state
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -368,70 +368,7 @@ QPDFTokenizer::handleCharacter(char ch)
368 368 bs_num_count = 0;
369 369 }
370 370  
371   - if (this->string_ignoring_newline && (ch == '\n')) {
372   - // ignore
373   - this->string_ignoring_newline = false;
374   - } else if (
375   - ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) {
376   - this->bs_num_register[bs_num_count++] = ch;
377   - } else if (this->last_char_was_bs) {
378   - switch (ch) {
379   - case 'n':
380   - this->val += '\n';
381   - break;
382   -
383   - case 'r':
384   - this->val += '\r';
385   - break;
386   -
387   - case 't':
388   - this->val += '\t';
389   - break;
390   -
391   - case 'b':
392   - this->val += '\b';
393   - break;
394   -
395   - case 'f':
396   - this->val += '\f';
397   - break;
398   -
399   - case '\n':
400   - break;
401   -
402   - case '\r':
403   - this->string_ignoring_newline = true;
404   - break;
405   -
406   - default:
407   - // PDF spec says backslash is ignored before anything else
408   - this->val += ch;
409   - break;
410   - }
411   - } else if (ch == '\\') {
412   - // last_char_was_bs is set/cleared below as appropriate
413   - if (bs_num_count) {
414   - throw std::logic_error(
415   - "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 "
416   - "when ch == '\\'");
417   - }
418   - } else if (ch == '(') {
419   - this->val += ch;
420   - ++this->string_depth;
421   - } else if ((ch == ')') && (--this->string_depth == 0)) {
422   - this->type = tt_string;
423   - this->state = st_token_ready;
424   - } else if (ch == '\r') {
425   - // CR by itself is converted to LF
426   - this->val += '\n';
427   - } else if (ch == '\n') {
428   - // CR LF is converted to LF
429   - if (!this->last_char_was_cr) {
430   - this->val += ch;
431   - }
432   - } else {
433   - this->val += ch;
434   - }
  371 + inString(ch, bs_num_count);
435 372  
436 373 this->last_char_was_cr =
437 374 ((!this->string_ignoring_newline) && (ch == '\r'));
... ... @@ -515,6 +452,82 @@ QPDFTokenizer::inHexstring(char ch)
515 452 }
516 453  
517 454 void
  455 +QPDFTokenizer::inString(char ch, size_t bs_num_count)
  456 +{
  457 + bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
  458 + if (this->string_ignoring_newline && (ch == '\n')) {
  459 + // ignore
  460 + this->string_ignoring_newline = false;
  461 + return;
  462 + } else if (ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) {
  463 + this->bs_num_register[bs_num_count++] = ch;
  464 + return;
  465 + } else if (this->last_char_was_bs) {
  466 + switch (ch) {
  467 + case 'n':
  468 + this->val += '\n';
  469 + return;
  470 +
  471 + case 'r':
  472 + this->val += '\r';
  473 + return;
  474 +
  475 + case 't':
  476 + this->val += '\t';
  477 + return;
  478 +
  479 + case 'b':
  480 + this->val += '\b';
  481 + return;
  482 +
  483 + case 'f':
  484 + this->val += '\f';
  485 + return;
  486 +
  487 + case '\n':
  488 + return;
  489 +
  490 + case '\r':
  491 + this->string_ignoring_newline = true;
  492 + return;
  493 +
  494 + default:
  495 + // PDF spec says backslash is ignored before anything else
  496 + this->val += ch;
  497 + return;
  498 + }
  499 + } else if (ch == '\\') {
  500 + // last_char_was_bs is set/cleared below as appropriate
  501 + if (bs_num_count) {
  502 + throw std::logic_error(
  503 + "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 "
  504 + "when ch == '\\'");
  505 + }
  506 + } else if (ch == '(') {
  507 + this->val += ch;
  508 + ++this->string_depth;
  509 + return;
  510 + } else if ((ch == ')') && (--this->string_depth == 0)) {
  511 + this->type = tt_string;
  512 + this->state = st_token_ready;
  513 + return;
  514 + } else if (ch == '\r') {
  515 + // CR by itself is converted to LF
  516 + this->val += '\n';
  517 + return;
  518 + } else if (ch == '\n') {
  519 + // CR LF is converted to LF
  520 + if (!this->last_char_was_cr) {
  521 + this->val += ch;
  522 + }
  523 + return;
  524 + } else {
  525 + this->val += ch;
  526 + return;
  527 + }
  528 +}
  529 +
  530 +void
518 531 QPDFTokenizer::presentEOF()
519 532 {
520 533 if (this->state == st_literal) {
... ...