Commit f29d0a63121f1243b80c74d90c59a8a88f0a9223

Authored by m-holger
1 parent d26b537a

Add state st_char_code in QPDFTokenizer

include/qpdf/QPDFTokenizer.hh
@@ -203,6 +203,7 @@ class QPDFTokenizer @@ -203,6 +203,7 @@ class QPDFTokenizer
203 st_in_space, 203 st_in_space,
204 st_in_comment, 204 st_in_comment,
205 st_in_string, 205 st_in_string,
  206 + st_char_code,
206 st_lt, 207 st_lt,
207 st_gt, 208 st_gt,
208 st_literal, 209 st_literal,
@@ -212,8 +213,10 @@ class QPDFTokenizer @@ -212,8 +213,10 @@ class QPDFTokenizer
212 }; 213 };
213 214
214 void handleCharacter(char); 215 void handleCharacter(char);
  216 + void inCharCode(char);
215 void inHexstring(char); 217 void inHexstring(char);
216 - void inString(char, size_t); 218 + void inString(char);
  219 +
217 void reset(); 220 void reset();
218 221
219 // Lexer state 222 // Lexer state
libqpdf/QPDFTokenizer.cc
@@ -330,6 +330,7 @@ QPDFTokenizer::handleCharacter(char ch) @@ -330,6 +330,7 @@ QPDFTokenizer::handleCharacter(char ch)
330 this->state = st_token_ready; 330 this->state = st_token_ready;
331 return; 331 return;
332 } 332 }
  333 +
333 this->state = st_in_hexstring; 334 this->state = st_in_hexstring;
334 inHexstring(ch); 335 inHexstring(ch);
335 return; 336 return;
@@ -355,20 +356,7 @@ QPDFTokenizer::handleCharacter(char ch) @@ -355,20 +356,7 @@ QPDFTokenizer::handleCharacter(char ch)
355 if (this->string_ignoring_newline && (ch != '\n')) { 356 if (this->string_ignoring_newline && (ch != '\n')) {
356 this->string_ignoring_newline = false; 357 this->string_ignoring_newline = false;
357 } 358 }
358 -  
359 - size_t bs_num_count = strlen(this->bs_num_register);  
360 - bool ch_is_octal = ((ch >= '0') && (ch <= '7'));  
361 - if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) {  
362 - // We've accumulated \ddd. PDF Spec says to ignore  
363 - // high-order overflow.  
364 - this->val += static_cast<char>(  
365 - strtol(this->bs_num_register, nullptr, 8));  
366 - memset(  
367 - this->bs_num_register, '\0', sizeof(this->bs_num_register));  
368 - bs_num_count = 0;  
369 - }  
370 -  
371 - inString(ch, bs_num_count); 359 + inString(ch);
372 360
373 this->last_char_was_cr = 361 this->last_char_was_cr =
374 ((!this->string_ignoring_newline) && (ch == '\r')); 362 ((!this->string_ignoring_newline) && (ch == '\r'));
@@ -377,6 +365,10 @@ QPDFTokenizer::handleCharacter(char ch) @@ -377,6 +365,10 @@ QPDFTokenizer::handleCharacter(char ch)
377 } 365 }
378 return; 366 return;
379 367
  368 + case (st_char_code):
  369 + inCharCode(ch);
  370 + return;
  371 +
380 case st_literal: 372 case st_literal:
381 if (isDelimiter(ch)) { 373 if (isDelimiter(ch)) {
382 // A C-locale whitespace character or delimiter terminates 374 // A C-locale whitespace character or delimiter terminates
@@ -452,15 +444,16 @@ QPDFTokenizer::inHexstring(char ch) @@ -452,15 +444,16 @@ QPDFTokenizer::inHexstring(char ch)
452 } 444 }
453 445
454 void 446 void
455 -QPDFTokenizer::inString(char ch, size_t bs_num_count) 447 +QPDFTokenizer::inString(char ch)
456 { 448 {
457 bool ch_is_octal = ((ch >= '0') && (ch <= '7')); 449 bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
458 if (this->string_ignoring_newline && (ch == '\n')) { 450 if (this->string_ignoring_newline && (ch == '\n')) {
459 // ignore 451 // ignore
460 this->string_ignoring_newline = false; 452 this->string_ignoring_newline = false;
461 return; 453 return;
462 - } else if (ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) {  
463 - this->bs_num_register[bs_num_count++] = ch; 454 + } else if (ch_is_octal && this->last_char_was_bs) {
  455 + this->state = st_char_code;
  456 + inCharCode(ch);
464 return; 457 return;
465 } else if (this->last_char_was_bs) { 458 } else if (this->last_char_was_bs) {
466 switch (ch) { 459 switch (ch) {
@@ -498,11 +491,6 @@ QPDFTokenizer::inString(char ch, size_t bs_num_count) @@ -498,11 +491,6 @@ QPDFTokenizer::inString(char ch, size_t bs_num_count)
498 } 491 }
499 } else if (ch == '\\') { 492 } else if (ch == '\\') {
500 // last_char_was_bs is set/cleared below as appropriate 493 // last_char_was_bs is set/cleared below as appropriate
501 - if (bs_num_count) {  
502 - throw std::logic_error(  
503 - "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 "  
504 - "when ch == '\\'");  
505 - }  
506 } else if (ch == '(') { 494 } else if (ch == '(') {
507 this->val += ch; 495 this->val += ch;
508 ++this->string_depth; 496 ++this->string_depth;
@@ -528,6 +516,27 @@ QPDFTokenizer::inString(char ch, size_t bs_num_count) @@ -528,6 +516,27 @@ QPDFTokenizer::inString(char ch, size_t bs_num_count)
528 } 516 }
529 517
530 void 518 void
  519 +QPDFTokenizer::inCharCode(char ch)
  520 +{
  521 + size_t bs_num_count = strlen(this->bs_num_register);
  522 + bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
  523 + if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) {
  524 + // We've accumulated \ddd. PDF Spec says to ignore
  525 + // high-order overflow.
  526 + this->val +=
  527 + static_cast<char>(strtol(this->bs_num_register, nullptr, 8));
  528 + memset(this->bs_num_register, '\0', sizeof(this->bs_num_register));
  529 + bs_num_count = 0;
  530 + this->state = st_in_string;
  531 + handleCharacter(ch);
  532 + return;
  533 + } else if (ch_is_octal) {
  534 + this->bs_num_register[bs_num_count++] = ch;
  535 + return;
  536 + }
  537 +}
  538 +
  539 +void
531 QPDFTokenizer::presentEOF() 540 QPDFTokenizer::presentEOF()
532 { 541 {
533 if (this->state == st_literal) { 542 if (this->state == st_literal) {