Commit e4fe0d5cf57d1d5b7d34492ffb616746ecd6ae35

Authored by m-holger
1 parent a5d2e887

Refactor QPDFTokenizer::inHexstring

include/qpdf/QPDFTokenizer.hh
@@ -210,6 +210,7 @@ class QPDFTokenizer @@ -210,6 +210,7 @@ class QPDFTokenizer
210 st_gt, 210 st_gt,
211 st_literal, 211 st_literal,
212 st_in_hexstring, 212 st_in_hexstring,
  213 + st_in_hexstring_2nd,
213 st_inline_image, 214 st_inline_image,
214 st_token_ready 215 st_token_ready
215 }; 216 };
@@ -217,6 +218,7 @@ class QPDFTokenizer @@ -217,6 +218,7 @@ class QPDFTokenizer
217 void handleCharacter(char); 218 void handleCharacter(char);
218 void inCharCode(char); 219 void inCharCode(char);
219 void inHexstring(char); 220 void inHexstring(char);
  221 + void inHexstring2nd(char);
220 void inString(char); 222 void inString(char);
221 223
222 void reset(); 224 void reset();
@@ -238,6 +240,7 @@ class QPDFTokenizer @@ -238,6 +240,7 @@ class QPDFTokenizer
238 240
239 // State for strings 241 // State for strings
240 int string_depth; 242 int string_depth;
  243 + int char_code;
241 char bs_num_register[4]; 244 char bs_num_register[4];
242 }; 245 };
243 246
libqpdf/QPDFTokenizer.cc
@@ -442,6 +442,10 @@ QPDFTokenizer::handleCharacter(char ch) @@ -442,6 +442,10 @@ QPDFTokenizer::handleCharacter(char ch)
442 inHexstring(ch); 442 inHexstring(ch);
443 return; 443 return;
444 444
  445 + case (st_in_hexstring_2nd):
  446 + inHexstring2nd(ch);
  447 + return;
  448 +
445 default: 449 default:
446 throw std::logic_error( 450 throw std::logic_error(
447 "INTERNAL ERROR: invalid state while reading token"); 451 "INTERNAL ERROR: invalid state while reading token");
@@ -451,29 +455,25 @@ QPDFTokenizer::handleCharacter(char ch) @@ -451,29 +455,25 @@ QPDFTokenizer::handleCharacter(char ch)
451 void 455 void
452 QPDFTokenizer::inHexstring(char ch) 456 QPDFTokenizer::inHexstring(char ch)
453 { 457 {
454 - if (ch == '>') { 458 + if ('0' <= ch && ch <= '9') {
  459 + this->char_code = 16 * (int(ch) - int('0'));
  460 + this->state = st_in_hexstring_2nd;
  461 +
  462 + } else if ('A' <= ch && ch <= 'F') {
  463 + this->char_code = 16 * (10 + int(ch) - int('A'));
  464 + this->state = st_in_hexstring_2nd;
  465 +
  466 + } else if ('a' <= ch && ch <= 'f') {
  467 + this->char_code = 16 * (10 + int(ch) - int('a'));
  468 + this->state = st_in_hexstring_2nd;
  469 +
  470 + } else if (ch == '>') {
455 this->type = tt_string; 471 this->type = tt_string;
456 this->state = st_token_ready; 472 this->state = st_token_ready;
457 - if (this->val.length() % 2) {  
458 - // PDF spec says odd hexstrings have implicit  
459 - // trailing 0.  
460 - this->val += '0';  
461 - }  
462 - char num[3];  
463 - num[2] = '\0';  
464 - std::string nval;  
465 - for (unsigned int i = 0; i < this->val.length(); i += 2) {  
466 - num[0] = this->val.at(i);  
467 - num[1] = this->val.at(i + 1);  
468 - char nch = static_cast<char>(strtol(num, nullptr, 16));  
469 - nval += nch;  
470 - }  
471 - this->val.clear();  
472 - this->val += nval;  
473 - } else if (QUtil::is_hex_digit(ch)) {  
474 - this->val += ch; 473 +
475 } else if (isSpace(ch)) { 474 } else if (isSpace(ch)) {
476 // ignore 475 // ignore
  476 +
477 } else { 477 } else {
478 this->type = tt_bad; 478 this->type = tt_bad;
479 QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); 479 QTC::TC("qpdf", "QPDFTokenizer bad hexstring character");
@@ -484,6 +484,39 @@ QPDFTokenizer::inHexstring(char ch) @@ -484,6 +484,39 @@ QPDFTokenizer::inHexstring(char ch)
484 } 484 }
485 485
486 void 486 void
  487 +QPDFTokenizer::inHexstring2nd(char ch)
  488 +{
  489 + if ('0' <= ch && ch <= '9') {
  490 + this->val += char(this->char_code + int(ch) - int('0'));
  491 + this->state = st_in_hexstring;
  492 +
  493 + } else if ('A' <= ch && ch <= 'F') {
  494 + this->val += char(this->char_code + 10 + int(ch) - int('A'));
  495 + this->state = st_in_hexstring;
  496 +
  497 + } else if ('a' <= ch && ch <= 'f') {
  498 + this->val += char(this->char_code + 10 + int(ch) - int('a'));
  499 + this->state = st_in_hexstring;
  500 +
  501 + } else if (ch == '>') {
  502 + // PDF spec says odd hexstrings have implicit trailing 0.
  503 + this->val += char(this->char_code);
  504 + this->type = tt_string;
  505 + this->state = st_token_ready;
  506 +
  507 + } else if (isSpace(ch)) {
  508 + // ignore
  509 +
  510 + } else {
  511 + this->type = tt_bad;
  512 + QTC::TC("qpdf", "QPDFTokenizer bad hexstring 2nd character");
  513 + this->error_message =
  514 + std::string("invalid character (") + ch + ") in hexstring";
  515 + this->state = st_token_ready;
  516 + }
  517 +}
  518 +
  519 +void
487 QPDFTokenizer::inString(char ch) 520 QPDFTokenizer::inString(char ch)
488 { 521 {
489 switch (ch) { 522 switch (ch) {
@@ -526,7 +559,7 @@ void @@ -526,7 +559,7 @@ void
526 QPDFTokenizer::inCharCode(char ch) 559 QPDFTokenizer::inCharCode(char ch)
527 { 560 {
528 size_t bs_num_count = strlen(this->bs_num_register); 561 size_t bs_num_count = strlen(this->bs_num_register);
529 - bool ch_is_octal = ((ch >= '0') && (ch <= '7')); 562 + bool ch_is_octal = ('0' <= ch && ch <= '7');
530 if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { 563 if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) {
531 // We've accumulated \ddd. PDF Spec says to ignore 564 // We've accumulated \ddd. PDF Spec says to ignore
532 // high-order overflow. 565 // high-order overflow.
qpdf/qpdf.testcov
@@ -66,6 +66,7 @@ QPDF can't find xref 0 @@ -66,6 +66,7 @@ QPDF can't find xref 0
66 QPDFTokenizer bad ) 0 66 QPDFTokenizer bad ) 0
67 QPDFTokenizer bad > 0 67 QPDFTokenizer bad > 0
68 QPDFTokenizer bad hexstring character 0 68 QPDFTokenizer bad hexstring character 0
  69 +QPDFTokenizer bad hexstring 2nd character 0
69 QPDFTokenizer null in name 0 70 QPDFTokenizer null in name 0
70 QPDFTokenizer bad name 0 71 QPDFTokenizer bad name 0
71 QPDF_Stream invalid filter 0 72 QPDF_Stream invalid filter 0