Commit 86ade3f9cd367296e64ae86870ec12ebf34883f6

Authored by m-holger
1 parent 91fb61ed

Add private method QPDFTokenizer::handleCharacter (state machine split out of presentCharacter)

include/qpdf/QPDFTokenizer.hh
@@ -197,6 +197,7 @@ class QPDFTokenizer @@ -197,6 +197,7 @@ class QPDFTokenizer
197 bool isSpace(char); 197 bool isSpace(char);
198 bool isDelimiter(char); 198 bool isDelimiter(char);
199 void findEI(std::shared_ptr<InputSource> input); 199 void findEI(std::shared_ptr<InputSource> input);
  200 + void handleCharacter(char);
200 201
201 enum state_e { 202 enum state_e {
202 st_top, 203 st_top,
libqpdf/QPDFTokenizer.cc
@@ -200,12 +200,25 @@ QPDFTokenizer::presentCharacter(char ch) @@ -200,12 +200,25 @@ QPDFTokenizer::presentCharacter(char ch)
200 { 200 {
201 char orig_ch = ch; 201 char orig_ch = ch;
202 202
  203 + handleCharacter(ch);
  204 +
  205 + if ((this->state == st_token_ready) && (this->type == tt_word)) {
  206 + resolveLiteral();
  207 + }
  208 +
  209 + if (!(betweenTokens() ||
  210 + ((this->state == st_token_ready) && this->unread_char))) {
  211 + this->raw_val += orig_ch;
  212 + }
  213 +}
  214 +
  215 +void
  216 +QPDFTokenizer::handleCharacter(char ch)
  217 +{
203 // State machine is implemented such that some characters may be 218 // State machine is implemented such that some characters may be
204 // handled more than once. This happens whenever you have to use 219 // handled more than once. This happens whenever you have to use
205 // the character that caused a state change in the new state. 220 // the character that caused a state change in the new state.
206 221
207 - bool handled = true;  
208 -  
209 switch (this->state) { 222 switch (this->state) {
210 case (st_token_ready): 223 case (st_token_ready):
211 throw std::logic_error( 224 throw std::logic_error(
@@ -259,7 +272,7 @@ QPDFTokenizer::presentCharacter(char ch) @@ -259,7 +272,7 @@ QPDFTokenizer::presentCharacter(char ch)
259 this->state = st_literal; 272 this->state = st_literal;
260 } 273 }
261 } 274 }
262 - break; 275 + return;
263 276
264 case st_in_space: 277 case st_in_space:
265 // We only enter this state if include_ignorable is true. 278 // We only enter this state if include_ignorable is true.
@@ -271,7 +284,7 @@ QPDFTokenizer::presentCharacter(char ch) @@ -271,7 +284,7 @@ QPDFTokenizer::presentCharacter(char ch)
271 } else { 284 } else {
272 this->val += ch; 285 this->val += ch;
273 } 286 }
274 - break; 287 + return;
275 288
276 case st_in_comment: 289 case st_in_comment:
277 if ((ch == '\r') || (ch == '\n')) { 290 if ((ch == '\r') || (ch == '\n')) {
@@ -286,17 +299,16 @@ QPDFTokenizer::presentCharacter(char ch) @@ -286,17 +299,16 @@ QPDFTokenizer::presentCharacter(char ch)
286 } else if (this->include_ignorable) { 299 } else if (this->include_ignorable) {
287 this->val += ch; 300 this->val += ch;
288 } 301 }
289 - break; 302 + return;
290 303
291 case st_lt: 304 case st_lt:
292 if (ch == '<') { 305 if (ch == '<') {
293 this->val += "<<"; 306 this->val += "<<";
294 this->type = tt_dict_open; 307 this->type = tt_dict_open;
295 this->state = st_token_ready; 308 this->state = st_token_ready;
296 - } else {  
297 - handled = false;  
298 - this->state = st_in_hexstring; 309 + return;
299 } 310 }
  311 + this->state = st_in_hexstring;
300 break; 312 break;
301 313
302 case st_gt: 314 case st_gt:
@@ -313,7 +325,7 @@ QPDFTokenizer::presentCharacter(char ch) @@ -313,7 +325,7 @@ QPDFTokenizer::presentCharacter(char ch)
313 this->char_to_unread = ch; 325 this->char_to_unread = ch;
314 this->state = st_token_ready; 326 this->state = st_token_ready;
315 } 327 }
316 - break; 328 + return;
317 329
318 case st_in_string: 330 case st_in_string:
319 { 331 {
@@ -403,7 +415,7 @@ QPDFTokenizer::presentCharacter(char ch) @@ -403,7 +415,7 @@ QPDFTokenizer::presentCharacter(char ch)
403 this->last_char_was_bs = 415 this->last_char_was_bs =
404 ((!this->last_char_was_bs) && (ch == '\\')); 416 ((!this->last_char_was_bs) && (ch == '\\'));
405 } 417 }
406 - break; 418 + return;
407 419
408 case st_literal: 420 case st_literal:
409 if (isDelimiter(ch)) { 421 if (isDelimiter(ch)) {
@@ -422,7 +434,7 @@ QPDFTokenizer::presentCharacter(char ch) @@ -422,7 +434,7 @@ QPDFTokenizer::presentCharacter(char ch)
422 } else { 434 } else {
423 this->val += ch; 435 this->val += ch;
424 } 436 }
425 - break; 437 + return;
426 438
427 case st_inline_image: 439 case st_inline_image:
428 this->val += ch; 440 this->val += ch;
@@ -432,57 +444,45 @@ QPDFTokenizer::presentCharacter(char ch) @@ -432,57 +444,45 @@ QPDFTokenizer::presentCharacter(char ch)
432 this->inline_image_bytes = 0; 444 this->inline_image_bytes = 0;
433 this->state = st_token_ready; 445 this->state = st_token_ready;
434 } 446 }
  447 + return;
  448 +
  449 + case (st_in_hexstring):
435 break; 450 break;
436 451
437 default: 452 default:
438 - handled = false;  
439 - }  
440 -  
441 - if (handled) {  
442 - // okay  
443 - } else if (this->state == st_in_hexstring) {  
444 - if (ch == '>') {  
445 - this->type = tt_string;  
446 - this->state = st_token_ready;  
447 - if (this->val.length() % 2) {  
448 - // PDF spec says odd hexstrings have implicit  
449 - // trailing 0.  
450 - this->val += '0';  
451 - }  
452 - char num[3];  
453 - num[2] = '\0';  
454 - std::string nval;  
455 - for (unsigned int i = 0; i < this->val.length(); i += 2) {  
456 - num[0] = this->val.at(i);  
457 - num[1] = this->val.at(i + 1);  
458 - char nch = static_cast<char>(strtol(num, nullptr, 16));  
459 - nval += nch;  
460 - }  
461 - this->val.clear();  
462 - this->val += nval;  
463 - } else if (QUtil::is_hex_digit(ch)) {  
464 - this->val += ch;  
465 - } else if (isSpace(ch)) {  
466 - // ignore  
467 - } else {  
468 - this->type = tt_bad;  
469 - QTC::TC("qpdf", "QPDFTokenizer bad hexstring character");  
470 - this->error_message =  
471 - std::string("invalid character (") + ch + ") in hexstring";  
472 - this->state = st_token_ready;  
473 - }  
474 - } else {  
475 throw std::logic_error( 453 throw std::logic_error(
476 "INTERNAL ERROR: invalid state while reading token"); 454 "INTERNAL ERROR: invalid state while reading token");
477 } 455 }
478 456
479 - if ((this->state == st_token_ready) && (this->type == tt_word)) {  
480 - resolveLiteral();  
481 - }  
482 -  
483 - if (!(betweenTokens() ||  
484 - ((this->state == st_token_ready) && this->unread_char))) {  
485 - this->raw_val += orig_ch; 457 + if (ch == '>') {
  458 + this->type = tt_string;
  459 + this->state = st_token_ready;
  460 + if (this->val.length() % 2) {
  461 + // PDF spec says odd hexstrings have implicit
  462 + // trailing 0.
  463 + this->val += '0';
  464 + }
  465 + char num[3];
  466 + num[2] = '\0';
  467 + std::string nval;
  468 + for (unsigned int i = 0; i < this->val.length(); i += 2) {
  469 + num[0] = this->val.at(i);
  470 + num[1] = this->val.at(i + 1);
  471 + char nch = static_cast<char>(strtol(num, nullptr, 16));
  472 + nval += nch;
  473 + }
  474 + this->val.clear();
  475 + this->val += nval;
  476 + } else if (QUtil::is_hex_digit(ch)) {
  477 + this->val += ch;
  478 + } else if (isSpace(ch)) {
  479 + // ignore
  480 + } else {
  481 + this->type = tt_bad;
  482 + QTC::TC("qpdf", "QPDFTokenizer bad hexstring character");
  483 + this->error_message =
  484 + std::string("invalid character (") + ch + ") in hexstring";
  485 + this->state = st_token_ready;
486 } 486 }
487 } 487 }
488 488