Commit 86ade3f9cd367296e64ae86870ec12ebf34883f6

Authored by m-holger
1 parent 91fb61ed

Add private method QPDFTokenizer::handleCharacter

include/qpdf/QPDFTokenizer.hh
... ... @@ -197,6 +197,7 @@ class QPDFTokenizer
197 197 bool isSpace(char);
198 198 bool isDelimiter(char);
199 199 void findEI(std::shared_ptr<InputSource> input);
  200 + void handleCharacter(char);
200 201  
201 202 enum state_e {
202 203 st_top,
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -200,12 +200,25 @@ QPDFTokenizer::presentCharacter(char ch)
200 200 {
201 201 char orig_ch = ch;
202 202  
  203 + handleCharacter(ch);
  204 +
  205 + if ((this->state == st_token_ready) && (this->type == tt_word)) {
  206 + resolveLiteral();
  207 + }
  208 +
  209 + if (!(betweenTokens() ||
  210 + ((this->state == st_token_ready) && this->unread_char))) {
  211 + this->raw_val += orig_ch;
  212 + }
  213 +}
  214 +
  215 +void
  216 +QPDFTokenizer::handleCharacter(char ch)
  217 +{
203 218 // State machine is implemented such that some characters may be
204 219 // handled more than once. This happens whenever you have to use
205 220 // the character that caused a state change in the new state.
206 221  
207   - bool handled = true;
208   -
209 222 switch (this->state) {
210 223 case (st_token_ready):
211 224 throw std::logic_error(
... ... @@ -259,7 +272,7 @@ QPDFTokenizer::presentCharacter(char ch)
259 272 this->state = st_literal;
260 273 }
261 274 }
262   - break;
  275 + return;
263 276  
264 277 case st_in_space:
265 278 // We only enter this state if include_ignorable is true.
... ... @@ -271,7 +284,7 @@ QPDFTokenizer::presentCharacter(char ch)
271 284 } else {
272 285 this->val += ch;
273 286 }
274   - break;
  287 + return;
275 288  
276 289 case st_in_comment:
277 290 if ((ch == '\r') || (ch == '\n')) {
... ... @@ -286,17 +299,16 @@ QPDFTokenizer::presentCharacter(char ch)
286 299 } else if (this->include_ignorable) {
287 300 this->val += ch;
288 301 }
289   - break;
  302 + return;
290 303  
291 304 case st_lt:
292 305 if (ch == '<') {
293 306 this->val += "<<";
294 307 this->type = tt_dict_open;
295 308 this->state = st_token_ready;
296   - } else {
297   - handled = false;
298   - this->state = st_in_hexstring;
  309 + return;
299 310 }
  311 + this->state = st_in_hexstring;
300 312 break;
301 313  
302 314 case st_gt:
... ... @@ -313,7 +325,7 @@ QPDFTokenizer::presentCharacter(char ch)
313 325 this->char_to_unread = ch;
314 326 this->state = st_token_ready;
315 327 }
316   - break;
  328 + return;
317 329  
318 330 case st_in_string:
319 331 {
... ... @@ -403,7 +415,7 @@ QPDFTokenizer::presentCharacter(char ch)
403 415 this->last_char_was_bs =
404 416 ((!this->last_char_was_bs) && (ch == '\\'));
405 417 }
406   - break;
  418 + return;
407 419  
408 420 case st_literal:
409 421 if (isDelimiter(ch)) {
... ... @@ -422,7 +434,7 @@ QPDFTokenizer::presentCharacter(char ch)
422 434 } else {
423 435 this->val += ch;
424 436 }
425   - break;
  437 + return;
426 438  
427 439 case st_inline_image:
428 440 this->val += ch;
... ... @@ -432,57 +444,45 @@ QPDFTokenizer::presentCharacter(char ch)
432 444 this->inline_image_bytes = 0;
433 445 this->state = st_token_ready;
434 446 }
  447 + return;
  448 +
  449 + case (st_in_hexstring):
435 450 break;
436 451  
437 452 default:
438   - handled = false;
439   - }
440   -
441   - if (handled) {
442   - // okay
443   - } else if (this->state == st_in_hexstring) {
444   - if (ch == '>') {
445   - this->type = tt_string;
446   - this->state = st_token_ready;
447   - if (this->val.length() % 2) {
448   - // PDF spec says odd hexstrings have implicit
449   - // trailing 0.
450   - this->val += '0';
451   - }
452   - char num[3];
453   - num[2] = '\0';
454   - std::string nval;
455   - for (unsigned int i = 0; i < this->val.length(); i += 2) {
456   - num[0] = this->val.at(i);
457   - num[1] = this->val.at(i + 1);
458   - char nch = static_cast<char>(strtol(num, nullptr, 16));
459   - nval += nch;
460   - }
461   - this->val.clear();
462   - this->val += nval;
463   - } else if (QUtil::is_hex_digit(ch)) {
464   - this->val += ch;
465   - } else if (isSpace(ch)) {
466   - // ignore
467   - } else {
468   - this->type = tt_bad;
469   - QTC::TC("qpdf", "QPDFTokenizer bad hexstring character");
470   - this->error_message =
471   - std::string("invalid character (") + ch + ") in hexstring";
472   - this->state = st_token_ready;
473   - }
474   - } else {
475 453 throw std::logic_error(
476 454 "INTERNAL ERROR: invalid state while reading token");
477 455 }
478 456  
479   - if ((this->state == st_token_ready) && (this->type == tt_word)) {
480   - resolveLiteral();
481   - }
482   -
483   - if (!(betweenTokens() ||
484   - ((this->state == st_token_ready) && this->unread_char))) {
485   - this->raw_val += orig_ch;
  457 + if (ch == '>') {
  458 + this->type = tt_string;
  459 + this->state = st_token_ready;
  460 + if (this->val.length() % 2) {
  461 + // PDF spec says odd hexstrings have implicit
  462 + // trailing 0.
  463 + this->val += '0';
  464 + }
  465 + char num[3];
  466 + num[2] = '\0';
  467 + std::string nval;
  468 + for (unsigned int i = 0; i < this->val.length(); i += 2) {
  469 + num[0] = this->val.at(i);
  470 + num[1] = this->val.at(i + 1);
  471 + char nch = static_cast<char>(strtol(num, nullptr, 16));
  472 + nval += nch;
  473 + }
  474 + this->val.clear();
  475 + this->val += nval;
  476 + } else if (QUtil::is_hex_digit(ch)) {
  477 + this->val += ch;
  478 + } else if (isSpace(ch)) {
  479 + // ignore
  480 + } else {
  481 + this->type = tt_bad;
  482 + QTC::TC("qpdf", "QPDFTokenizer bad hexstring character");
  483 + this->error_message =
  484 + std::string("invalid character (") + ch + ") in hexstring";
  485 + this->state = st_token_ready;
486 486 }
487 487 }
488 488  
... ...