Commit 86ade3f9cd367296e64ae86870ec12ebf34883f6 (1 parent: 91fb61ed)
Add private method QPDFTokenizer::handleCharacter
Showing 2 changed files with 56 additions and 55 deletions:
include/qpdf/QPDFTokenizer.hh
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -200,12 +200,25 @@ QPDFTokenizer::presentCharacter(char ch) |
| 200 | 200 | { |
| 201 | 201 | char orig_ch = ch; |
| 202 | 202 | |
| 203 | + handleCharacter(ch); | |
| 204 | + | |
| 205 | + if ((this->state == st_token_ready) && (this->type == tt_word)) { | |
| 206 | + resolveLiteral(); | |
| 207 | + } | |
| 208 | + | |
| 209 | + if (!(betweenTokens() || | |
| 210 | + ((this->state == st_token_ready) && this->unread_char))) { | |
| 211 | + this->raw_val += orig_ch; | |
| 212 | + } | |
| 213 | +} | |
| 214 | + | |
| 215 | +void | |
| 216 | +QPDFTokenizer::handleCharacter(char ch) | |
| 217 | +{ | |
| 203 | 218 | // State machine is implemented such that some characters may be |
| 204 | 219 | // handled more than once. This happens whenever you have to use |
| 205 | 220 | // the character that caused a state change in the new state. |
| 206 | 221 | |
| 207 | - bool handled = true; | |
| 208 | - | |
| 209 | 222 | switch (this->state) { |
| 210 | 223 | case (st_token_ready): |
| 211 | 224 | throw std::logic_error( |
| ... | ... | @@ -259,7 +272,7 @@ QPDFTokenizer::presentCharacter(char ch) |
| 259 | 272 | this->state = st_literal; |
| 260 | 273 | } |
| 261 | 274 | } |
| 262 | - break; | |
| 275 | + return; | |
| 263 | 276 | |
| 264 | 277 | case st_in_space: |
| 265 | 278 | // We only enter this state if include_ignorable is true. |
| ... | ... | @@ -271,7 +284,7 @@ QPDFTokenizer::presentCharacter(char ch) |
| 271 | 284 | } else { |
| 272 | 285 | this->val += ch; |
| 273 | 286 | } |
| 274 | - break; | |
| 287 | + return; | |
| 275 | 288 | |
| 276 | 289 | case st_in_comment: |
| 277 | 290 | if ((ch == '\r') || (ch == '\n')) { |
| ... | ... | @@ -286,17 +299,16 @@ QPDFTokenizer::presentCharacter(char ch) |
| 286 | 299 | } else if (this->include_ignorable) { |
| 287 | 300 | this->val += ch; |
| 288 | 301 | } |
| 289 | - break; | |
| 302 | + return; | |
| 290 | 303 | |
| 291 | 304 | case st_lt: |
| 292 | 305 | if (ch == '<') { |
| 293 | 306 | this->val += "<<"; |
| 294 | 307 | this->type = tt_dict_open; |
| 295 | 308 | this->state = st_token_ready; |
| 296 | - } else { | |
| 297 | - handled = false; | |
| 298 | - this->state = st_in_hexstring; | |
| 309 | + return; | |
| 299 | 310 | } |
| 311 | + this->state = st_in_hexstring; | |
| 300 | 312 | break; |
| 301 | 313 | |
| 302 | 314 | case st_gt: |
| ... | ... | @@ -313,7 +325,7 @@ QPDFTokenizer::presentCharacter(char ch) |
| 313 | 325 | this->char_to_unread = ch; |
| 314 | 326 | this->state = st_token_ready; |
| 315 | 327 | } |
| 316 | - break; | |
| 328 | + return; | |
| 317 | 329 | |
| 318 | 330 | case st_in_string: |
| 319 | 331 | { |
| ... | ... | @@ -403,7 +415,7 @@ QPDFTokenizer::presentCharacter(char ch) |
| 403 | 415 | this->last_char_was_bs = |
| 404 | 416 | ((!this->last_char_was_bs) && (ch == '\\')); |
| 405 | 417 | } |
| 406 | - break; | |
| 418 | + return; | |
| 407 | 419 | |
| 408 | 420 | case st_literal: |
| 409 | 421 | if (isDelimiter(ch)) { |
| ... | ... | @@ -422,7 +434,7 @@ QPDFTokenizer::presentCharacter(char ch) |
| 422 | 434 | } else { |
| 423 | 435 | this->val += ch; |
| 424 | 436 | } |
| 425 | - break; | |
| 437 | + return; | |
| 426 | 438 | |
| 427 | 439 | case st_inline_image: |
| 428 | 440 | this->val += ch; |
| ... | ... | @@ -432,57 +444,45 @@ QPDFTokenizer::presentCharacter(char ch) |
| 432 | 444 | this->inline_image_bytes = 0; |
| 433 | 445 | this->state = st_token_ready; |
| 434 | 446 | } |
| 447 | + return; | |
| 448 | + | |
| 449 | + case (st_in_hexstring): | |
| 435 | 450 | break; |
| 436 | 451 | |
| 437 | 452 | default: |
| 438 | - handled = false; | |
| 439 | - } | |
| 440 | - | |
| 441 | - if (handled) { | |
| 442 | - // okay | |
| 443 | - } else if (this->state == st_in_hexstring) { | |
| 444 | - if (ch == '>') { | |
| 445 | - this->type = tt_string; | |
| 446 | - this->state = st_token_ready; | |
| 447 | - if (this->val.length() % 2) { | |
| 448 | - // PDF spec says odd hexstrings have implicit | |
| 449 | - // trailing 0. | |
| 450 | - this->val += '0'; | |
| 451 | - } | |
| 452 | - char num[3]; | |
| 453 | - num[2] = '\0'; | |
| 454 | - std::string nval; | |
| 455 | - for (unsigned int i = 0; i < this->val.length(); i += 2) { | |
| 456 | - num[0] = this->val.at(i); | |
| 457 | - num[1] = this->val.at(i + 1); | |
| 458 | - char nch = static_cast<char>(strtol(num, nullptr, 16)); | |
| 459 | - nval += nch; | |
| 460 | - } | |
| 461 | - this->val.clear(); | |
| 462 | - this->val += nval; | |
| 463 | - } else if (QUtil::is_hex_digit(ch)) { | |
| 464 | - this->val += ch; | |
| 465 | - } else if (isSpace(ch)) { | |
| 466 | - // ignore | |
| 467 | - } else { | |
| 468 | - this->type = tt_bad; | |
| 469 | - QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); | |
| 470 | - this->error_message = | |
| 471 | - std::string("invalid character (") + ch + ") in hexstring"; | |
| 472 | - this->state = st_token_ready; | |
| 473 | - } | |
| 474 | - } else { | |
| 475 | 453 | throw std::logic_error( |
| 476 | 454 | "INTERNAL ERROR: invalid state while reading token"); |
| 477 | 455 | } |
| 478 | 456 | |
| 479 | - if ((this->state == st_token_ready) && (this->type == tt_word)) { | |
| 480 | - resolveLiteral(); | |
| 481 | - } | |
| 482 | - | |
| 483 | - if (!(betweenTokens() || | |
| 484 | - ((this->state == st_token_ready) && this->unread_char))) { | |
| 485 | - this->raw_val += orig_ch; | |
| 457 | + if (ch == '>') { | |
| 458 | + this->type = tt_string; | |
| 459 | + this->state = st_token_ready; | |
| 460 | + if (this->val.length() % 2) { | |
| 461 | + // PDF spec says odd hexstrings have implicit | |
| 462 | + // trailing 0. | |
| 463 | + this->val += '0'; | |
| 464 | + } | |
| 465 | + char num[3]; | |
| 466 | + num[2] = '\0'; | |
| 467 | + std::string nval; | |
| 468 | + for (unsigned int i = 0; i < this->val.length(); i += 2) { | |
| 469 | + num[0] = this->val.at(i); | |
| 470 | + num[1] = this->val.at(i + 1); | |
| 471 | + char nch = static_cast<char>(strtol(num, nullptr, 16)); | |
| 472 | + nval += nch; | |
| 473 | + } | |
| 474 | + this->val.clear(); | |
| 475 | + this->val += nval; | |
| 476 | + } else if (QUtil::is_hex_digit(ch)) { | |
| 477 | + this->val += ch; | |
| 478 | + } else if (isSpace(ch)) { | |
| 479 | + // ignore | |
| 480 | + } else { | |
| 481 | + this->type = tt_bad; | |
| 482 | + QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); | |
| 483 | + this->error_message = | |
| 484 | + std::string("invalid character (") + ch + ") in hexstring"; | |
| 485 | + this->state = st_token_ready; | |
| 486 | 486 | } |
| 487 | 487 | } |
| 488 | 488 | ... | ... |