Commit 86ade3f9cd367296e64ae86870ec12ebf34883f6 (1 parent: 91fb61ed)
Add private method QPDFTokenizer::handleCharacter
Showing 2 changed files with 56 additions and 55 deletions
include/qpdf/QPDFTokenizer.hh
| @@ -197,6 +197,7 @@ class QPDFTokenizer | @@ -197,6 +197,7 @@ class QPDFTokenizer | ||
| 197 | bool isSpace(char); | 197 | bool isSpace(char); |
| 198 | bool isDelimiter(char); | 198 | bool isDelimiter(char); |
| 199 | void findEI(std::shared_ptr<InputSource> input); | 199 | void findEI(std::shared_ptr<InputSource> input); |
| 200 | + void handleCharacter(char); | ||
| 200 | 201 | ||
| 201 | enum state_e { | 202 | enum state_e { |
| 202 | st_top, | 203 | st_top, |
libqpdf/QPDFTokenizer.cc
| @@ -200,12 +200,25 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -200,12 +200,25 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 200 | { | 200 | { |
| 201 | char orig_ch = ch; | 201 | char orig_ch = ch; |
| 202 | 202 | ||
| 203 | + handleCharacter(ch); | ||
| 204 | + | ||
| 205 | + if ((this->state == st_token_ready) && (this->type == tt_word)) { | ||
| 206 | + resolveLiteral(); | ||
| 207 | + } | ||
| 208 | + | ||
| 209 | + if (!(betweenTokens() || | ||
| 210 | + ((this->state == st_token_ready) && this->unread_char))) { | ||
| 211 | + this->raw_val += orig_ch; | ||
| 212 | + } | ||
| 213 | +} | ||
| 214 | + | ||
| 215 | +void | ||
| 216 | +QPDFTokenizer::handleCharacter(char ch) | ||
| 217 | +{ | ||
| 203 | // State machine is implemented such that some characters may be | 218 | // State machine is implemented such that some characters may be |
| 204 | // handled more than once. This happens whenever you have to use | 219 | // handled more than once. This happens whenever you have to use |
| 205 | // the character that caused a state change in the new state. | 220 | // the character that caused a state change in the new state. |
| 206 | 221 | ||
| 207 | - bool handled = true; | ||
| 208 | - | ||
| 209 | switch (this->state) { | 222 | switch (this->state) { |
| 210 | case (st_token_ready): | 223 | case (st_token_ready): |
| 211 | throw std::logic_error( | 224 | throw std::logic_error( |
| @@ -259,7 +272,7 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -259,7 +272,7 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 259 | this->state = st_literal; | 272 | this->state = st_literal; |
| 260 | } | 273 | } |
| 261 | } | 274 | } |
| 262 | - break; | 275 | + return; |
| 263 | 276 | ||
| 264 | case st_in_space: | 277 | case st_in_space: |
| 265 | // We only enter this state if include_ignorable is true. | 278 | // We only enter this state if include_ignorable is true. |
| @@ -271,7 +284,7 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -271,7 +284,7 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 271 | } else { | 284 | } else { |
| 272 | this->val += ch; | 285 | this->val += ch; |
| 273 | } | 286 | } |
| 274 | - break; | 287 | + return; |
| 275 | 288 | ||
| 276 | case st_in_comment: | 289 | case st_in_comment: |
| 277 | if ((ch == '\r') || (ch == '\n')) { | 290 | if ((ch == '\r') || (ch == '\n')) { |
| @@ -286,17 +299,16 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -286,17 +299,16 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 286 | } else if (this->include_ignorable) { | 299 | } else if (this->include_ignorable) { |
| 287 | this->val += ch; | 300 | this->val += ch; |
| 288 | } | 301 | } |
| 289 | - break; | 302 | + return; |
| 290 | 303 | ||
| 291 | case st_lt: | 304 | case st_lt: |
| 292 | if (ch == '<') { | 305 | if (ch == '<') { |
| 293 | this->val += "<<"; | 306 | this->val += "<<"; |
| 294 | this->type = tt_dict_open; | 307 | this->type = tt_dict_open; |
| 295 | this->state = st_token_ready; | 308 | this->state = st_token_ready; |
| 296 | - } else { | ||
| 297 | - handled = false; | ||
| 298 | - this->state = st_in_hexstring; | 309 | + return; |
| 299 | } | 310 | } |
| 311 | + this->state = st_in_hexstring; | ||
| 300 | break; | 312 | break; |
| 301 | 313 | ||
| 302 | case st_gt: | 314 | case st_gt: |
| @@ -313,7 +325,7 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -313,7 +325,7 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 313 | this->char_to_unread = ch; | 325 | this->char_to_unread = ch; |
| 314 | this->state = st_token_ready; | 326 | this->state = st_token_ready; |
| 315 | } | 327 | } |
| 316 | - break; | 328 | + return; |
| 317 | 329 | ||
| 318 | case st_in_string: | 330 | case st_in_string: |
| 319 | { | 331 | { |
| @@ -403,7 +415,7 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -403,7 +415,7 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 403 | this->last_char_was_bs = | 415 | this->last_char_was_bs = |
| 404 | ((!this->last_char_was_bs) && (ch == '\\')); | 416 | ((!this->last_char_was_bs) && (ch == '\\')); |
| 405 | } | 417 | } |
| 406 | - break; | 418 | + return; |
| 407 | 419 | ||
| 408 | case st_literal: | 420 | case st_literal: |
| 409 | if (isDelimiter(ch)) { | 421 | if (isDelimiter(ch)) { |
| @@ -422,7 +434,7 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -422,7 +434,7 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 422 | } else { | 434 | } else { |
| 423 | this->val += ch; | 435 | this->val += ch; |
| 424 | } | 436 | } |
| 425 | - break; | 437 | + return; |
| 426 | 438 | ||
| 427 | case st_inline_image: | 439 | case st_inline_image: |
| 428 | this->val += ch; | 440 | this->val += ch; |
| @@ -432,57 +444,45 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -432,57 +444,45 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 432 | this->inline_image_bytes = 0; | 444 | this->inline_image_bytes = 0; |
| 433 | this->state = st_token_ready; | 445 | this->state = st_token_ready; |
| 434 | } | 446 | } |
| 447 | + return; | ||
| 448 | + | ||
| 449 | + case (st_in_hexstring): | ||
| 435 | break; | 450 | break; |
| 436 | 451 | ||
| 437 | default: | 452 | default: |
| 438 | - handled = false; | ||
| 439 | - } | ||
| 440 | - | ||
| 441 | - if (handled) { | ||
| 442 | - // okay | ||
| 443 | - } else if (this->state == st_in_hexstring) { | ||
| 444 | - if (ch == '>') { | ||
| 445 | - this->type = tt_string; | ||
| 446 | - this->state = st_token_ready; | ||
| 447 | - if (this->val.length() % 2) { | ||
| 448 | - // PDF spec says odd hexstrings have implicit | ||
| 449 | - // trailing 0. | ||
| 450 | - this->val += '0'; | ||
| 451 | - } | ||
| 452 | - char num[3]; | ||
| 453 | - num[2] = '\0'; | ||
| 454 | - std::string nval; | ||
| 455 | - for (unsigned int i = 0; i < this->val.length(); i += 2) { | ||
| 456 | - num[0] = this->val.at(i); | ||
| 457 | - num[1] = this->val.at(i + 1); | ||
| 458 | - char nch = static_cast<char>(strtol(num, nullptr, 16)); | ||
| 459 | - nval += nch; | ||
| 460 | - } | ||
| 461 | - this->val.clear(); | ||
| 462 | - this->val += nval; | ||
| 463 | - } else if (QUtil::is_hex_digit(ch)) { | ||
| 464 | - this->val += ch; | ||
| 465 | - } else if (isSpace(ch)) { | ||
| 466 | - // ignore | ||
| 467 | - } else { | ||
| 468 | - this->type = tt_bad; | ||
| 469 | - QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); | ||
| 470 | - this->error_message = | ||
| 471 | - std::string("invalid character (") + ch + ") in hexstring"; | ||
| 472 | - this->state = st_token_ready; | ||
| 473 | - } | ||
| 474 | - } else { | ||
| 475 | throw std::logic_error( | 453 | throw std::logic_error( |
| 476 | "INTERNAL ERROR: invalid state while reading token"); | 454 | "INTERNAL ERROR: invalid state while reading token"); |
| 477 | } | 455 | } |
| 478 | 456 | ||
| 479 | - if ((this->state == st_token_ready) && (this->type == tt_word)) { | ||
| 480 | - resolveLiteral(); | ||
| 481 | - } | ||
| 482 | - | ||
| 483 | - if (!(betweenTokens() || | ||
| 484 | - ((this->state == st_token_ready) && this->unread_char))) { | ||
| 485 | - this->raw_val += orig_ch; | 457 | + if (ch == '>') { |
| 458 | + this->type = tt_string; | ||
| 459 | + this->state = st_token_ready; | ||
| 460 | + if (this->val.length() % 2) { | ||
| 461 | + // PDF spec says odd hexstrings have implicit | ||
| 462 | + // trailing 0. | ||
| 463 | + this->val += '0'; | ||
| 464 | + } | ||
| 465 | + char num[3]; | ||
| 466 | + num[2] = '\0'; | ||
| 467 | + std::string nval; | ||
| 468 | + for (unsigned int i = 0; i < this->val.length(); i += 2) { | ||
| 469 | + num[0] = this->val.at(i); | ||
| 470 | + num[1] = this->val.at(i + 1); | ||
| 471 | + char nch = static_cast<char>(strtol(num, nullptr, 16)); | ||
| 472 | + nval += nch; | ||
| 473 | + } | ||
| 474 | + this->val.clear(); | ||
| 475 | + this->val += nval; | ||
| 476 | + } else if (QUtil::is_hex_digit(ch)) { | ||
| 477 | + this->val += ch; | ||
| 478 | + } else if (isSpace(ch)) { | ||
| 479 | + // ignore | ||
| 480 | + } else { | ||
| 481 | + this->type = tt_bad; | ||
| 482 | + QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); | ||
| 483 | + this->error_message = | ||
| 484 | + std::string("invalid character (") + ch + ") in hexstring"; | ||
| 485 | + this->state = st_token_ready; | ||
| 486 | } | 486 | } |
| 487 | } | 487 | } |
| 488 | 488 |