Commit a3f3238f371f07cd2b2e1a96753cde6590712dc5
1 parent 6111a6a4
Split QPDFTokenizer::handleCharacter into individual methods
Showing 2 changed files with 302 additions and 228 deletions
include/qpdf/QPDFTokenizer.hh
| @@ -200,26 +200,36 @@ class QPDFTokenizer | @@ -200,26 +200,36 @@ class QPDFTokenizer | ||
| 200 | 200 | ||
| 201 | enum state_e { | 201 | enum state_e { |
| 202 | st_top, | 202 | st_top, |
| 203 | + st_in_hexstring, | ||
| 204 | + st_in_string, | ||
| 205 | + st_in_hexstring_2nd, | ||
| 206 | + st_literal, | ||
| 203 | st_in_space, | 207 | st_in_space, |
| 204 | st_in_comment, | 208 | st_in_comment, |
| 205 | - st_in_string, | ||
| 206 | st_string_escape, | 209 | st_string_escape, |
| 207 | st_char_code, | 210 | st_char_code, |
| 208 | st_string_after_cr, | 211 | st_string_after_cr, |
| 209 | st_lt, | 212 | st_lt, |
| 210 | st_gt, | 213 | st_gt, |
| 211 | - st_literal, | ||
| 212 | - st_in_hexstring, | ||
| 213 | - st_in_hexstring_2nd, | ||
| 214 | st_inline_image, | 214 | st_inline_image, |
| 215 | st_token_ready | 215 | st_token_ready |
| 216 | }; | 216 | }; |
| 217 | 217 | ||
| 218 | void handleCharacter(char); | 218 | void handleCharacter(char); |
| 219 | + void inTop(char); | ||
| 220 | + void inSpace(char); | ||
| 221 | + void inComment(char); | ||
| 222 | + void inString(char); | ||
| 223 | + void inLt(char); | ||
| 224 | + void inGt(char); | ||
| 225 | + void inStringAfterCR(char); | ||
| 226 | + void inStringEscape(char); | ||
| 227 | + void inLiteral(char); | ||
| 219 | void inCharCode(char); | 228 | void inCharCode(char); |
| 220 | void inHexstring(char); | 229 | void inHexstring(char); |
| 221 | void inHexstring2nd(char); | 230 | void inHexstring2nd(char); |
| 222 | - void inString(char); | 231 | + void inInlineImage(char); |
| 232 | + void inTokenReady(char); | ||
| 223 | 233 | ||
| 224 | void reset(); | 234 | void reset(); |
| 225 | 235 |
libqpdf/QPDFTokenizer.cc
| @@ -217,134 +217,24 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -217,134 +217,24 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 217 | // the character that caused a state change in the new state. | 217 | // the character that caused a state change in the new state. |
| 218 | 218 | ||
| 219 | switch (this->state) { | 219 | switch (this->state) { |
| 220 | - case (st_token_ready): | ||
| 221 | - throw std::logic_error( | ||
| 222 | - "INTERNAL ERROR: QPDF tokenizer presented character " | ||
| 223 | - "while token is waiting"); | ||
| 224 | - | ||
| 225 | case st_top: | 220 | case st_top: |
| 226 | - // Note: we specifically do not use ctype here. It is | ||
| 227 | - // locale-dependent. | ||
| 228 | - if (isSpace(ch)) { | ||
| 229 | - if (this->include_ignorable) { | ||
| 230 | - this->state = st_in_space; | ||
| 231 | - this->val += ch; | ||
| 232 | - } | ||
| 233 | - return; | ||
| 234 | - } | ||
| 235 | - switch (ch) { | ||
| 236 | - case '%': | ||
| 237 | - this->state = st_in_comment; | ||
| 238 | - if (this->include_ignorable) { | ||
| 239 | - this->val += ch; | ||
| 240 | - } | ||
| 241 | - return; | ||
| 242 | - | ||
| 243 | - case '(': | ||
| 244 | - this->string_depth = 1; | ||
| 245 | - this->state = st_in_string; | ||
| 246 | - return; | ||
| 247 | - | ||
| 248 | - case '<': | ||
| 249 | - this->state = st_lt; | ||
| 250 | - return; | ||
| 251 | - | ||
| 252 | - case '>': | ||
| 253 | - this->state = st_gt; | ||
| 254 | - return; | ||
| 255 | - | ||
| 256 | - case (')'): | ||
| 257 | - this->type = tt_bad; | ||
| 258 | - QTC::TC("qpdf", "QPDFTokenizer bad )"); | ||
| 259 | - this->error_message = "unexpected )"; | ||
| 260 | - this->val += ch; | ||
| 261 | - this->state = st_token_ready; | ||
| 262 | - return; | ||
| 263 | - | ||
| 264 | - case '[': | ||
| 265 | - this->type = tt_array_open; | ||
| 266 | - this->state = st_token_ready; | ||
| 267 | - this->val += ch; | ||
| 268 | - return; | ||
| 269 | - | ||
| 270 | - case ']': | ||
| 271 | - this->type = tt_array_close; | ||
| 272 | - this->val += ch; | ||
| 273 | - this->state = st_token_ready; | ||
| 274 | - return; | ||
| 275 | - | ||
| 276 | - case '{': | ||
| 277 | - this->type = tt_brace_open; | ||
| 278 | - this->state = st_token_ready; | ||
| 279 | - this->val += ch; | ||
| 280 | - return; | ||
| 281 | - | ||
| 282 | - case '}': | ||
| 283 | - this->type = tt_brace_close; | ||
| 284 | - this->state = st_token_ready; | ||
| 285 | - this->val += ch; | ||
| 286 | - return; | ||
| 287 | - | ||
| 288 | - default: | ||
| 289 | - this->state = st_literal; | ||
| 290 | - this->val += ch; | ||
| 291 | - return; | ||
| 292 | - } | 221 | + inTop(ch); |
| 222 | + return; | ||
| 293 | 223 | ||
| 294 | case st_in_space: | 224 | case st_in_space: |
| 295 | - // We only enter this state if include_ignorable is true. | ||
| 296 | - if (!isSpace(ch)) { | ||
| 297 | - this->type = tt_space; | ||
| 298 | - this->unread_char = true; | ||
| 299 | - this->char_to_unread = ch; | ||
| 300 | - this->state = st_token_ready; | ||
| 301 | - return; | ||
| 302 | - } else { | ||
| 303 | - this->val += ch; | ||
| 304 | - return; | ||
| 305 | - } | 225 | + inSpace(ch); |
| 226 | + return; | ||
| 306 | 227 | ||
| 307 | case st_in_comment: | 228 | case st_in_comment: |
| 308 | - if ((ch == '\r') || (ch == '\n')) { | ||
| 309 | - if (this->include_ignorable) { | ||
| 310 | - this->type = tt_comment; | ||
| 311 | - this->unread_char = true; | ||
| 312 | - this->char_to_unread = ch; | ||
| 313 | - this->state = st_token_ready; | ||
| 314 | - } else { | ||
| 315 | - this->state = st_top; | ||
| 316 | - } | ||
| 317 | - } else if (this->include_ignorable) { | ||
| 318 | - this->val += ch; | ||
| 319 | - } | 229 | + inComment(ch); |
| 320 | return; | 230 | return; |
| 321 | 231 | ||
| 322 | case st_lt: | 232 | case st_lt: |
| 323 | - if (ch == '<') { | ||
| 324 | - this->val += "<<"; | ||
| 325 | - this->type = tt_dict_open; | ||
| 326 | - this->state = st_token_ready; | ||
| 327 | - return; | ||
| 328 | - } | ||
| 329 | - | ||
| 330 | - this->state = st_in_hexstring; | ||
| 331 | - inHexstring(ch); | 233 | + inLt(ch); |
| 332 | return; | 234 | return; |
| 333 | 235 | ||
| 334 | case st_gt: | 236 | case st_gt: |
| 335 | - if (ch == '>') { | ||
| 336 | - this->val += ">>"; | ||
| 337 | - this->type = tt_dict_close; | ||
| 338 | - this->state = st_token_ready; | ||
| 339 | - } else { | ||
| 340 | - this->val += ">"; | ||
| 341 | - this->type = tt_bad; | ||
| 342 | - QTC::TC("qpdf", "QPDFTokenizer bad >"); | ||
| 343 | - this->error_message = "unexpected >"; | ||
| 344 | - this->unread_char = true; | ||
| 345 | - this->char_to_unread = ch; | ||
| 346 | - this->state = st_token_ready; | ||
| 347 | - } | 237 | + inGt(ch); |
| 348 | return; | 238 | return; |
| 349 | 239 | ||
| 350 | case st_in_string: | 240 | case st_in_string: |
| @@ -352,107 +242,308 @@ QPDFTokenizer::handleCharacter(char ch) | @@ -352,107 +242,308 @@ QPDFTokenizer::handleCharacter(char ch) | ||
| 352 | return; | 242 | return; |
| 353 | 243 | ||
| 354 | case st_string_after_cr: | 244 | case st_string_after_cr: |
| 355 | - // CR LF in strings are either ignored or normalized to CR | ||
| 356 | - this->state = st_in_string; | ||
| 357 | - if (ch != '\n') { | ||
| 358 | - inString(ch); | ||
| 359 | - } | 245 | + inStringAfterCR(ch); |
| 360 | return; | 246 | return; |
| 361 | 247 | ||
| 362 | case st_string_escape: | 248 | case st_string_escape: |
| 363 | - this->state = st_in_string; | ||
| 364 | - switch (ch) { | ||
| 365 | - case '0': | ||
| 366 | - case '1': | ||
| 367 | - case '2': | ||
| 368 | - case '3': | ||
| 369 | - case '4': | ||
| 370 | - case '5': | ||
| 371 | - case '6': | ||
| 372 | - case '7': | ||
| 373 | - this->state = st_char_code; | ||
| 374 | - this->char_code = 0; | ||
| 375 | - this->digit_count = 0; | ||
| 376 | - inCharCode(ch); | ||
| 377 | - return; | 249 | + inStringEscape(ch); |
| 250 | + return; | ||
| 378 | 251 | ||
| 379 | - case 'n': | ||
| 380 | - this->val += '\n'; | ||
| 381 | - return; | 252 | + case st_char_code: |
| 253 | + inCharCode(ch); | ||
| 254 | + return; | ||
| 382 | 255 | ||
| 383 | - case 'r': | ||
| 384 | - this->val += '\r'; | ||
| 385 | - return; | 256 | + case st_literal: |
| 257 | + inLiteral(ch); | ||
| 258 | + return; | ||
| 386 | 259 | ||
| 387 | - case 't': | ||
| 388 | - this->val += '\t'; | ||
| 389 | - return; | 260 | + case st_inline_image: |
| 261 | + inInlineImage(ch); | ||
| 262 | + return; | ||
| 263 | + this->val += ch; | ||
| 390 | 264 | ||
| 391 | - case 'b': | ||
| 392 | - this->val += '\b'; | ||
| 393 | - return; | 265 | + case st_in_hexstring: |
| 266 | + inHexstring(ch); | ||
| 267 | + return; | ||
| 394 | 268 | ||
| 395 | - case 'f': | ||
| 396 | - this->val += '\f'; | ||
| 397 | - return; | 269 | + case st_in_hexstring_2nd: |
| 270 | + inHexstring2nd(ch); | ||
| 271 | + return; | ||
| 398 | 272 | ||
| 399 | - case '\n': | ||
| 400 | - return; | 273 | + case (st_token_ready): |
| 274 | + inTokenReady(ch); | ||
| 275 | + return; | ||
| 401 | 276 | ||
| 402 | - case '\r': | ||
| 403 | - this->state = st_string_after_cr; | ||
| 404 | - return; | 277 | + default: |
| 278 | + throw std::logic_error( | ||
| 279 | + "INTERNAL ERROR: invalid state while reading token"); | ||
| 280 | + } | ||
| 281 | +} | ||
| 282 | + | ||
| 283 | +void | ||
| 284 | +QPDFTokenizer::inTokenReady(char ch) | ||
| 285 | +{ | ||
| 286 | + throw std::logic_error("INTERNAL ERROR: QPDF tokenizer presented character " | ||
| 287 | + "while token is waiting"); | ||
| 288 | +} | ||
| 405 | 289 | ||
| 406 | - default: | ||
| 407 | - // PDF spec says backslash is ignored before anything else | 290 | +void |
| 291 | +QPDFTokenizer::inTop(char ch) | ||
| 292 | +{ | ||
| 293 | + // Note: we specifically do not use ctype here. It is | ||
| 294 | + // locale-dependent. | ||
| 295 | + if (isSpace(ch)) { | ||
| 296 | + if (this->include_ignorable) { | ||
| 297 | + this->state = st_in_space; | ||
| 408 | this->val += ch; | 298 | this->val += ch; |
| 409 | return; | 299 | return; |
| 410 | } | 300 | } |
| 301 | + return; | ||
| 302 | + } | ||
| 303 | + switch (ch) { | ||
| 304 | + case '%': | ||
| 305 | + this->state = st_in_comment; | ||
| 306 | + if (this->include_ignorable) { | ||
| 307 | + this->val += ch; | ||
| 308 | + } | ||
| 309 | + return; | ||
| 411 | 310 | ||
| 412 | - case st_char_code: | ||
| 413 | - inCharCode(ch); | 311 | + case '(': |
| 312 | + this->string_depth = 1; | ||
| 313 | + this->state = st_in_string; | ||
| 414 | return; | 314 | return; |
| 415 | 315 | ||
| 416 | - case st_literal: | ||
| 417 | - if (isDelimiter(ch)) { | ||
| 418 | - // A C-locale whitespace character or delimiter terminates | ||
| 419 | - // token. It is important to unread the whitespace | ||
| 420 | - // character even though it is ignored since it may be the | ||
| 421 | - // newline after a stream keyword. Removing it here could | ||
| 422 | - // make the stream-reading code break on some files, | ||
| 423 | - // though not on any files in the test suite as of this | ||
| 424 | - // writing. | ||
| 425 | - | ||
| 426 | - this->type = tt_word; | 316 | + case '<': |
| 317 | + this->state = st_lt; | ||
| 318 | + return; | ||
| 319 | + | ||
| 320 | + case '>': | ||
| 321 | + this->state = st_gt; | ||
| 322 | + return; | ||
| 323 | + | ||
| 324 | + case (')'): | ||
| 325 | + this->type = tt_bad; | ||
| 326 | + QTC::TC("qpdf", "QPDFTokenizer bad )"); | ||
| 327 | + this->error_message = "unexpected )"; | ||
| 328 | + this->val += ch; | ||
| 329 | + this->state = st_token_ready; | ||
| 330 | + return; | ||
| 331 | + | ||
| 332 | + case '[': | ||
| 333 | + this->type = tt_array_open; | ||
| 334 | + this->state = st_token_ready; | ||
| 335 | + this->val += ch; | ||
| 336 | + return; | ||
| 337 | + | ||
| 338 | + case ']': | ||
| 339 | + this->type = tt_array_close; | ||
| 340 | + this->val += ch; | ||
| 341 | + this->state = st_token_ready; | ||
| 342 | + return; | ||
| 343 | + | ||
| 344 | + case '{': | ||
| 345 | + this->type = tt_brace_open; | ||
| 346 | + this->state = st_token_ready; | ||
| 347 | + this->val += ch; | ||
| 348 | + return; | ||
| 349 | + | ||
| 350 | + case '}': | ||
| 351 | + this->type = tt_brace_close; | ||
| 352 | + this->state = st_token_ready; | ||
| 353 | + this->val += ch; | ||
| 354 | + return; | ||
| 355 | + | ||
| 356 | + default: | ||
| 357 | + this->state = st_literal; | ||
| 358 | + this->val += ch; | ||
| 359 | + return; | ||
| 360 | + } | ||
| 361 | +} | ||
| 362 | + | ||
| 363 | +void | ||
| 364 | +QPDFTokenizer::inSpace(char ch) | ||
| 365 | +{ | ||
| 366 | + // We only enter this state if include_ignorable is true. | ||
| 367 | + if (!isSpace(ch)) { | ||
| 368 | + this->type = tt_space; | ||
| 369 | + this->unread_char = true; | ||
| 370 | + this->char_to_unread = ch; | ||
| 371 | + this->state = st_token_ready; | ||
| 372 | + return; | ||
| 373 | + } else { | ||
| 374 | + this->val += ch; | ||
| 375 | + return; | ||
| 376 | + } | ||
| 377 | +} | ||
| 378 | + | ||
| 379 | +void | ||
| 380 | +QPDFTokenizer::inComment(char ch) | ||
| 381 | +{ | ||
| 382 | + if ((ch == '\r') || (ch == '\n')) { | ||
| 383 | + if (this->include_ignorable) { | ||
| 384 | + this->type = tt_comment; | ||
| 427 | this->unread_char = true; | 385 | this->unread_char = true; |
| 428 | this->char_to_unread = ch; | 386 | this->char_to_unread = ch; |
| 429 | this->state = st_token_ready; | 387 | this->state = st_token_ready; |
| 430 | } else { | 388 | } else { |
| 431 | - this->val += ch; | 389 | + this->state = st_top; |
| 432 | } | 390 | } |
| 391 | + } else if (this->include_ignorable) { | ||
| 392 | + this->val += ch; | ||
| 393 | + } | ||
| 394 | +} | ||
| 395 | + | ||
| 396 | +void | ||
| 397 | +QPDFTokenizer::inString(char ch) | ||
| 398 | +{ | ||
| 399 | + switch (ch) { | ||
| 400 | + case '\\': | ||
| 401 | + this->state = st_string_escape; | ||
| 433 | return; | 402 | return; |
| 434 | 403 | ||
| 435 | - case st_inline_image: | 404 | + case '(': |
| 436 | this->val += ch; | 405 | this->val += ch; |
| 437 | - if (this->val.length() == this->inline_image_bytes) { | ||
| 438 | - QTC::TC("qpdf", "QPDFTokenizer found EI by byte count"); | ||
| 439 | - this->type = tt_inline_image; | ||
| 440 | - this->inline_image_bytes = 0; | 406 | + ++this->string_depth; |
| 407 | + return; | ||
| 408 | + | ||
| 409 | + case ')': | ||
| 410 | + if (--this->string_depth == 0) { | ||
| 411 | + this->type = tt_string; | ||
| 441 | this->state = st_token_ready; | 412 | this->state = st_token_ready; |
| 413 | + return; | ||
| 442 | } | 414 | } |
| 415 | + | ||
| 416 | + this->val += ch; | ||
| 443 | return; | 417 | return; |
| 444 | 418 | ||
| 445 | - case st_in_hexstring: | ||
| 446 | - inHexstring(ch); | 419 | + case '\r': |
| 420 | + // CR by itself is converted to LF | ||
| 421 | + this->val += '\n'; | ||
| 422 | + this->state = st_string_after_cr; | ||
| 447 | return; | 423 | return; |
| 448 | 424 | ||
| 449 | - case st_in_hexstring_2nd: | ||
| 450 | - inHexstring2nd(ch); | 425 | + case '\n': |
| 426 | + this->val += ch; | ||
| 451 | return; | 427 | return; |
| 452 | 428 | ||
| 453 | default: | 429 | default: |
| 454 | - throw std::logic_error( | ||
| 455 | - "INTERNAL ERROR: invalid state while reading token"); | 430 | + this->val += ch; |
| 431 | + return; | ||
| 432 | + } | ||
| 433 | +} | ||
| 434 | + | ||
| 435 | +void | ||
| 436 | +QPDFTokenizer::inStringEscape(char ch) | ||
| 437 | +{ | ||
| 438 | + this->state = st_in_string; | ||
| 439 | + switch (ch) { | ||
| 440 | + case '0': | ||
| 441 | + case '1': | ||
| 442 | + case '2': | ||
| 443 | + case '3': | ||
| 444 | + case '4': | ||
| 445 | + case '5': | ||
| 446 | + case '6': | ||
| 447 | + case '7': | ||
| 448 | + this->state = st_char_code; | ||
| 449 | + this->char_code = 0; | ||
| 450 | + this->digit_count = 0; | ||
| 451 | + inCharCode(ch); | ||
| 452 | + return; | ||
| 453 | + | ||
| 454 | + case 'n': | ||
| 455 | + this->val += '\n'; | ||
| 456 | + return; | ||
| 457 | + | ||
| 458 | + case 'r': | ||
| 459 | + this->val += '\r'; | ||
| 460 | + return; | ||
| 461 | + | ||
| 462 | + case 't': | ||
| 463 | + this->val += '\t'; | ||
| 464 | + return; | ||
| 465 | + | ||
| 466 | + case 'b': | ||
| 467 | + this->val += '\b'; | ||
| 468 | + return; | ||
| 469 | + | ||
| 470 | + case 'f': | ||
| 471 | + this->val += '\f'; | ||
| 472 | + return; | ||
| 473 | + | ||
| 474 | + case '\n': | ||
| 475 | + return; | ||
| 476 | + | ||
| 477 | + case '\r': | ||
| 478 | + this->state = st_string_after_cr; | ||
| 479 | + return; | ||
| 480 | + | ||
| 481 | + default: | ||
| 482 | + // PDF spec says backslash is ignored before anything else | ||
| 483 | + this->val += ch; | ||
| 484 | + return; | ||
| 485 | + } | ||
| 486 | +} | ||
| 487 | + | ||
| 488 | +void | ||
| 489 | +QPDFTokenizer::inStringAfterCR(char ch) | ||
| 490 | +{ | ||
| 491 | + this->state = st_in_string; | ||
| 492 | + if (ch != '\n') { | ||
| 493 | + inString(ch); | ||
| 494 | + } | ||
| 495 | +} | ||
| 496 | + | ||
| 497 | +void | ||
| 498 | +QPDFTokenizer::inLt(char ch) | ||
| 499 | +{ | ||
| 500 | + if (ch == '<') { | ||
| 501 | + this->val += "<<"; | ||
| 502 | + this->type = tt_dict_open; | ||
| 503 | + this->state = st_token_ready; | ||
| 504 | + return; | ||
| 505 | + } | ||
| 506 | + | ||
| 507 | + this->state = st_in_hexstring; | ||
| 508 | + inHexstring(ch); | ||
| 509 | +} | ||
| 510 | + | ||
| 511 | +void | ||
| 512 | +QPDFTokenizer::inGt(char ch) | ||
| 513 | +{ | ||
| 514 | + if (ch == '>') { | ||
| 515 | + this->val += ">>"; | ||
| 516 | + this->type = tt_dict_close; | ||
| 517 | + this->state = st_token_ready; | ||
| 518 | + } else { | ||
| 519 | + this->val += ">"; | ||
| 520 | + this->type = tt_bad; | ||
| 521 | + QTC::TC("qpdf", "QPDFTokenizer bad >"); | ||
| 522 | + this->error_message = "unexpected >"; | ||
| 523 | + this->unread_char = true; | ||
| 524 | + this->char_to_unread = ch; | ||
| 525 | + this->state = st_token_ready; | ||
| 526 | + } | ||
| 527 | +} | ||
| 528 | + | ||
| 529 | +void | ||
| 530 | +QPDFTokenizer::inLiteral(char ch) | ||
| 531 | +{ | ||
| 532 | + if (isDelimiter(ch)) { | ||
| 533 | + // A C-locale whitespace character or delimiter terminates | ||
| 534 | + // token. It is important to unread the whitespace | ||
| 535 | + // character even though it is ignored since it may be the | ||
| 536 | + // newline after a stream keyword. Removing it here could | ||
| 537 | + // make the stream-reading code break on some files, | ||
| 538 | + // though not on any files in the test suite as of this | ||
| 539 | + // writing. | ||
| 540 | + | ||
| 541 | + this->type = tt_word; | ||
| 542 | + this->unread_char = true; | ||
| 543 | + this->char_to_unread = ch; | ||
| 544 | + this->state = st_token_ready; | ||
| 545 | + } else { | ||
| 546 | + this->val += ch; | ||
| 456 | } | 547 | } |
| 457 | } | 548 | } |
| 458 | 549 | ||
| @@ -521,45 +612,6 @@ QPDFTokenizer::inHexstring2nd(char ch) | @@ -521,45 +612,6 @@ QPDFTokenizer::inHexstring2nd(char ch) | ||
| 521 | } | 612 | } |
| 522 | 613 | ||
| 523 | void | 614 | void |
| 524 | -QPDFTokenizer::inString(char ch) | ||
| 525 | -{ | ||
| 526 | - switch (ch) { | ||
| 527 | - case '\\': | ||
| 528 | - this->state = st_string_escape; | ||
| 529 | - return; | ||
| 530 | - | ||
| 531 | - case '(': | ||
| 532 | - this->val += ch; | ||
| 533 | - ++this->string_depth; | ||
| 534 | - return; | ||
| 535 | - | ||
| 536 | - case ')': | ||
| 537 | - if (--this->string_depth == 0) { | ||
| 538 | - this->type = tt_string; | ||
| 539 | - this->state = st_token_ready; | ||
| 540 | - return; | ||
| 541 | - } | ||
| 542 | - | ||
| 543 | - this->val += ch; | ||
| 544 | - return; | ||
| 545 | - | ||
| 546 | - case '\r': | ||
| 547 | - // CR by itself is converted to LF | ||
| 548 | - this->val += '\n'; | ||
| 549 | - this->state = st_string_after_cr; | ||
| 550 | - return; | ||
| 551 | - | ||
| 552 | - case '\n': | ||
| 553 | - this->val += ch; | ||
| 554 | - return; | ||
| 555 | - | ||
| 556 | - default: | ||
| 557 | - this->val += ch; | ||
| 558 | - return; | ||
| 559 | - } | ||
| 560 | -} | ||
| 561 | - | ||
| 562 | -void | ||
| 563 | QPDFTokenizer::inCharCode(char ch) | 615 | QPDFTokenizer::inCharCode(char ch) |
| 564 | { | 616 | { |
| 565 | if (('0' <= ch) && (ch <= '7')) { | 617 | if (('0' <= ch) && (ch <= '7')) { |
| @@ -576,6 +628,18 @@ QPDFTokenizer::inCharCode(char ch) | @@ -576,6 +628,18 @@ QPDFTokenizer::inCharCode(char ch) | ||
| 576 | } | 628 | } |
| 577 | 629 | ||
| 578 | void | 630 | void |
| 631 | +QPDFTokenizer::inInlineImage(char ch) | ||
| 632 | +{ | ||
| 633 | + this->val += ch; | ||
| 634 | + if (this->val.length() == this->inline_image_bytes) { | ||
| 635 | + QTC::TC("qpdf", "QPDFTokenizer found EI by byte count"); | ||
| 636 | + this->type = tt_inline_image; | ||
| 637 | + this->inline_image_bytes = 0; | ||
| 638 | + this->state = st_token_ready; | ||
| 639 | + } | ||
| 640 | +} | ||
| 641 | + | ||
| 642 | +void | ||
| 579 | QPDFTokenizer::presentEOF() | 643 | QPDFTokenizer::presentEOF() |
| 580 | { | 644 | { |
| 581 | if (this->state == st_literal) { | 645 | if (this->state == st_literal) { |