Commit f3e267fce28c58039789379ba3488ad12c20a7f6
1 parent: 15eaed5c
Move readToken from QPDF to QPDFTokenizer
Showing 3 changed files with 59 additions and 39 deletions
include/qpdf/QPDFTokenizer.hh
| ... | ... | @@ -10,6 +10,8 @@ |
| 10 | 10 | |
| 11 | 11 | #include <qpdf/DLL.h> |
| 12 | 12 | |
| 13 | +#include <qpdf/InputSource.hh> | |
| 14 | +#include <qpdf/PointerHolder.hh> | |
| 13 | 15 | #include <string> |
| 14 | 16 | #include <stdio.h> |
| 15 | 17 | |
| ... | ... | @@ -122,6 +124,13 @@ class QPDFTokenizer |
| 122 | 124 | QPDF_DLL |
| 123 | 125 | bool betweenTokens(); |
| 124 | 126 | |
| 127 | + // Read a token from an input source. Context describes the | |
| 128 | + // context in which the token is being read and is used in the | |
| 129 | + // exception thrown if there is an error. | |
| 130 | + QPDF_DLL | |
| 131 | + Token readToken(PointerHolder<InputSource> input, | |
| 132 | + std::string const& context); | |
| 133 | + | |
| 125 | 134 | private: |
| 126 | 135 | void reset(); |
| 127 | 136 | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -1419,45 +1419,7 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input, |
| 1419 | 1419 | QPDFTokenizer::Token |
| 1420 | 1420 | QPDF::readToken(PointerHolder<InputSource> input) |
| 1421 | 1421 | { |
| 1422 | - qpdf_offset_t offset = input->tell(); | |
| 1423 | - QPDFTokenizer::Token token; | |
| 1424 | - bool unread_char; | |
| 1425 | - char char_to_unread; | |
| 1426 | - while (! this->tokenizer.getToken(token, unread_char, char_to_unread)) | |
| 1427 | - { | |
| 1428 | - char ch; | |
| 1429 | - if (input->read(&ch, 1) == 0) | |
| 1430 | - { | |
| 1431 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1432 | - this->last_object_description, offset, | |
| 1433 | - "EOF while reading token"); | |
| 1434 | - } | |
| 1435 | - else | |
| 1436 | - { | |
| 1437 | - if (isspace((unsigned char)ch) && | |
| 1438 | - (input->getLastOffset() == offset)) | |
| 1439 | - { | |
| 1440 | - ++offset; | |
| 1441 | - } | |
| 1442 | - this->tokenizer.presentCharacter(ch); | |
| 1443 | - } | |
| 1444 | - } | |
| 1445 | - | |
| 1446 | - if (unread_char) | |
| 1447 | - { | |
| 1448 | - input->unreadCh(char_to_unread); | |
| 1449 | - } | |
| 1450 | - | |
| 1451 | - if (token.getType() == QPDFTokenizer::tt_bad) | |
| 1452 | - { | |
| 1453 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1454 | - this->last_object_description, offset, | |
| 1455 | - token.getErrorMessage()); | |
| 1456 | - } | |
| 1457 | - | |
| 1458 | - input->setLastOffset(offset); | |
| 1459 | - | |
| 1460 | - return token; | |
| 1422 | + return this->tokenizer.readToken(input, this->last_object_description); | |
| 1461 | 1423 | } |
| 1462 | 1424 | |
| 1463 | 1425 | QPDFObjectHandle | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -6,6 +6,7 @@ |
| 6 | 6 | |
| 7 | 7 | #include <qpdf/PCRE.hh> |
| 8 | 8 | #include <qpdf/QTC.hh> |
| 9 | +#include <qpdf/QPDFExc.hh> | |
| 9 | 10 | |
| 10 | 11 | #include <stdexcept> |
| 11 | 12 | #include <string.h> |
| ... | ... | @@ -15,6 +16,10 @@ static bool is_hex_digit(char ch) |
| 15 | 16 | { |
| 16 | 17 | return (strchr("0123456789abcdefABCDEF", ch) != 0); |
| 17 | 18 | } |
| 19 | +static bool is_space(char ch) | |
| 20 | +{ | |
| 21 | + return (strchr(" \f\n\r\t\v", ch) != 0); | |
| 22 | +} | |
| 18 | 23 | |
| 19 | 24 | QPDFTokenizer::QPDFTokenizer() : |
| 20 | 25 | pound_special_in_name(true) |
| ... | ... | @@ -460,3 +465,47 @@ QPDFTokenizer::betweenTokens() |
| 460 | 465 | { |
| 461 | 466 | return ((state == st_top) || (state == st_in_comment)); |
| 462 | 467 | } |
| 468 | + | |
| 469 | +QPDFTokenizer::Token | |
| 470 | +QPDFTokenizer::readToken(PointerHolder<InputSource> input, | |
| 471 | + std::string const& context) | |
| 472 | +{ | |
| 473 | + qpdf_offset_t offset = input->tell(); | |
| 474 | + Token token; | |
| 475 | + bool unread_char; | |
| 476 | + char char_to_unread; | |
| 477 | + while (! getToken(token, unread_char, char_to_unread)) | |
| 478 | + { | |
| 479 | + char ch; | |
| 480 | + if (input->read(&ch, 1) == 0) | |
| 481 | + { | |
| 482 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 483 | + context, offset, | |
| 484 | + "EOF while reading token"); | |
| 485 | + } | |
| 486 | + else | |
| 487 | + { | |
| 488 | + if (is_space((unsigned char)ch) && | |
| 489 | + (input->getLastOffset() == offset)) | |
| 490 | + { | |
| 491 | + ++offset; | |
| 492 | + } | |
| 493 | + presentCharacter(ch); | |
| 494 | + } | |
| 495 | + } | |
| 496 | + | |
| 497 | + if (unread_char) | |
| 498 | + { | |
| 499 | + input->unreadCh(char_to_unread); | |
| 500 | + } | |
| 501 | + | |
| 502 | + if (token.getType() == tt_bad) | |
| 503 | + { | |
| 504 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 505 | + context, offset, token.getErrorMessage()); | |
| 506 | + } | |
| 507 | + | |
| 508 | + input->setLastOffset(offset); | |
| 509 | + | |
| 510 | + return token; | |
| 511 | +} | ... | ... |