Commit f3e267fce28c58039789379ba3488ad12c20a7f6
1 parent: 15eaed5c
Move readToken from QPDF to QPDFTokenizer
Showing 3 changed files with 59 additions and 39 deletions
include/qpdf/QPDFTokenizer.hh
| @@ -10,6 +10,8 @@ | @@ -10,6 +10,8 @@ | ||
| 10 | 10 | ||
| 11 | #include <qpdf/DLL.h> | 11 | #include <qpdf/DLL.h> |
| 12 | 12 | ||
| 13 | +#include <qpdf/InputSource.hh> | ||
| 14 | +#include <qpdf/PointerHolder.hh> | ||
| 13 | #include <string> | 15 | #include <string> |
| 14 | #include <stdio.h> | 16 | #include <stdio.h> |
| 15 | 17 | ||
| @@ -122,6 +124,13 @@ class QPDFTokenizer | @@ -122,6 +124,13 @@ class QPDFTokenizer | ||
| 122 | QPDF_DLL | 124 | QPDF_DLL |
| 123 | bool betweenTokens(); | 125 | bool betweenTokens(); |
| 124 | 126 | ||
| 127 | + // Read a token from an input source. Context describes the | ||
| 128 | + // context in which the token is being read and is used in the | ||
| 129 | + // exception thrown if there is an error. | ||
| 130 | + QPDF_DLL | ||
| 131 | + Token readToken(PointerHolder<InputSource> input, | ||
| 132 | + std::string const& context); | ||
| 133 | + | ||
| 125 | private: | 134 | private: |
| 126 | void reset(); | 135 | void reset(); |
| 127 | 136 |
libqpdf/QPDF.cc
| @@ -1419,45 +1419,7 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input, | @@ -1419,45 +1419,7 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input, | ||
| 1419 | QPDFTokenizer::Token | 1419 | QPDFTokenizer::Token |
| 1420 | QPDF::readToken(PointerHolder<InputSource> input) | 1420 | QPDF::readToken(PointerHolder<InputSource> input) |
| 1421 | { | 1421 | { |
| 1422 | - qpdf_offset_t offset = input->tell(); | ||
| 1423 | - QPDFTokenizer::Token token; | ||
| 1424 | - bool unread_char; | ||
| 1425 | - char char_to_unread; | ||
| 1426 | - while (! this->tokenizer.getToken(token, unread_char, char_to_unread)) | ||
| 1427 | - { | ||
| 1428 | - char ch; | ||
| 1429 | - if (input->read(&ch, 1) == 0) | ||
| 1430 | - { | ||
| 1431 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 1432 | - this->last_object_description, offset, | ||
| 1433 | - "EOF while reading token"); | ||
| 1434 | - } | ||
| 1435 | - else | ||
| 1436 | - { | ||
| 1437 | - if (isspace((unsigned char)ch) && | ||
| 1438 | - (input->getLastOffset() == offset)) | ||
| 1439 | - { | ||
| 1440 | - ++offset; | ||
| 1441 | - } | ||
| 1442 | - this->tokenizer.presentCharacter(ch); | ||
| 1443 | - } | ||
| 1444 | - } | ||
| 1445 | - | ||
| 1446 | - if (unread_char) | ||
| 1447 | - { | ||
| 1448 | - input->unreadCh(char_to_unread); | ||
| 1449 | - } | ||
| 1450 | - | ||
| 1451 | - if (token.getType() == QPDFTokenizer::tt_bad) | ||
| 1452 | - { | ||
| 1453 | - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 1454 | - this->last_object_description, offset, | ||
| 1455 | - token.getErrorMessage()); | ||
| 1456 | - } | ||
| 1457 | - | ||
| 1458 | - input->setLastOffset(offset); | ||
| 1459 | - | ||
| 1460 | - return token; | 1422 | + return this->tokenizer.readToken(input, this->last_object_description); |
| 1461 | } | 1423 | } |
| 1462 | 1424 | ||
| 1463 | QPDFObjectHandle | 1425 | QPDFObjectHandle |
libqpdf/QPDFTokenizer.cc
| @@ -6,6 +6,7 @@ | @@ -6,6 +6,7 @@ | ||
| 6 | 6 | ||
| 7 | #include <qpdf/PCRE.hh> | 7 | #include <qpdf/PCRE.hh> |
| 8 | #include <qpdf/QTC.hh> | 8 | #include <qpdf/QTC.hh> |
| 9 | +#include <qpdf/QPDFExc.hh> | ||
| 9 | 10 | ||
| 10 | #include <stdexcept> | 11 | #include <stdexcept> |
| 11 | #include <string.h> | 12 | #include <string.h> |
| @@ -15,6 +16,10 @@ static bool is_hex_digit(char ch) | @@ -15,6 +16,10 @@ static bool is_hex_digit(char ch) | ||
| 15 | { | 16 | { |
| 16 | return (strchr("0123456789abcdefABCDEF", ch) != 0); | 17 | return (strchr("0123456789abcdefABCDEF", ch) != 0); |
| 17 | } | 18 | } |
| 19 | +static bool is_space(char ch) | ||
| 20 | +{ | ||
| 21 | + return (strchr(" \f\n\r\t\v", ch) != 0); | ||
| 22 | +} | ||
| 18 | 23 | ||
| 19 | QPDFTokenizer::QPDFTokenizer() : | 24 | QPDFTokenizer::QPDFTokenizer() : |
| 20 | pound_special_in_name(true) | 25 | pound_special_in_name(true) |
| @@ -460,3 +465,47 @@ QPDFTokenizer::betweenTokens() | @@ -460,3 +465,47 @@ QPDFTokenizer::betweenTokens() | ||
| 460 | { | 465 | { |
| 461 | return ((state == st_top) || (state == st_in_comment)); | 466 | return ((state == st_top) || (state == st_in_comment)); |
| 462 | } | 467 | } |
| 468 | + | ||
| 469 | +QPDFTokenizer::Token | ||
| 470 | +QPDFTokenizer::readToken(PointerHolder<InputSource> input, | ||
| 471 | + std::string const& context) | ||
| 472 | +{ | ||
| 473 | + qpdf_offset_t offset = input->tell(); | ||
| 474 | + Token token; | ||
| 475 | + bool unread_char; | ||
| 476 | + char char_to_unread; | ||
| 477 | + while (! getToken(token, unread_char, char_to_unread)) | ||
| 478 | + { | ||
| 479 | + char ch; | ||
| 480 | + if (input->read(&ch, 1) == 0) | ||
| 481 | + { | ||
| 482 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 483 | + context, offset, | ||
| 484 | + "EOF while reading token"); | ||
| 485 | + } | ||
| 486 | + else | ||
| 487 | + { | ||
| 488 | + if (is_space((unsigned char)ch) && | ||
| 489 | + (input->getLastOffset() == offset)) | ||
| 490 | + { | ||
| 491 | + ++offset; | ||
| 492 | + } | ||
| 493 | + presentCharacter(ch); | ||
| 494 | + } | ||
| 495 | + } | ||
| 496 | + | ||
| 497 | + if (unread_char) | ||
| 498 | + { | ||
| 499 | + input->unreadCh(char_to_unread); | ||
| 500 | + } | ||
| 501 | + | ||
| 502 | + if (token.getType() == tt_bad) | ||
| 503 | + { | ||
| 504 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | ||
| 505 | + context, offset, token.getErrorMessage()); | ||
| 506 | + } | ||
| 507 | + | ||
| 508 | + input->setLastOffset(offset); | ||
| 509 | + | ||
| 510 | + return token; | ||
| 511 | +} |