Commit 863d95e5676b0c03539c1cd9bacb31039d53b433
Committed by
Jay Berkenbilt
1 parent
a07d2b41
Add new method QPDFTokenizer::nextToken
Showing
2 changed files
with
51 additions
and
25 deletions
include/qpdf/QPDFTokenizer.hh
| @@ -204,6 +204,18 @@ class QPDFTokenizer | @@ -204,6 +204,18 @@ class QPDFTokenizer | ||
| 204 | QPDF_DLL | 204 | QPDF_DLL |
| 205 | void expectInlineImage(std::shared_ptr<InputSource> input); | 205 | void expectInlineImage(std::shared_ptr<InputSource> input); |
| 206 | 206 | ||
| 207 | + // Read a token from an input source. Context describes the | ||
| 208 | + // context in which the token is being read and is used in the | ||
| 209 | + // exception thrown if there is an error. After a token is read, | ||
| 210 | + // the position of the input source returned by input->tell() | ||
| 211 | + // points to just after the token, and the input source's "last | ||
| 212 | + // offset" as returned by input->getLastOffset() points to the | ||
| 213 | + // beginning of the token. Returns false if the token is bad | ||
| 214 | + // or if scanning produced an error message for any reason. | ||
| 215 | + QPDF_DLL | ||
| 216 | + bool nextToken( | ||
| 217 | + InputSource& input, std::string const& context, size_t max_len = 0); | ||
| 218 | + | ||
| 207 | private: | 219 | private: |
| 208 | QPDFTokenizer(QPDFTokenizer const&) = delete; | 220 | QPDFTokenizer(QPDFTokenizer const&) = delete; |
| 209 | QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; | 221 | QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; |
libqpdf/QPDFTokenizer.cc
| @@ -805,7 +805,9 @@ QPDFTokenizer::presentEOF() | @@ -805,7 +805,9 @@ QPDFTokenizer::presentEOF() | ||
| 805 | void | 805 | void |
| 806 | QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) | 806 | QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) |
| 807 | { | 807 | { |
| 808 | - if (this->state != st_before_token) { | 808 | + if (this->state == st_token_ready) { |
| 809 | + reset(); | ||
| 810 | + } else if (this->state != st_before_token) { | ||
| 809 | throw std::logic_error("QPDFTokenizer::expectInlineImage called" | 811 | throw std::logic_error("QPDFTokenizer::expectInlineImage called" |
| 810 | " when tokenizer is in improper state"); | 812 | " when tokenizer is in improper state"); |
| 811 | } | 813 | } |
| @@ -941,11 +943,40 @@ QPDFTokenizer::readToken( | @@ -941,11 +943,40 @@ QPDFTokenizer::readToken( | ||
| 941 | bool allow_bad, | 943 | bool allow_bad, |
| 942 | size_t max_len) | 944 | size_t max_len) |
| 943 | { | 945 | { |
| 944 | - qpdf_offset_t offset = input->fastTell(); | 946 | + nextToken(*input, context, max_len); |
| 947 | + | ||
| 948 | + Token token; | ||
| 949 | + bool unread_char; | ||
| 950 | + char char_to_unread; | ||
| 951 | + getToken(token, unread_char, char_to_unread); | ||
| 952 | + | ||
| 953 | + if (token.getType() == tt_bad) { | ||
| 954 | + if (allow_bad) { | ||
| 955 | + QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); | ||
| 956 | + } else { | ||
| 957 | + throw QPDFExc( | ||
| 958 | + qpdf_e_damaged_pdf, | ||
| 959 | + input->getName(), | ||
| 960 | + context, | ||
| 961 | + input->getLastOffset(), | ||
| 962 | + token.getErrorMessage()); | ||
| 963 | + } | ||
| 964 | + } | ||
| 965 | + return token; | ||
| 966 | +} | ||
| 967 | + | ||
| 968 | +bool | ||
| 969 | +QPDFTokenizer::nextToken( | ||
| 970 | + InputSource& input, std::string const& context, size_t max_len) | ||
| 971 | +{ | ||
| 972 | + if (this->state != st_inline_image) { | ||
| 973 | + reset(); | ||
| 974 | + } | ||
| 975 | + qpdf_offset_t offset = input.fastTell(); | ||
| 945 | 976 | ||
| 946 | while (this->state != st_token_ready) { | 977 | while (this->state != st_token_ready) { |
| 947 | char ch; | 978 | char ch; |
| 948 | - if (!input->fastRead(ch)) { | 979 | + if (!input.fastRead(ch)) { |
| 949 | presentEOF(); | 980 | presentEOF(); |
| 950 | 981 | ||
| 951 | if ((this->type == tt_eof) && (!this->allow_eof)) { | 982 | if ((this->type == tt_eof) && (!this->allow_eof)) { |
| @@ -954,7 +985,7 @@ QPDFTokenizer::readToken( | @@ -954,7 +985,7 @@ QPDFTokenizer::readToken( | ||
| 954 | // exercised. | 985 | // exercised. |
| 955 | this->type = tt_bad; | 986 | this->type = tt_bad; |
| 956 | this->error_message = "unexpected EOF"; | 987 | this->error_message = "unexpected EOF"; |
| 957 | - offset = input->getLastOffset(); | 988 | + offset = input.getLastOffset(); |
| 958 | } | 989 | } |
| 959 | } else { | 990 | } else { |
| 960 | handleCharacter(ch); | 991 | handleCharacter(ch); |
| @@ -976,28 +1007,11 @@ QPDFTokenizer::readToken( | @@ -976,28 +1007,11 @@ QPDFTokenizer::readToken( | ||
| 976 | } | 1007 | } |
| 977 | } | 1008 | } |
| 978 | 1009 | ||
| 979 | - Token token; | ||
| 980 | - bool unread_char; | ||
| 981 | - char char_to_unread; | ||
| 982 | - getToken(token, unread_char, char_to_unread); | ||
| 983 | - input->fastUnread(unread_char); | ||
| 984 | - | ||
| 985 | - if (token.getType() != tt_eof) { | ||
| 986 | - input->setLastOffset(offset); | ||
| 987 | - } | 1010 | + input.fastUnread(!this->in_token && !this->before_token); |
| 988 | 1011 | ||
| 989 | - if (token.getType() == tt_bad) { | ||
| 990 | - if (allow_bad) { | ||
| 991 | - QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); | ||
| 992 | - } else { | ||
| 993 | - throw QPDFExc( | ||
| 994 | - qpdf_e_damaged_pdf, | ||
| 995 | - input->getName(), | ||
| 996 | - context, | ||
| 997 | - offset, | ||
| 998 | - token.getErrorMessage()); | ||
| 999 | - } | 1012 | + if (this->type != tt_eof) { |
| 1013 | + input.setLastOffset(offset); | ||
| 1000 | } | 1014 | } |
| 1001 | 1015 | ||
| 1002 | - return token; | 1016 | + return this->error_message.empty(); |
| 1003 | } | 1017 | } |