Commit 863d95e5676b0c03539c1cd9bacb31039d53b433
Committed by Jay Berkenbilt
1 parent: a07d2b41
Add new method QPDFTokenizer::nextToken
Showing 2 changed files with 51 additions and 25 deletions
include/qpdf/QPDFTokenizer.hh
| ... | ... | @@ -204,6 +204,18 @@ class QPDFTokenizer |
| 204 | 204 | QPDF_DLL |
| 205 | 205 | void expectInlineImage(std::shared_ptr<InputSource> input); |
| 206 | 206 | |
| 207 | + // Read a token from an input source. Context describes the | |
| 208 | + // context in which the token is being read and is used in the | |
| 209 | + // exception thrown if there is an error. After a token is read, | |
| 210 | + // the position of the input source returned by input->tell() | |
| 211 | + // points to just after the token, and the input source's "last | |
| 212 | + // offset" as returned by input->getLastOffset() points to the | |
| 213 | + // beginning of the token. Returns false if the token is bad | |
| 214 | + // or if scanning produced an error message for any reason. | |
| 215 | + QPDF_DLL | |
| 216 | + bool nextToken( | |
| 217 | + InputSource& input, std::string const& context, size_t max_len = 0); | |
| 218 | + | |
| 207 | 219 | private: |
| 208 | 220 | QPDFTokenizer(QPDFTokenizer const&) = delete; |
| 209 | 221 | QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -805,7 +805,9 @@ QPDFTokenizer::presentEOF() |
| 805 | 805 | void |
| 806 | 806 | QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) |
| 807 | 807 | { |
| 808 | - if (this->state != st_before_token) { | |
| 808 | + if (this->state == st_token_ready) { | |
| 809 | + reset(); | |
| 810 | + } else if (this->state != st_before_token) { | |
| 809 | 811 | throw std::logic_error("QPDFTokenizer::expectInlineImage called" |
| 810 | 812 | " when tokenizer is in improper state"); |
| 811 | 813 | } |
| ... | ... | @@ -941,11 +943,40 @@ QPDFTokenizer::readToken( |
| 941 | 943 | bool allow_bad, |
| 942 | 944 | size_t max_len) |
| 943 | 945 | { |
| 944 | - qpdf_offset_t offset = input->fastTell(); | |
| 946 | + nextToken(*input, context, max_len); | |
| 947 | + | |
| 948 | + Token token; | |
| 949 | + bool unread_char; | |
| 950 | + char char_to_unread; | |
| 951 | + getToken(token, unread_char, char_to_unread); | |
| 952 | + | |
| 953 | + if (token.getType() == tt_bad) { | |
| 954 | + if (allow_bad) { | |
| 955 | + QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); | |
| 956 | + } else { | |
| 957 | + throw QPDFExc( | |
| 958 | + qpdf_e_damaged_pdf, | |
| 959 | + input->getName(), | |
| 960 | + context, | |
| 961 | + input->getLastOffset(), | |
| 962 | + token.getErrorMessage()); | |
| 963 | + } | |
| 964 | + } | |
| 965 | + return token; | |
| 966 | +} | |
| 967 | + | |
| 968 | +bool | |
| 969 | +QPDFTokenizer::nextToken( | |
| 970 | + InputSource& input, std::string const& context, size_t max_len) | |
| 971 | +{ | |
| 972 | + if (this->state != st_inline_image) { | |
| 973 | + reset(); | |
| 974 | + } | |
| 975 | + qpdf_offset_t offset = input.fastTell(); | |
| 945 | 976 | |
| 946 | 977 | while (this->state != st_token_ready) { |
| 947 | 978 | char ch; |
| 948 | - if (!input->fastRead(ch)) { | |
| 979 | + if (!input.fastRead(ch)) { | |
| 949 | 980 | presentEOF(); |
| 950 | 981 | |
| 951 | 982 | if ((this->type == tt_eof) && (!this->allow_eof)) { |
| ... | ... | @@ -954,7 +985,7 @@ QPDFTokenizer::readToken( |
| 954 | 985 | // exercised. |
| 955 | 986 | this->type = tt_bad; |
| 956 | 987 | this->error_message = "unexpected EOF"; |
| 957 | - offset = input->getLastOffset(); | |
| 988 | + offset = input.getLastOffset(); | |
| 958 | 989 | } |
| 959 | 990 | } else { |
| 960 | 991 | handleCharacter(ch); |
| ... | ... | @@ -976,28 +1007,11 @@ QPDFTokenizer::readToken( |
| 976 | 1007 | } |
| 977 | 1008 | } |
| 978 | 1009 | |
| 979 | - Token token; | |
| 980 | - bool unread_char; | |
| 981 | - char char_to_unread; | |
| 982 | - getToken(token, unread_char, char_to_unread); | |
| 983 | - input->fastUnread(unread_char); | |
| 984 | - | |
| 985 | - if (token.getType() != tt_eof) { | |
| 986 | - input->setLastOffset(offset); | |
| 987 | - } | |
| 1010 | + input.fastUnread(!this->in_token && !this->before_token); | |
| 988 | 1011 | |
| 989 | - if (token.getType() == tt_bad) { | |
| 990 | - if (allow_bad) { | |
| 991 | - QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); | |
| 992 | - } else { | |
| 993 | - throw QPDFExc( | |
| 994 | - qpdf_e_damaged_pdf, | |
| 995 | - input->getName(), | |
| 996 | - context, | |
| 997 | - offset, | |
| 998 | - token.getErrorMessage()); | |
| 999 | - } | |
| 1012 | + if (this->type != tt_eof) { | |
| 1013 | + input.setLastOffset(offset); | |
| 1000 | 1014 | } |
| 1001 | 1015 | |
| 1002 | - return token; | |
| 1016 | + return this->error_message.empty(); | |
| 1003 | 1017 | } | ... | ... |