Commit 863d95e5676b0c03539c1cd9bacb31039d53b433

Authored by m-holger
Committed by Jay Berkenbilt
1 parent a07d2b41

Add new method QPDFTokenizer::nextToken

include/qpdf/QPDFTokenizer.hh
@@ -204,6 +204,18 @@ class QPDFTokenizer @@ -204,6 +204,18 @@ class QPDFTokenizer
204 QPDF_DLL 204 QPDF_DLL
205 void expectInlineImage(std::shared_ptr<InputSource> input); 205 void expectInlineImage(std::shared_ptr<InputSource> input);
206 206
  207 + // Read a token from an input source. Context describes the
  208 + // context in which the token is being read and is used in the
  209 + // exception thrown if there is an error. After a token is read,
  210 + // the position of the input source returned by input.tell()
  211 + // points to just after the token, and the input source's "last
  212 + // offset" as returned by input.getLastOffset() points to the
  213 + // beginning of the token. Returns false if the token is bad
  214 + // or if scanning produced an error message for any reason.
  215 + QPDF_DLL
  216 + bool nextToken(
  217 + InputSource& input, std::string const& context, size_t max_len = 0);
  218 +
207 private: 219 private:
208 QPDFTokenizer(QPDFTokenizer const&) = delete; 220 QPDFTokenizer(QPDFTokenizer const&) = delete;
209 QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; 221 QPDFTokenizer& operator=(QPDFTokenizer const&) = delete;
libqpdf/QPDFTokenizer.cc
@@ -805,7 +805,9 @@ QPDFTokenizer::presentEOF() @@ -805,7 +805,9 @@ QPDFTokenizer::presentEOF()
805 void 805 void
806 QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) 806 QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input)
807 { 807 {
808 - if (this->state != st_before_token) { 808 + if (this->state == st_token_ready) {
  809 + reset();
  810 + } else if (this->state != st_before_token) {
809 throw std::logic_error("QPDFTokenizer::expectInlineImage called" 811 throw std::logic_error("QPDFTokenizer::expectInlineImage called"
810 " when tokenizer is in improper state"); 812 " when tokenizer is in improper state");
811 } 813 }
@@ -941,11 +943,40 @@ QPDFTokenizer::readToken( @@ -941,11 +943,40 @@ QPDFTokenizer::readToken(
941 bool allow_bad, 943 bool allow_bad,
942 size_t max_len) 944 size_t max_len)
943 { 945 {
944 - qpdf_offset_t offset = input->fastTell(); 946 + nextToken(*input, context, max_len);
  947 +
  948 + Token token;
  949 + bool unread_char;
  950 + char char_to_unread;
  951 + getToken(token, unread_char, char_to_unread);
  952 +
  953 + if (token.getType() == tt_bad) {
  954 + if (allow_bad) {
  955 + QTC::TC("qpdf", "QPDFTokenizer allowing bad token");
  956 + } else {
  957 + throw QPDFExc(
  958 + qpdf_e_damaged_pdf,
  959 + input->getName(),
  960 + context,
  961 + input->getLastOffset(),
  962 + token.getErrorMessage());
  963 + }
  964 + }
  965 + return token;
  966 +}
  967 +
  968 +bool
  969 +QPDFTokenizer::nextToken(
  970 + InputSource& input, std::string const& context, size_t max_len)
  971 +{
  972 + if (this->state != st_inline_image) {
  973 + reset();
  974 + }
  975 + qpdf_offset_t offset = input.fastTell();
945 976
946 while (this->state != st_token_ready) { 977 while (this->state != st_token_ready) {
947 char ch; 978 char ch;
948 - if (!input->fastRead(ch)) { 979 + if (!input.fastRead(ch)) {
949 presentEOF(); 980 presentEOF();
950 981
951 if ((this->type == tt_eof) && (!this->allow_eof)) { 982 if ((this->type == tt_eof) && (!this->allow_eof)) {
@@ -954,7 +985,7 @@ QPDFTokenizer::readToken( @@ -954,7 +985,7 @@ QPDFTokenizer::readToken(
954 // exercised. 985 // exercised.
955 this->type = tt_bad; 986 this->type = tt_bad;
956 this->error_message = "unexpected EOF"; 987 this->error_message = "unexpected EOF";
957 - offset = input->getLastOffset(); 988 + offset = input.getLastOffset();
958 } 989 }
959 } else { 990 } else {
960 handleCharacter(ch); 991 handleCharacter(ch);
@@ -976,28 +1007,11 @@ QPDFTokenizer::readToken( @@ -976,28 +1007,11 @@ QPDFTokenizer::readToken(
976 } 1007 }
977 } 1008 }
978 1009
979 - Token token;  
980 - bool unread_char;  
981 - char char_to_unread;  
982 - getToken(token, unread_char, char_to_unread);  
983 - input->fastUnread(unread_char);  
984 -  
985 - if (token.getType() != tt_eof) {  
986 - input->setLastOffset(offset);  
987 - } 1010 + input.fastUnread(!this->in_token && !this->before_token);
988 1011
989 - if (token.getType() == tt_bad) {  
990 - if (allow_bad) {  
991 - QTC::TC("qpdf", "QPDFTokenizer allowing bad token");  
992 - } else {  
993 - throw QPDFExc(  
994 - qpdf_e_damaged_pdf,  
995 - input->getName(),  
996 - context,  
997 - offset,  
998 - token.getErrorMessage());  
999 - } 1012 + if (this->type != tt_eof) {
  1013 + input.setLastOffset(offset);
1000 } 1014 }
1001 1015
1002 - return token; 1016 + return this->error_message.empty();
1003 } 1017 }