Commit 863d95e5676b0c03539c1cd9bacb31039d53b433

Authored by m-holger
Committed by Jay Berkenbilt
1 parent a07d2b41

Add new method QPDFTokenizer::nextToken

include/qpdf/QPDFTokenizer.hh
... ... @@ -204,6 +204,18 @@ class QPDFTokenizer
204 204 QPDF_DLL
205 205 void expectInlineImage(std::shared_ptr<InputSource> input);
206 206  
  207 + // Read a token from an input source. Context describes the
  208 + // context in which the token is being read and is used in the
  209 + // exception thrown if there is an error. After a token is read,
  210 + // the position of the input source returned by input.tell()
  211 + // points to just after the token, and the input source's "last
  212 + // offset" as returned by input.getLastOffset() points to the
  213 + // beginning of the token. Returns false if the token is bad
  214 + // or if scanning produced an error message for any reason.
  215 + QPDF_DLL
  216 + bool nextToken(
  217 + InputSource& input, std::string const& context, size_t max_len = 0);
  218 +
207 219 private:
208 220 QPDFTokenizer(QPDFTokenizer const&) = delete;
209 221 QPDFTokenizer& operator=(QPDFTokenizer const&) = delete;
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -805,7 +805,9 @@ QPDFTokenizer::presentEOF()
805 805 void
806 806 QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input)
807 807 {
808   - if (this->state != st_before_token) {
  808 + if (this->state == st_token_ready) {
  809 + reset();
  810 + } else if (this->state != st_before_token) {
809 811 throw std::logic_error("QPDFTokenizer::expectInlineImage called"
810 812 " when tokenizer is in improper state");
811 813 }
... ... @@ -941,11 +943,40 @@ QPDFTokenizer::readToken(
941 943 bool allow_bad,
942 944 size_t max_len)
943 945 {
944   - qpdf_offset_t offset = input->fastTell();
  946 + nextToken(*input, context, max_len);
  947 +
  948 + Token token;
  949 + bool unread_char;
  950 + char char_to_unread;
  951 + getToken(token, unread_char, char_to_unread);
  952 +
  953 + if (token.getType() == tt_bad) {
  954 + if (allow_bad) {
  955 + QTC::TC("qpdf", "QPDFTokenizer allowing bad token");
  956 + } else {
  957 + throw QPDFExc(
  958 + qpdf_e_damaged_pdf,
  959 + input->getName(),
  960 + context,
  961 + input->getLastOffset(),
  962 + token.getErrorMessage());
  963 + }
  964 + }
  965 + return token;
  966 +}
  967 +
  968 +bool
  969 +QPDFTokenizer::nextToken(
  970 + InputSource& input, std::string const& context, size_t max_len)
  971 +{
  972 + if (this->state != st_inline_image) {
  973 + reset();
  974 + }
  975 + qpdf_offset_t offset = input.fastTell();
945 976  
946 977 while (this->state != st_token_ready) {
947 978 char ch;
948   - if (!input->fastRead(ch)) {
  979 + if (!input.fastRead(ch)) {
949 980 presentEOF();
950 981  
951 982 if ((this->type == tt_eof) && (!this->allow_eof)) {
... ... @@ -954,7 +985,7 @@ QPDFTokenizer::readToken(
954 985 // exercised.
955 986 this->type = tt_bad;
956 987 this->error_message = "unexpected EOF";
957   - offset = input->getLastOffset();
  988 + offset = input.getLastOffset();
958 989 }
959 990 } else {
960 991 handleCharacter(ch);
... ... @@ -976,28 +1007,11 @@ QPDFTokenizer::readToken(
976 1007 }
977 1008 }
978 1009  
979   - Token token;
980   - bool unread_char;
981   - char char_to_unread;
982   - getToken(token, unread_char, char_to_unread);
983   - input->fastUnread(unread_char);
984   -
985   - if (token.getType() != tt_eof) {
986   - input->setLastOffset(offset);
987   - }
  1010 + input.fastUnread(!this->in_token && !this->before_token);
988 1011  
989   - if (token.getType() == tt_bad) {
990   - if (allow_bad) {
991   - QTC::TC("qpdf", "QPDFTokenizer allowing bad token");
992   - } else {
993   - throw QPDFExc(
994   - qpdf_e_damaged_pdf,
995   - input->getName(),
996   - context,
997   - offset,
998   - token.getErrorMessage());
999   - }
  1012 + if (this->type != tt_eof) {
  1013 + input.setLastOffset(offset);
1000 1014 }
1001 1015  
1002   - return token;
  1016 + return this->error_message.empty();
1003 1017 }
... ...