Move readToken from QPDF to QPDFTokenizer

Jay Berkenbilt
1 parent 15eaed5c
Showing 3 changed files with 59 additions and 39 deletions
include/qpdf/QPDFTokenizer.hh
libqpdf/QPDF.cc
libqpdf/QPDFTokenizer.cc
@@ -10,6 +10,8 @@
 #include <qpdf/DLL.h>
+#include <qpdf/InputSource.hh>
+#include <qpdf/PointerHolder.hh>
 #include <string>
 #include <stdio.h>
@@ -122,6 +124,13 @@ class QPDFTokenizer
     QPDF_DLL
     bool betweenTokens();
+    // Read a token from an input source.  Context describes the
+    // context in which the token is being read and is used in the
+    // exception thrown if there is an error.
+    QPDF_DLL
+    Token readToken(PointerHolder<InputSource> input,
+                    std::string const& context);
+
   private:
     void reset();
@@ -1419,45 +1419,7 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input,
 QPDFTokenizer::Token
 QPDF::readToken(PointerHolder<InputSource> input)
 {
-    qpdf_offset_t offset = input->tell();
-    QPDFTokenizer::Token token;
-    bool unread_char;
-    char char_to_unread;
-    while (! this->tokenizer.getToken(token, unread_char, char_to_unread))
-    {
-	char ch;
-	if (input->read(&ch, 1) == 0)
-	{
-	    throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
-			  this->last_object_description, offset,
-			  "EOF while reading token");
-	}
-	else
-	{
-	    if (isspace((unsigned char)ch) &&
-		(input->getLastOffset() == offset))
-	    {
-		++offset;
-	    }
-	    this->tokenizer.presentCharacter(ch);
-	}
-    }
-
-    if (unread_char)
-    {
-	input->unreadCh(char_to_unread);
-    }
-
-    if (token.getType() == QPDFTokenizer::tt_bad)
-    {
-	throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
-		      this->last_object_description, offset,
-		      token.getErrorMessage());
-    }
-
-    input->setLastOffset(offset);
-
-    return token;
+    return this->tokenizer.readToken(input, this->last_object_description);
 }
 QPDFObjectHandle
@@ -6,6 +6,7 @@
 #include <qpdf/PCRE.hh>
 #include <qpdf/QTC.hh>
+#include <qpdf/QPDFExc.hh>
 #include <stdexcept>
 #include <string.h>
@@ -15,6 +16,10 @@ static bool is_hex_digit(char ch)
 {
     return (strchr("0123456789abcdefABCDEF", ch) != 0);
 }
+// Return true if ch is a space, form feed, newline, carriage return,
+// horizontal tab, or vertical tab.  strchr() treats the terminating
+// NUL of the search string as part of the string, so a NUL argument
+// would otherwise be reported as whitespace; reject it explicitly.
+static bool is_space(char ch)
+{
+    return ((ch != '\0') && (strchr(" \f\n\r\t\v", ch) != 0));
+}
 QPDFTokenizer::QPDFTokenizer() :
     pound_special_in_name(true)
@@ -460,3 +465,47 @@ QPDFTokenizer::betweenTokens()
 {
     return ((state == st_top) || (state == st_in_comment));
 }
+
+// Read one token from input by feeding this tokenizer a character at
+// a time until getToken() reports that a token is complete.
+//
+// input:   source to read from; read one byte at a time.
+// context: description passed through to any QPDFExc thrown here.
+//
+// Returns the completed token.  Before returning, the input's last
+// offset is set to the offset tracked for this token: it starts at
+// input->tell() and is advanced past each whitespace character read
+// while input->getLastOffset() still equals it.
+//
+// Throws QPDFExc (qpdf_e_damaged_pdf) if the input runs out before a
+// token is complete, or if the completed token has type tt_bad.
+QPDFTokenizer::Token
+QPDFTokenizer::readToken(PointerHolder<InputSource> input,
+                         std::string const& context)
+{
+    qpdf_offset_t offset = input->tell();
+    Token token;
+    bool unread_char;
+    char char_to_unread;
+    while (! getToken(token, unread_char, char_to_unread))
+    {
+        char ch;
+        if (input->read(&ch, 1) == 0)
+        {
+            throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+                          context, offset,
+                          "EOF while reading token");
+        }
+        else
+        {
+            // is_space() takes a plain char, so ch is passed as-is.
+            // Leading whitespace moves the reported offset forward so
+            // that it refers to the token rather than the whitespace
+            // in front of it.
+            if (is_space(ch) &&
+                (input->getLastOffset() == offset))
+            {
+                ++offset;
+            }
+            presentCharacter(ch);
+        }
+    }
+
+    // getToken() may hand back one character it was given beyond the
+    // end of the token; return it to the input.
+    if (unread_char)
+    {
+        input->unreadCh(char_to_unread);
+    }
+
+    if (token.getType() == tt_bad)
+    {
+        throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
+                      context, offset, token.getErrorMessage());
+    }
+
+    input->setLastOffset(offset);
+
+    return token;
+}