Use inline image token type in tokenizer filter

Jay Berkenbilt
1 parent 5b5f45e9
Showing 3 changed files with 15 additions and 59 deletions
libqpdf/Pl_QPDFTokenizer.cc
libqpdf/qpdf/Pl_QPDFTokenizer.hh
qpdf/qpdf.testcov
@@ -12,10 +12,8 @@ Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
     just_wrote_nl(false),
     last_char_was_cr(false),
     unread_char(false),
-    char_to_unread('\0'),
-    in_inline_image(false)
+    char_to_unread('\0')
 {
-    memset(this->image_buf, 0, IMAGE_BUF_SIZE);
 }
  
 Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
@@ -56,37 +54,6 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token&amp; token)
 void
 Pl_QPDFTokenizer::processChar(char ch)
 {
-    if (this->in_inline_image)
-    {
-	// Scan through the input looking for EI surrounded by
-	// whitespace.  If that pattern appears in the inline image's
-	// representation, we're hosed, but this situation seems
-	// excessively unlikely, and this code path is only followed
-	// during content stream normalization, which is pretty much
-	// used for debugging and human inspection of PDF files.
-	memmove(this->image_buf,
-		this->image_buf + 1,
-		IMAGE_BUF_SIZE - 1);
-	this->image_buf[IMAGE_BUF_SIZE - 1] = ch;
-	if (strchr(" \t\n\v\f\r", this->image_buf[0]) &&
-	    (this->image_buf[1] == 'E') &&
-	    (this->image_buf[2] == 'I') &&
-	    strchr(" \t\n\v\f\r", this->image_buf[3]))
-	{
-	    // We've found an EI operator.  We've already written the
-	    // EI operator to output; terminate with a newline
-	    // character and resume normal processing.
-	    writeNext("\n", 1);
-	    this->in_inline_image = false;
-	    QTC::TC("qpdf", "Pl_QPDFTokenizer found EI");
-	}
-	else
-	{
-	    writeNext(&ch, 1);
-	}
-	return;
-    }
-
     tokenizer.presentCharacter(ch);
     QPDFTokenizer::Token token;
     if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
@@ -100,13 +67,8 @@ Pl_QPDFTokenizer::processChar(char ch)
 	if ((token.getType() == QPDFTokenizer::tt_word) &&
 	    (token.getValue() == "ID"))
 	{
-	    // Suspend normal scanning until we find an EI token.
-	    this->in_inline_image = true;
-	    if (this->unread_char)
-	    {
-		writeNext(&this->char_to_unread, 1);
-		this->unread_char = false;
-	    }
+            QTC::TC("qpdf", "Pl_QPDFTokenizer found ID");
+            tokenizer.expectInlineImage();
 	}
     }
     else
@@ -171,21 +133,18 @@ void
 Pl_QPDFTokenizer::finish()
 {
     this->tokenizer.presentEOF();
-    if (! this->in_inline_image)
+    QPDFTokenizer::Token token;
+    if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
     {
-	QPDFTokenizer::Token token;
-	if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
-	{
-	    writeToken(token);
-	    if (unread_char)
-	    {
-		if (this->char_to_unread == '\r')
-		{
-		    this->char_to_unread = '\n';
-		}
-		writeNext(&this->char_to_unread, 1);
-	    }
-	}
+        writeToken(token);
+        if (unread_char)
+        {
+            if (this->char_to_unread == '\r')
+            {
+                this->char_to_unread = '\n';
+            }
+            writeNext(&this->char_to_unread, 1);
+        }
     }
     if (! this->just_wrote_nl)
     {
@@ -33,9 +33,6 @@ class Pl_QPDFTokenizer: public Pipeline
     bool last_char_was_cr;
     bool unread_char;
     char char_to_unread;
-    bool in_inline_image;
-    static int const IMAGE_BUF_SIZE = 4; // must be >= 4
-    char image_buf[IMAGE_BUF_SIZE];
 };
  
 #endif // __PL_QPDFTOKENIZER_HH__
@@ -182,7 +182,6 @@ QPDF_Stream getRawStreamData 0
 QPDF_Stream getStreamData 0
 QPDF_Stream expand filter abbreviation 0
 qpdf-c called qpdf_read_memory 0
-Pl_QPDFTokenizer found EI 0
 QPDF stream without newline 0
 QPDF stream with CR only 0
 QPDF stream with CRNL 0
@@ -304,3 +303,4 @@ qpdf-c called qpdf_set_newline_before_endstream 0
 QPDF_Stream TIFF predictor 0
 QPDFTokenizer EOF when not allowed 0
 QPDFTokenizer inline image at EOF 0
+Pl_QPDFTokenizer found ID 0