Commit f3e267fce28c58039789379ba3488ad12c20a7f6

Authored by Jay Berkenbilt
1 parent 15eaed5c

Move readToken from QPDF to QPDFTokenizer

include/qpdf/QPDFTokenizer.hh
@@ -10,6 +10,8 @@ @@ -10,6 +10,8 @@
10 10
11 #include <qpdf/DLL.h> 11 #include <qpdf/DLL.h>
12 12
  13 +#include <qpdf/InputSource.hh>
  14 +#include <qpdf/PointerHolder.hh>
13 #include <string> 15 #include <string>
14 #include <stdio.h> 16 #include <stdio.h>
15 17
@@ -122,6 +124,13 @@ class QPDFTokenizer @@ -122,6 +124,13 @@ class QPDFTokenizer
122 QPDF_DLL 124 QPDF_DLL
123 bool betweenTokens(); 125 bool betweenTokens();
124 126
  127 + // Read a token from an input source. Context describes the
  128 + // context in which the token is being read and is used in the
  129 + // exception thrown if there is an error.
  130 + QPDF_DLL
  131 + Token readToken(PointerHolder<InputSource> input,
  132 + std::string const& context);
  133 +
125 private: 134 private:
126 void reset(); 135 void reset();
127 136
libqpdf/QPDF.cc
@@ -1419,45 +1419,7 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input, @@ -1419,45 +1419,7 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input,
1419 QPDFTokenizer::Token 1419 QPDFTokenizer::Token
1420 QPDF::readToken(PointerHolder<InputSource> input) 1420 QPDF::readToken(PointerHolder<InputSource> input)
1421 { 1421 {
1422 - qpdf_offset_t offset = input->tell();  
1423 - QPDFTokenizer::Token token;  
1424 - bool unread_char;  
1425 - char char_to_unread;  
1426 - while (! this->tokenizer.getToken(token, unread_char, char_to_unread))  
1427 - {  
1428 - char ch;  
1429 - if (input->read(&ch, 1) == 0)  
1430 - {  
1431 - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),  
1432 - this->last_object_description, offset,  
1433 - "EOF while reading token");  
1434 - }  
1435 - else  
1436 - {  
1437 - if (isspace((unsigned char)ch) &&  
1438 - (input->getLastOffset() == offset))  
1439 - {  
1440 - ++offset;  
1441 - }  
1442 - this->tokenizer.presentCharacter(ch);  
1443 - }  
1444 - }  
1445 -  
1446 - if (unread_char)  
1447 - {  
1448 - input->unreadCh(char_to_unread);  
1449 - }  
1450 -  
1451 - if (token.getType() == QPDFTokenizer::tt_bad)  
1452 - {  
1453 - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),  
1454 - this->last_object_description, offset,  
1455 - token.getErrorMessage());  
1456 - }  
1457 -  
1458 - input->setLastOffset(offset);  
1459 -  
1460 - return token; 1422 + return this->tokenizer.readToken(input, this->last_object_description);
1461 } 1423 }
1462 1424
1463 QPDFObjectHandle 1425 QPDFObjectHandle
libqpdf/QPDFTokenizer.cc
@@ -6,6 +6,7 @@ @@ -6,6 +6,7 @@
6 6
7 #include <qpdf/PCRE.hh> 7 #include <qpdf/PCRE.hh>
8 #include <qpdf/QTC.hh> 8 #include <qpdf/QTC.hh>
  9 +#include <qpdf/QPDFExc.hh>
9 10
10 #include <stdexcept> 11 #include <stdexcept>
11 #include <string.h> 12 #include <string.h>
@@ -15,6 +16,10 @@ static bool is_hex_digit(char ch) @@ -15,6 +16,10 @@ static bool is_hex_digit(char ch)
15 { 16 {
16 return (strchr("0123456789abcdefABCDEF", ch) != 0); 17 return (strchr("0123456789abcdefABCDEF", ch) != 0);
17 } 18 }
  19 +static bool is_space(char ch)
  20 +{
  21 + return (strchr(" \f\n\r\t\v", ch) != 0);
  22 +}
18 23
19 QPDFTokenizer::QPDFTokenizer() : 24 QPDFTokenizer::QPDFTokenizer() :
20 pound_special_in_name(true) 25 pound_special_in_name(true)
@@ -460,3 +465,47 @@ QPDFTokenizer::betweenTokens() @@ -460,3 +465,47 @@ QPDFTokenizer::betweenTokens()
460 { 465 {
461 return ((state == st_top) || (state == st_in_comment)); 466 return ((state == st_top) || (state == st_in_comment));
462 } 467 }
  468 +
  469 +QPDFTokenizer::Token
  470 +QPDFTokenizer::readToken(PointerHolder<InputSource> input,
  471 + std::string const& context)
  472 +{
  473 + qpdf_offset_t offset = input->tell();
  474 + Token token;
  475 + bool unread_char;
  476 + char char_to_unread;
  477 + while (! getToken(token, unread_char, char_to_unread))
  478 + {
  479 + char ch;
  480 + if (input->read(&ch, 1) == 0)
  481 + {
  482 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  483 + context, offset,
  484 + "EOF while reading token");
  485 + }
  486 + else
  487 + {
  488 + if (is_space((unsigned char)ch) &&
  489 + (input->getLastOffset() == offset))
  490 + {
  491 + ++offset;
  492 + }
  493 + presentCharacter(ch);
  494 + }
  495 + }
  496 +
  497 + if (unread_char)
  498 + {
  499 + input->unreadCh(char_to_unread);
  500 + }
  501 +
  502 + if (token.getType() == tt_bad)
  503 + {
  504 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  505 + context, offset, token.getErrorMessage());
  506 + }
  507 +
  508 + input->setLastOffset(offset);
  509 +
  510 + return token;
  511 +}