Commit f3e267fce28c58039789379ba3488ad12c20a7f6

Authored by Jay Berkenbilt
1 parent 15eaed5c

Move readToken from QPDF to QPDFTokenizer

include/qpdf/QPDFTokenizer.hh
... ... @@ -10,6 +10,8 @@
10 10  
11 11 #include <qpdf/DLL.h>
12 12  
  13 +#include <qpdf/InputSource.hh>
  14 +#include <qpdf/PointerHolder.hh>
13 15 #include <string>
14 16 #include <stdio.h>
15 17  
... ... @@ -122,6 +124,13 @@ class QPDFTokenizer
122 124 QPDF_DLL
123 125 bool betweenTokens();
124 126  
  127 + // Read a token from an input source. Context describes the
  128 + // context in which the token is being read and is used in the
  129 + // exception thrown if there is an error.
  130 + QPDF_DLL
  131 + Token readToken(PointerHolder<InputSource> input,
  132 + std::string const& context);
  133 +
125 134 private:
126 135 void reset();
127 136  
... ...
libqpdf/QPDF.cc
... ... @@ -1419,45 +1419,7 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input,
1419 1419 QPDFTokenizer::Token
1420 1420 QPDF::readToken(PointerHolder<InputSource> input)
1421 1421 {
1422   - qpdf_offset_t offset = input->tell();
1423   - QPDFTokenizer::Token token;
1424   - bool unread_char;
1425   - char char_to_unread;
1426   - while (! this->tokenizer.getToken(token, unread_char, char_to_unread))
1427   - {
1428   - char ch;
1429   - if (input->read(&ch, 1) == 0)
1430   - {
1431   - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
1432   - this->last_object_description, offset,
1433   - "EOF while reading token");
1434   - }
1435   - else
1436   - {
1437   - if (isspace((unsigned char)ch) &&
1438   - (input->getLastOffset() == offset))
1439   - {
1440   - ++offset;
1441   - }
1442   - this->tokenizer.presentCharacter(ch);
1443   - }
1444   - }
1445   -
1446   - if (unread_char)
1447   - {
1448   - input->unreadCh(char_to_unread);
1449   - }
1450   -
1451   - if (token.getType() == QPDFTokenizer::tt_bad)
1452   - {
1453   - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
1454   - this->last_object_description, offset,
1455   - token.getErrorMessage());
1456   - }
1457   -
1458   - input->setLastOffset(offset);
1459   -
1460   - return token;
  1422 + return this->tokenizer.readToken(input, this->last_object_description);
1461 1423 }
1462 1424  
1463 1425 QPDFObjectHandle
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -6,6 +6,7 @@
6 6  
7 7 #include <qpdf/PCRE.hh>
8 8 #include <qpdf/QTC.hh>
  9 +#include <qpdf/QPDFExc.hh>
9 10  
10 11 #include <stdexcept>
11 12 #include <string.h>
... ... @@ -15,6 +16,10 @@ static bool is_hex_digit(char ch)
15 16 {
16 17 return (strchr("0123456789abcdefABCDEF", ch) != 0);
17 18 }
  19 +static bool is_space(char ch)
  20 +{
  21 + return (strchr(" \f\n\r\t\v", ch) != 0);
  22 +}
18 23  
19 24 QPDFTokenizer::QPDFTokenizer() :
20 25 pound_special_in_name(true)
... ... @@ -460,3 +465,47 @@ QPDFTokenizer::betweenTokens()
460 465 {
461 466 return ((state == st_top) || (state == st_in_comment));
462 467 }
  468 +
  469 +QPDFTokenizer::Token
  470 +QPDFTokenizer::readToken(PointerHolder<InputSource> input,
  471 + std::string const& context)
  472 +{
  473 + qpdf_offset_t offset = input->tell();
  474 + Token token;
  475 + bool unread_char;
  476 + char char_to_unread;
  477 + while (! getToken(token, unread_char, char_to_unread))
  478 + {
  479 + char ch;
  480 + if (input->read(&ch, 1) == 0)
  481 + {
  482 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  483 + context, offset,
  484 + "EOF while reading token");
  485 + }
  486 + else
  487 + {
  488 + if (is_space((unsigned char)ch) &&
  489 + (input->getLastOffset() == offset))
  490 + {
  491 + ++offset;
  492 + }
  493 + presentCharacter(ch);
  494 + }
  495 + }
  496 +
  497 + if (unread_char)
  498 + {
  499 + input->unreadCh(char_to_unread);
  500 + }
  501 +
  502 + if (token.getType() == tt_bad)
  503 + {
  504 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  505 + context, offset, token.getErrorMessage());
  506 + }
  507 +
  508 + input->setLastOffset(offset);
  509 +
  510 + return token;
  511 +}
... ...