Commit 69a5fb70472a2112d2bbb938ee3718250d2364cf

Authored by m-holger
1 parent 13ef50cd

Add methods InputSource::fastRead, fastUnread and fastTell

Provide buffered input for QPDFTokenizer.
include/qpdf/InputSource.hh
... ... @@ -93,6 +93,12 @@ class QPDF_DLL_CLASS InputSource
93 93 // efficient.
94 94 virtual void unreadCh(char ch) = 0;
95 95  
  96 + // The following methods are for use by QPDFTokenizer
  97 + inline qpdf_offset_t fastTell(); // syncs the read buffer with tell() and returns last_offset
  98 + inline bool fastRead(char&); // stores the next buffered byte in the argument; false when the buffer is empty
  99 + inline void fastUnread(bool); // seeks to last_offset, stepping back one byte first if the argument is true
  100 + inline void loadBuffer(); // refills the buffer with read(); called by fastTell
  101 +
96 102 protected:
97 103 qpdf_offset_t last_offset;
98 104  
... ... @@ -111,6 +117,68 @@ class QPDF_DLL_CLASS InputSource
111 117 };
112 118  
113 119 std::shared_ptr<Members> m;
  120 +
  121 + // State for fast... methods
  122 + static const qpdf_offset_t buf_size = 128; // capacity of buffer; size requested from read() by loadBuffer
  123 + char buffer[buf_size]; // bytes most recently loaded by loadBuffer
  124 + qpdf_offset_t buf_len = 0; // number of valid bytes in buffer (0 means empty)
  125 + qpdf_offset_t buf_idx = 0; // index in buffer of the next byte fastRead will return
  126 + qpdf_offset_t buf_start = 0; // value of last_offset captured right after the read() that filled buffer
114 127 };
115 128  
  129 +inline void
  130 +InputSource::loadBuffer() // Refill buffer with up to buf_size bytes via read() and reset the cursor.
  131 +{
  132 + this->buf_idx = 0; // the next fastRead starts at the first buffered byte
  133 + this->buf_len = qpdf_offset_t(read(this->buffer, this->buf_size)); // bytes obtained; fastRead returns false when this is 0
  134 + // NB read sets last_offset
  135 + this->buf_start = this->last_offset; // used by fastTell/fastRead as the file offset of buffer[0]
  136 +}
  137 +
  138 +inline qpdf_offset_t
  139 +InputSource::fastTell() // Sync the buffer with the current tell() position; returns last_offset.
  140 +{
  141 + if (this->buf_len == 0) { // nothing buffered: load from wherever the source currently is
  142 + loadBuffer();
  143 + } else {
  144 + auto curr = tell();
  145 + if (curr < this->buf_start ||
  146 + curr >= (this->buf_start + this->buf_len)) { // curr lies outside [buf_start, buf_start + buf_len): reload
  147 + loadBuffer();
  148 + } else { // curr lies inside the buffered window: reuse it without calling read()
  149 + this->last_offset = curr;
  150 + this->buf_idx = curr - this->buf_start; // point the cursor at the byte for offset curr
  151 + }
  152 + }
  153 + return this->last_offset;
  154 +}
  155 +
  156 +inline bool
  157 +InputSource::fastRead(char& ch) // Store the next buffered byte in ch and return true; return false if the buffer is empty.
  158 +{
  159 + // Before calling fastRead, fastTell must be called to prepare the buffer.
  160 + // Once reading is complete, fastUnread must be called to set the correct
  161 + // file position.
  162 + if (this->buf_idx < this->buf_len) { // fast path: serve the byte straight from the buffer
  163 + ch = this->buffer[this->buf_idx];
  164 + ++(this->buf_idx);
  165 + ++(this->last_offset); // keep last_offset in step with the bytes handed out
  166 + return true;
  167 +
  168 + } else if (this->buf_len == 0) { // empty buffer: report no data; ch is left unmodified
  169 + return false;
  170 + } else { // buffer exhausted: seek just past it, reload through fastTell, then retry
  171 + seek(this->buf_start + this->buf_len, SEEK_SET);
  172 + fastTell();
  173 + return fastRead(ch); // after the reload this hits either the fast path or the empty-buffer branch
  174 + }
  175 +}
  176 +
  177 +inline void
  178 +InputSource::fastUnread(bool back) // Finish a fastRead sequence: seek the source to last_offset, minus one byte if back is true.
  179 +{
  180 + this->last_offset -= back ? 1 : 0; // step back over the most recently read byte only when requested
  181 + seek(this->last_offset, SEEK_SET); // move the source to the position tracked in last_offset
  182 +}
  183 +
116 184 #endif // QPDF_INPUTSOURCE_HH
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -974,11 +974,11 @@ QPDFTokenizer::readToken(
974 974 bool allow_bad,
975 975 size_t max_len)
976 976 {
977   - qpdf_offset_t offset = input->tell();
  977 + qpdf_offset_t offset = input->fastTell();
978 978  
979 979 while (this->state != st_token_ready) {
980 980 char ch;
981   - if (input->read(&ch, 1) == 0) {
  981 + if (!input->fastRead(ch)) {
982 982 presentEOF();
983 983  
984 984 if ((this->type == tt_eof) && (!this->allow_eof)) {
... ... @@ -1013,9 +1013,7 @@ QPDFTokenizer::readToken(
1013 1013 bool unread_char;
1014 1014 char char_to_unread;
1015 1015 getToken(token, unread_char, char_to_unread);
1016   - if (unread_char) {
1017   - input->unreadCh(char_to_unread);
1018   - }
  1016 + input->fastUnread(unread_char);
1019 1017  
1020 1018 if (token.getType() != tt_eof) {
1021 1019 input->setLastOffset(offset);
... ...