Commit 69a5fb70472a2112d2bbb938ee3718250d2364cf
1 parent
13ef50cd
Add methods InputSource::fastRead, fastUnread and fastTell
Provide buffered input for QPDFTokenizer.
Showing
2 changed files
with
71 additions
and
5 deletions
include/qpdf/InputSource.hh
| ... | ... | @@ -93,6 +93,12 @@ class QPDF_DLL_CLASS InputSource |
| 93 | 93 | // efficient. |
| 94 | 94 | virtual void unreadCh(char ch) = 0; |
| 95 | 95 | |
| 96 | + // The following methods are for use by QPDFTokenizer | |
| 97 | + inline qpdf_offset_t fastTell(); | |
| 98 | + inline bool fastRead(char&); | |
| 99 | + inline void fastUnread(bool); | |
| 100 | + inline void loadBuffer(); | |
| 101 | + | |
| 96 | 102 | protected: |
| 97 | 103 | qpdf_offset_t last_offset; |
| 98 | 104 | |
| ... | ... | @@ -111,6 +117,68 @@ class QPDF_DLL_CLASS InputSource |
| 111 | 117 | }; |
| 112 | 118 | |
| 113 | 119 | std::shared_ptr<Members> m; |
| 120 | + | |
| 121 | + // State for fast... methods | |
| 122 | + static const qpdf_offset_t buf_size = 128; | |
| 123 | + char buffer[buf_size]; | |
| 124 | + qpdf_offset_t buf_len = 0; | |
| 125 | + qpdf_offset_t buf_idx = 0; | |
| 126 | + qpdf_offset_t buf_start = 0; | |
| 114 | 127 | }; |
| 115 | 128 | |
| 129 | +inline void | |
| 130 | +InputSource::loadBuffer() | |
| 131 | +{ | |
| 132 | + this->buf_idx = 0; | |
| 133 | + this->buf_len = qpdf_offset_t(read(this->buffer, this->buf_size)); | |
| 134 | + // NB read sets last_offset | |
| 135 | + this->buf_start = this->last_offset; | |
| 136 | +} | |
| 137 | + | |
| 138 | +inline qpdf_offset_t | |
| 139 | +InputSource::fastTell() | |
| 140 | +{ | |
| 141 | + if (this->buf_len == 0) { | |
| 142 | + loadBuffer(); | |
| 143 | + } else { | |
| 144 | + auto curr = tell(); | |
| 145 | + if (curr < this->buf_start || | |
| 146 | + curr >= (this->buf_start + this->buf_len)) { | |
| 147 | + loadBuffer(); | |
| 148 | + } else { | |
| 149 | + this->last_offset = curr; | |
| 150 | + this->buf_idx = curr - this->buf_start; | |
| 151 | + } | |
| 152 | + } | |
| 153 | + return this->last_offset; | |
| 154 | +} | |
| 155 | + | |
| 156 | +inline bool | |
| 157 | +InputSource::fastRead(char& ch) | |
| 158 | +{ | |
| 159 | + // Before calling fastRead, fastTell must be called to prepare the buffer. | |
| 160 | + // Once reading is complete, fastUnread must be called to set the correct | |
| 161 | + // file position. | |
| 162 | + if (this->buf_idx < this->buf_len) { | |
| 163 | + ch = this->buffer[this->buf_idx]; | |
| 164 | + ++(this->buf_idx); | |
| 165 | + ++(this->last_offset); | |
| 166 | + return true; | |
| 167 | + | |
| 168 | + } else if (this->buf_len == 0) { | |
| 169 | + return false; | |
| 170 | + } else { | |
| 171 | + seek(this->buf_start + this->buf_len, SEEK_SET); | |
| 172 | + fastTell(); | |
| 173 | + return fastRead(ch); | |
| 174 | + } | |
| 175 | +} | |
| 176 | + | |
| 177 | +inline void | |
| 178 | +InputSource::fastUnread(bool back) | |
| 179 | +{ | |
| 180 | + this->last_offset -= back ? 1 : 0; | |
| 181 | + seek(this->last_offset, SEEK_SET); | |
| 182 | +} | |
| 183 | + | |
| 116 | 184 | #endif // QPDF_INPUTSOURCE_HH | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -974,11 +974,11 @@ QPDFTokenizer::readToken( |
| 974 | 974 | bool allow_bad, |
| 975 | 975 | size_t max_len) |
| 976 | 976 | { |
| 977 | - qpdf_offset_t offset = input->tell(); | |
| 977 | + qpdf_offset_t offset = input->fastTell(); | |
| 978 | 978 | |
| 979 | 979 | while (this->state != st_token_ready) { |
| 980 | 980 | char ch; |
| 981 | - if (input->read(&ch, 1) == 0) { | |
| 981 | + if (!input->fastRead(ch)) { | |
| 982 | 982 | presentEOF(); |
| 983 | 983 | |
| 984 | 984 | if ((this->type == tt_eof) && (!this->allow_eof)) { |
| ... | ... | @@ -1013,9 +1013,7 @@ QPDFTokenizer::readToken( |
| 1013 | 1013 | bool unread_char; |
| 1014 | 1014 | char char_to_unread; |
| 1015 | 1015 | getToken(token, unread_char, char_to_unread); |
| 1016 | - if (unread_char) { | |
| 1017 | - input->unreadCh(char_to_unread); | |
| 1018 | - } | |
| 1016 | + input->fastUnread(unread_char); | |
| 1019 | 1017 | |
| 1020 | 1018 | if (token.getType() != tt_eof) { |
| 1021 | 1019 | input->setLastOffset(offset); | ... | ... |