Commit c08bb0ec0208683ecd0ad89c3d46d4d66b938fbd (1 parent: cef6425b)
Remove QPDFTokenizer::Members
Showing 2 changed files with 201 additions and 233 deletions
include/qpdf/QPDFTokenizer.hh
| @@ -211,42 +211,29 @@ class QPDFTokenizer | @@ -211,42 +211,29 @@ class QPDFTokenizer | ||
| 211 | st_token_ready | 211 | st_token_ready |
| 212 | }; | 212 | }; |
| 213 | 213 | ||
| 214 | - class Members | ||
| 215 | - { | ||
| 216 | - friend class QPDFTokenizer; | ||
| 217 | - | ||
| 218 | - public: | ||
| 219 | - QPDF_DLL | ||
| 220 | - ~Members() = default; | ||
| 221 | - | ||
| 222 | - private: | ||
| 223 | - Members(); | ||
| 224 | - Members(Members const&) = delete; | ||
| 225 | - void reset(); | ||
| 226 | - | ||
| 227 | - // Lexer state | ||
| 228 | - state_e state; | ||
| 229 | - | ||
| 230 | - bool allow_eof; | ||
| 231 | - bool include_ignorable; | ||
| 232 | - | ||
| 233 | - // Current token accumulation | ||
| 234 | - token_type_e type; | ||
| 235 | - std::string val; | ||
| 236 | - std::string raw_val; | ||
| 237 | - std::string error_message; | ||
| 238 | - bool unread_char; | ||
| 239 | - char char_to_unread; | ||
| 240 | - size_t inline_image_bytes; | ||
| 241 | - | ||
| 242 | - // State for strings | ||
| 243 | - int string_depth; | ||
| 244 | - bool string_ignoring_newline; | ||
| 245 | - char bs_num_register[4]; | ||
| 246 | - bool last_char_was_bs; | ||
| 247 | - bool last_char_was_cr; | ||
| 248 | - }; | ||
| 249 | - std::shared_ptr<Members> m; | 214 | + void reset(); |
| 215 | + | ||
| 216 | + // Lexer state | ||
| 217 | + state_e state; | ||
| 218 | + | ||
| 219 | + bool allow_eof; | ||
| 220 | + bool include_ignorable; | ||
| 221 | + | ||
| 222 | + // Current token accumulation | ||
| 223 | + token_type_e type; | ||
| 224 | + std::string val; | ||
| 225 | + std::string raw_val; | ||
| 226 | + std::string error_message; | ||
| 227 | + bool unread_char; | ||
| 228 | + char char_to_unread; | ||
| 229 | + size_t inline_image_bytes; | ||
| 230 | + | ||
| 231 | + // State for strings | ||
| 232 | + int string_depth; | ||
| 233 | + bool string_ignoring_newline; | ||
| 234 | + char bs_num_register[4]; | ||
| 235 | + bool last_char_was_bs; | ||
| 236 | + bool last_char_was_cr; | ||
| 250 | }; | 237 | }; |
| 251 | 238 | ||
| 252 | #endif // QPDFTOKENIZER_HH | 239 | #endif // QPDFTOKENIZER_HH |
libqpdf/QPDFTokenizer.cc
| @@ -73,15 +73,8 @@ QPDFWordTokenFinder::check() | @@ -73,15 +73,8 @@ QPDFWordTokenFinder::check() | ||
| 73 | return true; | 73 | return true; |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | -QPDFTokenizer::Members::Members() : | ||
| 77 | - allow_eof(false), | ||
| 78 | - include_ignorable(false) | ||
| 79 | -{ | ||
| 80 | - reset(); | ||
| 81 | -} | ||
| 82 | - | ||
| 83 | void | 76 | void |
| 84 | -QPDFTokenizer::Members::reset() | 77 | +QPDFTokenizer::reset() |
| 85 | { | 78 | { |
| 86 | state = st_top; | 79 | state = st_top; |
| 87 | type = tt_bad; | 80 | type = tt_bad; |
| @@ -110,20 +103,22 @@ QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : | @@ -110,20 +103,22 @@ QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : | ||
| 110 | } | 103 | } |
| 111 | 104 | ||
| 112 | QPDFTokenizer::QPDFTokenizer() : | 105 | QPDFTokenizer::QPDFTokenizer() : |
| 113 | - m(new Members()) | 106 | + allow_eof(false), |
| 107 | + include_ignorable(false) | ||
| 114 | { | 108 | { |
| 109 | + reset(); | ||
| 115 | } | 110 | } |
| 116 | 111 | ||
| 117 | void | 112 | void |
| 118 | QPDFTokenizer::allowEOF() | 113 | QPDFTokenizer::allowEOF() |
| 119 | { | 114 | { |
| 120 | - this->m->allow_eof = true; | 115 | + this->allow_eof = true; |
| 121 | } | 116 | } |
| 122 | 117 | ||
| 123 | void | 118 | void |
| 124 | QPDFTokenizer::includeIgnorable() | 119 | QPDFTokenizer::includeIgnorable() |
| 125 | { | 120 | { |
| 126 | - this->m->include_ignorable = true; | 121 | + this->include_ignorable = true; |
| 127 | } | 122 | } |
| 128 | 123 | ||
| 129 | bool | 124 | bool |
| @@ -141,28 +136,27 @@ QPDFTokenizer::isDelimiter(char ch) | @@ -141,28 +136,27 @@ QPDFTokenizer::isDelimiter(char ch) | ||
| 141 | void | 136 | void |
| 142 | QPDFTokenizer::resolveLiteral() | 137 | QPDFTokenizer::resolveLiteral() |
| 143 | { | 138 | { |
| 144 | - if ((this->m->val.length() > 0) && (this->m->val.at(0) == '/')) { | ||
| 145 | - this->m->type = tt_name; | 139 | + if ((this->val.length() > 0) && (this->val.at(0) == '/')) { |
| 140 | + this->type = tt_name; | ||
| 146 | // Deal with # in name token. Note: '/' by itself is a | 141 | // Deal with # in name token. Note: '/' by itself is a |
| 147 | // valid name, so don't strip leading /. That way we | 142 | // valid name, so don't strip leading /. That way we |
| 148 | // don't have to deal with the empty string as a name. | 143 | // don't have to deal with the empty string as a name. |
| 149 | std::string nval = "/"; | 144 | std::string nval = "/"; |
| 150 | - size_t len = this->m->val.length(); | 145 | + size_t len = this->val.length(); |
| 151 | for (size_t i = 1; i < len; ++i) { | 146 | for (size_t i = 1; i < len; ++i) { |
| 152 | - char ch = this->m->val.at(i); | 147 | + char ch = this->val.at(i); |
| 153 | if (ch == '#') { | 148 | if (ch == '#') { |
| 154 | - if ((i + 2 < len) && | ||
| 155 | - QUtil::is_hex_digit(this->m->val.at(i + 1)) && | ||
| 156 | - QUtil::is_hex_digit(this->m->val.at(i + 2))) { | 149 | + if ((i + 2 < len) && QUtil::is_hex_digit(this->val.at(i + 1)) && |
| 150 | + QUtil::is_hex_digit(this->val.at(i + 2))) { | ||
| 157 | char num[3]; | 151 | char num[3]; |
| 158 | - num[0] = this->m->val.at(i + 1); | ||
| 159 | - num[1] = this->m->val.at(i + 2); | 152 | + num[0] = this->val.at(i + 1); |
| 153 | + num[1] = this->val.at(i + 2); | ||
| 160 | num[2] = '\0'; | 154 | num[2] = '\0'; |
| 161 | char ch2 = static_cast<char>(strtol(num, nullptr, 16)); | 155 | char ch2 = static_cast<char>(strtol(num, nullptr, 16)); |
| 162 | if (ch2 == '\0') { | 156 | if (ch2 == '\0') { |
| 163 | - this->m->type = tt_bad; | 157 | + this->type = tt_bad; |
| 164 | QTC::TC("qpdf", "QPDFTokenizer null in name"); | 158 | QTC::TC("qpdf", "QPDFTokenizer null in name"); |
| 165 | - this->m->error_message = | 159 | + this->error_message = |
| 166 | "null character not allowed in name token"; | 160 | "null character not allowed in name token"; |
| 167 | nval += "#00"; | 161 | nval += "#00"; |
| 168 | } else { | 162 | } else { |
| @@ -171,7 +165,7 @@ QPDFTokenizer::resolveLiteral() | @@ -171,7 +165,7 @@ QPDFTokenizer::resolveLiteral() | ||
| 171 | i += 2; | 165 | i += 2; |
| 172 | } else { | 166 | } else { |
| 173 | QTC::TC("qpdf", "QPDFTokenizer bad name"); | 167 | QTC::TC("qpdf", "QPDFTokenizer bad name"); |
| 174 | - this->m->error_message = | 168 | + this->error_message = |
| 175 | "name with stray # will not work with PDF >= 1.2"; | 169 | "name with stray # will not work with PDF >= 1.2"; |
| 176 | // Use null to encode a bad # -- this is reversed | 170 | // Use null to encode a bad # -- this is reversed |
| 177 | // in QPDF_Name::normalizeName. | 171 | // in QPDF_Name::normalizeName. |
| @@ -181,29 +175,29 @@ QPDFTokenizer::resolveLiteral() | @@ -181,29 +175,29 @@ QPDFTokenizer::resolveLiteral() | ||
| 181 | nval.append(1, ch); | 175 | nval.append(1, ch); |
| 182 | } | 176 | } |
| 183 | } | 177 | } |
| 184 | - this->m->val = nval; | ||
| 185 | - } else if (QUtil::is_number(this->m->val.c_str())) { | ||
| 186 | - if (this->m->val.find('.') != std::string::npos) { | ||
| 187 | - this->m->type = tt_real; | 178 | + this->val = nval; |
| 179 | + } else if (QUtil::is_number(this->val.c_str())) { | ||
| 180 | + if (this->val.find('.') != std::string::npos) { | ||
| 181 | + this->type = tt_real; | ||
| 188 | } else { | 182 | } else { |
| 189 | - this->m->type = tt_integer; | 183 | + this->type = tt_integer; |
| 190 | } | 184 | } |
| 191 | - } else if ((this->m->val == "true") || (this->m->val == "false")) { | ||
| 192 | - this->m->type = tt_bool; | ||
| 193 | - } else if (this->m->val == "null") { | ||
| 194 | - this->m->type = tt_null; | 185 | + } else if ((this->val == "true") || (this->val == "false")) { |
| 186 | + this->type = tt_bool; | ||
| 187 | + } else if (this->val == "null") { | ||
| 188 | + this->type = tt_null; | ||
| 195 | } else { | 189 | } else { |
| 196 | // I don't really know what it is, so leave it as tt_word. | 190 | // I don't really know what it is, so leave it as tt_word. |
| 197 | // Lots of cases ($, #, etc.) other than actual words fall | 191 | // Lots of cases ($, #, etc.) other than actual words fall |
| 198 | // into this category, but that's okay at least for now. | 192 | // into this category, but that's okay at least for now. |
| 199 | - this->m->type = tt_word; | 193 | + this->type = tt_word; |
| 200 | } | 194 | } |
| 201 | } | 195 | } |
| 202 | 196 | ||
| 203 | void | 197 | void |
| 204 | QPDFTokenizer::presentCharacter(char ch) | 198 | QPDFTokenizer::presentCharacter(char ch) |
| 205 | { | 199 | { |
| 206 | - if (this->m->state == st_token_ready) { | 200 | + if (this->state == st_token_ready) { |
| 207 | throw std::logic_error( | 201 | throw std::logic_error( |
| 208 | "INTERNAL ERROR: QPDF tokenizer presented character " | 202 | "INTERNAL ERROR: QPDF tokenizer presented character " |
| 209 | "while token is waiting"); | 203 | "while token is waiting"); |
| @@ -216,159 +210,153 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -216,159 +210,153 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 216 | // the character that caused a state change in the new state. | 210 | // the character that caused a state change in the new state. |
| 217 | 211 | ||
| 218 | bool handled = true; | 212 | bool handled = true; |
| 219 | - if (this->m->state == st_top) { | 213 | + if (this->state == st_top) { |
| 220 | // Note: we specifically do not use ctype here. It is | 214 | // Note: we specifically do not use ctype here. It is |
| 221 | // locale-dependent. | 215 | // locale-dependent. |
| 222 | if (isSpace(ch)) { | 216 | if (isSpace(ch)) { |
| 223 | - if (this->m->include_ignorable) { | ||
| 224 | - this->m->state = st_in_space; | ||
| 225 | - this->m->val += ch; | 217 | + if (this->include_ignorable) { |
| 218 | + this->state = st_in_space; | ||
| 219 | + this->val += ch; | ||
| 226 | } | 220 | } |
| 227 | } else if (ch == '%') { | 221 | } else if (ch == '%') { |
| 228 | - this->m->state = st_in_comment; | ||
| 229 | - if (this->m->include_ignorable) { | ||
| 230 | - this->m->val += ch; | 222 | + this->state = st_in_comment; |
| 223 | + if (this->include_ignorable) { | ||
| 224 | + this->val += ch; | ||
| 231 | } | 225 | } |
| 232 | } else if (ch == '(') { | 226 | } else if (ch == '(') { |
| 233 | - this->m->string_depth = 1; | ||
| 234 | - this->m->string_ignoring_newline = false; | ||
| 235 | - memset( | ||
| 236 | - this->m->bs_num_register, | ||
| 237 | - '\0', | ||
| 238 | - sizeof(this->m->bs_num_register)); | ||
| 239 | - this->m->last_char_was_bs = false; | ||
| 240 | - this->m->last_char_was_cr = false; | ||
| 241 | - this->m->state = st_in_string; | 227 | + this->string_depth = 1; |
| 228 | + this->string_ignoring_newline = false; | ||
| 229 | + memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); | ||
| 230 | + this->last_char_was_bs = false; | ||
| 231 | + this->last_char_was_cr = false; | ||
| 232 | + this->state = st_in_string; | ||
| 242 | } else if (ch == '<') { | 233 | } else if (ch == '<') { |
| 243 | - this->m->state = st_lt; | 234 | + this->state = st_lt; |
| 244 | } else if (ch == '>') { | 235 | } else if (ch == '>') { |
| 245 | - this->m->state = st_gt; | 236 | + this->state = st_gt; |
| 246 | } else { | 237 | } else { |
| 247 | - this->m->val += ch; | 238 | + this->val += ch; |
| 248 | if (ch == ')') { | 239 | if (ch == ')') { |
| 249 | - this->m->type = tt_bad; | 240 | + this->type = tt_bad; |
| 250 | QTC::TC("qpdf", "QPDFTokenizer bad )"); | 241 | QTC::TC("qpdf", "QPDFTokenizer bad )"); |
| 251 | - this->m->error_message = "unexpected )"; | ||
| 252 | - this->m->state = st_token_ready; | 242 | + this->error_message = "unexpected )"; |
| 243 | + this->state = st_token_ready; | ||
| 253 | } else if (ch == '[') { | 244 | } else if (ch == '[') { |
| 254 | - this->m->type = tt_array_open; | ||
| 255 | - this->m->state = st_token_ready; | 245 | + this->type = tt_array_open; |
| 246 | + this->state = st_token_ready; | ||
| 256 | } else if (ch == ']') { | 247 | } else if (ch == ']') { |
| 257 | - this->m->type = tt_array_close; | ||
| 258 | - this->m->state = st_token_ready; | 248 | + this->type = tt_array_close; |
| 249 | + this->state = st_token_ready; | ||
| 259 | } else if (ch == '{') { | 250 | } else if (ch == '{') { |
| 260 | - this->m->type = tt_brace_open; | ||
| 261 | - this->m->state = st_token_ready; | 251 | + this->type = tt_brace_open; |
| 252 | + this->state = st_token_ready; | ||
| 262 | } else if (ch == '}') { | 253 | } else if (ch == '}') { |
| 263 | - this->m->type = tt_brace_close; | ||
| 264 | - this->m->state = st_token_ready; | 254 | + this->type = tt_brace_close; |
| 255 | + this->state = st_token_ready; | ||
| 265 | } else { | 256 | } else { |
| 266 | - this->m->state = st_literal; | 257 | + this->state = st_literal; |
| 267 | } | 258 | } |
| 268 | } | 259 | } |
| 269 | - } else if (this->m->state == st_in_space) { | 260 | + } else if (this->state == st_in_space) { |
| 270 | // We only enter this state if include_ignorable is true. | 261 | // We only enter this state if include_ignorable is true. |
| 271 | if (!isSpace(ch)) { | 262 | if (!isSpace(ch)) { |
| 272 | - this->m->type = tt_space; | ||
| 273 | - this->m->unread_char = true; | ||
| 274 | - this->m->char_to_unread = ch; | ||
| 275 | - this->m->state = st_token_ready; | 263 | + this->type = tt_space; |
| 264 | + this->unread_char = true; | ||
| 265 | + this->char_to_unread = ch; | ||
| 266 | + this->state = st_token_ready; | ||
| 276 | } else { | 267 | } else { |
| 277 | - this->m->val += ch; | 268 | + this->val += ch; |
| 278 | } | 269 | } |
| 279 | - } else if (this->m->state == st_in_comment) { | 270 | + } else if (this->state == st_in_comment) { |
| 280 | if ((ch == '\r') || (ch == '\n')) { | 271 | if ((ch == '\r') || (ch == '\n')) { |
| 281 | - if (this->m->include_ignorable) { | ||
| 282 | - this->m->type = tt_comment; | ||
| 283 | - this->m->unread_char = true; | ||
| 284 | - this->m->char_to_unread = ch; | ||
| 285 | - this->m->state = st_token_ready; | 272 | + if (this->include_ignorable) { |
| 273 | + this->type = tt_comment; | ||
| 274 | + this->unread_char = true; | ||
| 275 | + this->char_to_unread = ch; | ||
| 276 | + this->state = st_token_ready; | ||
| 286 | } else { | 277 | } else { |
| 287 | - this->m->state = st_top; | 278 | + this->state = st_top; |
| 288 | } | 279 | } |
| 289 | - } else if (this->m->include_ignorable) { | ||
| 290 | - this->m->val += ch; | 280 | + } else if (this->include_ignorable) { |
| 281 | + this->val += ch; | ||
| 291 | } | 282 | } |
| 292 | - } else if (this->m->state == st_lt) { | 283 | + } else if (this->state == st_lt) { |
| 293 | if (ch == '<') { | 284 | if (ch == '<') { |
| 294 | - this->m->val = "<<"; | ||
| 295 | - this->m->type = tt_dict_open; | ||
| 296 | - this->m->state = st_token_ready; | 285 | + this->val = "<<"; |
| 286 | + this->type = tt_dict_open; | ||
| 287 | + this->state = st_token_ready; | ||
| 297 | } else { | 288 | } else { |
| 298 | handled = false; | 289 | handled = false; |
| 299 | - this->m->state = st_in_hexstring; | 290 | + this->state = st_in_hexstring; |
| 300 | } | 291 | } |
| 301 | - } else if (this->m->state == st_gt) { | 292 | + } else if (this->state == st_gt) { |
| 302 | if (ch == '>') { | 293 | if (ch == '>') { |
| 303 | - this->m->val = ">>"; | ||
| 304 | - this->m->type = tt_dict_close; | ||
| 305 | - this->m->state = st_token_ready; | 294 | + this->val = ">>"; |
| 295 | + this->type = tt_dict_close; | ||
| 296 | + this->state = st_token_ready; | ||
| 306 | } else { | 297 | } else { |
| 307 | - this->m->val = ">"; | ||
| 308 | - this->m->type = tt_bad; | 298 | + this->val = ">"; |
| 299 | + this->type = tt_bad; | ||
| 309 | QTC::TC("qpdf", "QPDFTokenizer bad >"); | 300 | QTC::TC("qpdf", "QPDFTokenizer bad >"); |
| 310 | - this->m->error_message = "unexpected >"; | ||
| 311 | - this->m->unread_char = true; | ||
| 312 | - this->m->char_to_unread = ch; | ||
| 313 | - this->m->state = st_token_ready; | 301 | + this->error_message = "unexpected >"; |
| 302 | + this->unread_char = true; | ||
| 303 | + this->char_to_unread = ch; | ||
| 304 | + this->state = st_token_ready; | ||
| 314 | } | 305 | } |
| 315 | - } else if (this->m->state == st_in_string) { | ||
| 316 | - if (this->m->string_ignoring_newline && (ch != '\n')) { | ||
| 317 | - this->m->string_ignoring_newline = false; | 306 | + } else if (this->state == st_in_string) { |
| 307 | + if (this->string_ignoring_newline && (ch != '\n')) { | ||
| 308 | + this->string_ignoring_newline = false; | ||
| 318 | } | 309 | } |
| 319 | 310 | ||
| 320 | - size_t bs_num_count = strlen(this->m->bs_num_register); | 311 | + size_t bs_num_count = strlen(this->bs_num_register); |
| 321 | bool ch_is_octal = ((ch >= '0') && (ch <= '7')); | 312 | bool ch_is_octal = ((ch >= '0') && (ch <= '7')); |
| 322 | if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { | 313 | if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { |
| 323 | // We've accumulated \ddd. PDF Spec says to ignore | 314 | // We've accumulated \ddd. PDF Spec says to ignore |
| 324 | // high-order overflow. | 315 | // high-order overflow. |
| 325 | - this->m->val += | ||
| 326 | - static_cast<char>(strtol(this->m->bs_num_register, nullptr, 8)); | ||
| 327 | - memset( | ||
| 328 | - this->m->bs_num_register, | ||
| 329 | - '\0', | ||
| 330 | - sizeof(this->m->bs_num_register)); | 316 | + this->val += |
| 317 | + static_cast<char>(strtol(this->bs_num_register, nullptr, 8)); | ||
| 318 | + memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); | ||
| 331 | bs_num_count = 0; | 319 | bs_num_count = 0; |
| 332 | } | 320 | } |
| 333 | 321 | ||
| 334 | - if (this->m->string_ignoring_newline && (ch == '\n')) { | 322 | + if (this->string_ignoring_newline && (ch == '\n')) { |
| 335 | // ignore | 323 | // ignore |
| 336 | - this->m->string_ignoring_newline = false; | 324 | + this->string_ignoring_newline = false; |
| 337 | } else if ( | 325 | } else if ( |
| 338 | - ch_is_octal && (this->m->last_char_was_bs || (bs_num_count > 0))) { | ||
| 339 | - this->m->bs_num_register[bs_num_count++] = ch; | ||
| 340 | - } else if (this->m->last_char_was_bs) { | 326 | + ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) { |
| 327 | + this->bs_num_register[bs_num_count++] = ch; | ||
| 328 | + } else if (this->last_char_was_bs) { | ||
| 341 | switch (ch) { | 329 | switch (ch) { |
| 342 | case 'n': | 330 | case 'n': |
| 343 | - this->m->val += '\n'; | 331 | + this->val += '\n'; |
| 344 | break; | 332 | break; |
| 345 | 333 | ||
| 346 | case 'r': | 334 | case 'r': |
| 347 | - this->m->val += '\r'; | 335 | + this->val += '\r'; |
| 348 | break; | 336 | break; |
| 349 | 337 | ||
| 350 | case 't': | 338 | case 't': |
| 351 | - this->m->val += '\t'; | 339 | + this->val += '\t'; |
| 352 | break; | 340 | break; |
| 353 | 341 | ||
| 354 | case 'b': | 342 | case 'b': |
| 355 | - this->m->val += '\b'; | 343 | + this->val += '\b'; |
| 356 | break; | 344 | break; |
| 357 | 345 | ||
| 358 | case 'f': | 346 | case 'f': |
| 359 | - this->m->val += '\f'; | 347 | + this->val += '\f'; |
| 360 | break; | 348 | break; |
| 361 | 349 | ||
| 362 | case '\n': | 350 | case '\n': |
| 363 | break; | 351 | break; |
| 364 | 352 | ||
| 365 | case '\r': | 353 | case '\r': |
| 366 | - this->m->string_ignoring_newline = true; | 354 | + this->string_ignoring_newline = true; |
| 367 | break; | 355 | break; |
| 368 | 356 | ||
| 369 | default: | 357 | default: |
| 370 | // PDF spec says backslash is ignored before anything else | 358 | // PDF spec says backslash is ignored before anything else |
| 371 | - this->m->val += ch; | 359 | + this->val += ch; |
| 372 | break; | 360 | break; |
| 373 | } | 361 | } |
| 374 | } else if (ch == '\\') { | 362 | } else if (ch == '\\') { |
| @@ -379,28 +367,27 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -379,28 +367,27 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 379 | "when ch == '\\'"); | 367 | "when ch == '\\'"); |
| 380 | } | 368 | } |
| 381 | } else if (ch == '(') { | 369 | } else if (ch == '(') { |
| 382 | - this->m->val += ch; | ||
| 383 | - ++this->m->string_depth; | ||
| 384 | - } else if ((ch == ')') && (--this->m->string_depth == 0)) { | ||
| 385 | - this->m->type = tt_string; | ||
| 386 | - this->m->state = st_token_ready; | 370 | + this->val += ch; |
| 371 | + ++this->string_depth; | ||
| 372 | + } else if ((ch == ')') && (--this->string_depth == 0)) { | ||
| 373 | + this->type = tt_string; | ||
| 374 | + this->state = st_token_ready; | ||
| 387 | } else if (ch == '\r') { | 375 | } else if (ch == '\r') { |
| 388 | // CR by itself is converted to LF | 376 | // CR by itself is converted to LF |
| 389 | - this->m->val += '\n'; | 377 | + this->val += '\n'; |
| 390 | } else if (ch == '\n') { | 378 | } else if (ch == '\n') { |
| 391 | // CR LF is converted to LF | 379 | // CR LF is converted to LF |
| 392 | - if (!this->m->last_char_was_cr) { | ||
| 393 | - this->m->val += ch; | 380 | + if (!this->last_char_was_cr) { |
| 381 | + this->val += ch; | ||
| 394 | } | 382 | } |
| 395 | } else { | 383 | } else { |
| 396 | - this->m->val += ch; | 384 | + this->val += ch; |
| 397 | } | 385 | } |
| 398 | 386 | ||
| 399 | - this->m->last_char_was_cr = | ||
| 400 | - ((!this->m->string_ignoring_newline) && (ch == '\r')); | ||
| 401 | - this->m->last_char_was_bs = | ||
| 402 | - ((!this->m->last_char_was_bs) && (ch == '\\')); | ||
| 403 | - } else if (this->m->state == st_literal) { | 387 | + this->last_char_was_cr = |
| 388 | + ((!this->string_ignoring_newline) && (ch == '\r')); | ||
| 389 | + this->last_char_was_bs = ((!this->last_char_was_bs) && (ch == '\\')); | ||
| 390 | + } else if (this->state == st_literal) { | ||
| 404 | if (isDelimiter(ch)) { | 391 | if (isDelimiter(ch)) { |
| 405 | // A C-locale whitespace character or delimiter terminates | 392 | // A C-locale whitespace character or delimiter terminates |
| 406 | // token. It is important to unread the whitespace | 393 | // token. It is important to unread the whitespace |
| @@ -410,21 +397,21 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -410,21 +397,21 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 410 | // though not on any files in the test suite as of this | 397 | // though not on any files in the test suite as of this |
| 411 | // writing. | 398 | // writing. |
| 412 | 399 | ||
| 413 | - this->m->type = tt_word; | ||
| 414 | - this->m->unread_char = true; | ||
| 415 | - this->m->char_to_unread = ch; | ||
| 416 | - this->m->state = st_token_ready; | 400 | + this->type = tt_word; |
| 401 | + this->unread_char = true; | ||
| 402 | + this->char_to_unread = ch; | ||
| 403 | + this->state = st_token_ready; | ||
| 417 | } else { | 404 | } else { |
| 418 | - this->m->val += ch; | 405 | + this->val += ch; |
| 419 | } | 406 | } |
| 420 | - } else if (this->m->state == st_inline_image) { | ||
| 421 | - this->m->val += ch; | ||
| 422 | - size_t len = this->m->val.length(); | ||
| 423 | - if (len == this->m->inline_image_bytes) { | 407 | + } else if (this->state == st_inline_image) { |
| 408 | + this->val += ch; | ||
| 409 | + size_t len = this->val.length(); | ||
| 410 | + if (len == this->inline_image_bytes) { | ||
| 424 | QTC::TC("qpdf", "QPDFTokenizer found EI by byte count"); | 411 | QTC::TC("qpdf", "QPDFTokenizer found EI by byte count"); |
| 425 | - this->m->type = tt_inline_image; | ||
| 426 | - this->m->inline_image_bytes = 0; | ||
| 427 | - this->m->state = st_token_ready; | 412 | + this->type = tt_inline_image; |
| 413 | + this->inline_image_bytes = 0; | ||
| 414 | + this->state = st_token_ready; | ||
| 428 | } | 415 | } |
| 429 | } else { | 416 | } else { |
| 430 | handled = false; | 417 | handled = false; |
| @@ -432,83 +419,81 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -432,83 +419,81 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 432 | 419 | ||
| 433 | if (handled) { | 420 | if (handled) { |
| 434 | // okay | 421 | // okay |
| 435 | - } else if (this->m->state == st_in_hexstring) { | 422 | + } else if (this->state == st_in_hexstring) { |
| 436 | if (ch == '>') { | 423 | if (ch == '>') { |
| 437 | - this->m->type = tt_string; | ||
| 438 | - this->m->state = st_token_ready; | ||
| 439 | - if (this->m->val.length() % 2) { | 424 | + this->type = tt_string; |
| 425 | + this->state = st_token_ready; | ||
| 426 | + if (this->val.length() % 2) { | ||
| 440 | // PDF spec says odd hexstrings have implicit | 427 | // PDF spec says odd hexstrings have implicit |
| 441 | // trailing 0. | 428 | // trailing 0. |
| 442 | - this->m->val += '0'; | 429 | + this->val += '0'; |
| 443 | } | 430 | } |
| 444 | char num[3]; | 431 | char num[3]; |
| 445 | num[2] = '\0'; | 432 | num[2] = '\0'; |
| 446 | std::string nval; | 433 | std::string nval; |
| 447 | - for (unsigned int i = 0; i < this->m->val.length(); i += 2) { | ||
| 448 | - num[0] = this->m->val.at(i); | ||
| 449 | - num[1] = this->m->val.at(i + 1); | 434 | + for (unsigned int i = 0; i < this->val.length(); i += 2) { |
| 435 | + num[0] = this->val.at(i); | ||
| 436 | + num[1] = this->val.at(i + 1); | ||
| 450 | char nch = static_cast<char>(strtol(num, nullptr, 16)); | 437 | char nch = static_cast<char>(strtol(num, nullptr, 16)); |
| 451 | nval += nch; | 438 | nval += nch; |
| 452 | } | 439 | } |
| 453 | - this->m->val = nval; | 440 | + this->val = nval; |
| 454 | } else if (QUtil::is_hex_digit(ch)) { | 441 | } else if (QUtil::is_hex_digit(ch)) { |
| 455 | - this->m->val += ch; | 442 | + this->val += ch; |
| 456 | } else if (isSpace(ch)) { | 443 | } else if (isSpace(ch)) { |
| 457 | // ignore | 444 | // ignore |
| 458 | } else { | 445 | } else { |
| 459 | - this->m->type = tt_bad; | 446 | + this->type = tt_bad; |
| 460 | QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); | 447 | QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); |
| 461 | - this->m->error_message = | 448 | + this->error_message = |
| 462 | std::string("invalid character (") + ch + ") in hexstring"; | 449 | std::string("invalid character (") + ch + ") in hexstring"; |
| 463 | - this->m->state = st_token_ready; | 450 | + this->state = st_token_ready; |
| 464 | } | 451 | } |
| 465 | } else { | 452 | } else { |
| 466 | throw std::logic_error( | 453 | throw std::logic_error( |
| 467 | "INTERNAL ERROR: invalid state while reading token"); | 454 | "INTERNAL ERROR: invalid state while reading token"); |
| 468 | } | 455 | } |
| 469 | 456 | ||
| 470 | - if ((this->m->state == st_token_ready) && (this->m->type == tt_word)) { | 457 | + if ((this->state == st_token_ready) && (this->type == tt_word)) { |
| 471 | resolveLiteral(); | 458 | resolveLiteral(); |
| 472 | } | 459 | } |
| 473 | 460 | ||
| 474 | if (!(betweenTokens() || | 461 | if (!(betweenTokens() || |
| 475 | - ((this->m->state == st_token_ready) && this->m->unread_char))) { | ||
| 476 | - this->m->raw_val += orig_ch; | 462 | + ((this->state == st_token_ready) && this->unread_char))) { |
| 463 | + this->raw_val += orig_ch; | ||
| 477 | } | 464 | } |
| 478 | } | 465 | } |
| 479 | 466 | ||
| 480 | void | 467 | void |
| 481 | QPDFTokenizer::presentEOF() | 468 | QPDFTokenizer::presentEOF() |
| 482 | { | 469 | { |
| 483 | - if (this->m->state == st_literal) { | 470 | + if (this->state == st_literal) { |
| 484 | QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); | 471 | QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); |
| 485 | resolveLiteral(); | 472 | resolveLiteral(); |
| 486 | - } else if ( | ||
| 487 | - (this->m->include_ignorable) && (this->m->state == st_in_space)) { | ||
| 488 | - this->m->type = tt_space; | ||
| 489 | - } else if ( | ||
| 490 | - (this->m->include_ignorable) && (this->m->state == st_in_comment)) { | ||
| 491 | - this->m->type = tt_comment; | 473 | + } else if ((this->include_ignorable) && (this->state == st_in_space)) { |
| 474 | + this->type = tt_space; | ||
| 475 | + } else if ((this->include_ignorable) && (this->state == st_in_comment)) { | ||
| 476 | + this->type = tt_comment; | ||
| 492 | } else if (betweenTokens()) { | 477 | } else if (betweenTokens()) { |
| 493 | - this->m->type = tt_eof; | ||
| 494 | - } else if (this->m->state != st_token_ready) { | 478 | + this->type = tt_eof; |
| 479 | + } else if (this->state != st_token_ready) { | ||
| 495 | QTC::TC("qpdf", "QPDFTokenizer EOF reading token"); | 480 | QTC::TC("qpdf", "QPDFTokenizer EOF reading token"); |
| 496 | - this->m->type = tt_bad; | ||
| 497 | - this->m->error_message = "EOF while reading token"; | 481 | + this->type = tt_bad; |
| 482 | + this->error_message = "EOF while reading token"; | ||
| 498 | } | 483 | } |
| 499 | 484 | ||
| 500 | - this->m->state = st_token_ready; | 485 | + this->state = st_token_ready; |
| 501 | } | 486 | } |
| 502 | 487 | ||
| 503 | void | 488 | void |
| 504 | QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) | 489 | QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) |
| 505 | { | 490 | { |
| 506 | - if (this->m->state != st_top) { | 491 | + if (this->state != st_top) { |
| 507 | throw std::logic_error("QPDFTokenizer::expectInlineImage called" | 492 | throw std::logic_error("QPDFTokenizer::expectInlineImage called" |
| 508 | " when tokenizer is in improper state"); | 493 | " when tokenizer is in improper state"); |
| 509 | } | 494 | } |
| 510 | findEI(input); | 495 | findEI(input); |
| 511 | - this->m->state = st_inline_image; | 496 | + this->state = st_inline_image; |
| 512 | } | 497 | } |
| 513 | 498 | ||
| 514 | void | 499 | void |
| @@ -537,7 +522,7 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) | @@ -537,7 +522,7 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) | ||
| 537 | if (!input->findFirst("EI", input->tell(), 0, f)) { | 522 | if (!input->findFirst("EI", input->tell(), 0, f)) { |
| 538 | break; | 523 | break; |
| 539 | } | 524 | } |
| 540 | - this->m->inline_image_bytes = QIntC::to_size(input->tell() - pos - 2); | 525 | + this->inline_image_bytes = QIntC::to_size(input->tell() - pos - 2); |
| 541 | 526 | ||
| 542 | QPDFTokenizer check; | 527 | QPDFTokenizer check; |
| 543 | bool found_bad = false; | 528 | bool found_bad = false; |
| @@ -610,19 +595,16 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) | @@ -610,19 +595,16 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) | ||
| 610 | bool | 595 | bool |
| 611 | QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) | 596 | QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) |
| 612 | { | 597 | { |
| 613 | - bool ready = (this->m->state == st_token_ready); | ||
| 614 | - unread_char = this->m->unread_char; | ||
| 615 | - ch = this->m->char_to_unread; | 598 | + bool ready = (this->state == st_token_ready); |
| 599 | + unread_char = this->unread_char; | ||
| 600 | + ch = this->char_to_unread; | ||
| 616 | if (ready) { | 601 | if (ready) { |
| 617 | - if (this->m->type == tt_bad) { | ||
| 618 | - this->m->val = this->m->raw_val; | 602 | + if (this->type == tt_bad) { |
| 603 | + this->val = this->raw_val; | ||
| 619 | } | 604 | } |
| 620 | - token = Token( | ||
| 621 | - this->m->type, | ||
| 622 | - this->m->val, | ||
| 623 | - this->m->raw_val, | ||
| 624 | - this->m->error_message); | ||
| 625 | - this->m->reset(); | 605 | + token = |
| 606 | + Token(this->type, this->val, this->raw_val, this->error_message); | ||
| 607 | + this->reset(); | ||
| 626 | } | 608 | } |
| 627 | return ready; | 609 | return ready; |
| 628 | } | 610 | } |
| @@ -631,10 +613,9 @@ bool | @@ -631,10 +613,9 @@ bool | ||
| 631 | QPDFTokenizer::betweenTokens() | 613 | QPDFTokenizer::betweenTokens() |
| 632 | { | 614 | { |
| 633 | return ( | 615 | return ( |
| 634 | - (this->m->state == st_top) || | ||
| 635 | - ((!this->m->include_ignorable) && | ||
| 636 | - ((this->m->state == st_in_comment) || | ||
| 637 | - (this->m->state == st_in_space)))); | 616 | + (this->state == st_top) || |
| 617 | + ((!this->include_ignorable) && | ||
| 618 | + ((this->state == st_in_comment) || (this->state == st_in_space)))); | ||
| 638 | } | 619 | } |
| 639 | 620 | ||
| 640 | QPDFTokenizer::Token | 621 | QPDFTokenizer::Token |
| @@ -655,12 +636,12 @@ QPDFTokenizer::readToken( | @@ -655,12 +636,12 @@ QPDFTokenizer::readToken( | ||
| 655 | if (!presented_eof) { | 636 | if (!presented_eof) { |
| 656 | presentEOF(); | 637 | presentEOF(); |
| 657 | presented_eof = true; | 638 | presented_eof = true; |
| 658 | - if ((this->m->type == tt_eof) && (!this->m->allow_eof)) { | 639 | + if ((this->type == tt_eof) && (!this->allow_eof)) { |
| 659 | // Nothing in the qpdf library calls readToken | 640 | // Nothing in the qpdf library calls readToken |
| 660 | // without allowEOF anymore, so this case is not | 641 | // without allowEOF anymore, so this case is not |
| 661 | // exercised. | 642 | // exercised. |
| 662 | - this->m->type = tt_bad; | ||
| 663 | - this->m->error_message = "unexpected EOF"; | 643 | + this->type = tt_bad; |
| 644 | + this->error_message = "unexpected EOF"; | ||
| 664 | offset = input->getLastOffset(); | 645 | offset = input->getLastOffset(); |
| 665 | } | 646 | } |
| 666 | } else { | 647 | } else { |
| @@ -672,13 +653,13 @@ QPDFTokenizer::readToken( | @@ -672,13 +653,13 @@ QPDFTokenizer::readToken( | ||
| 672 | if (betweenTokens() && (input->getLastOffset() == offset)) { | 653 | if (betweenTokens() && (input->getLastOffset() == offset)) { |
| 673 | ++offset; | 654 | ++offset; |
| 674 | } | 655 | } |
| 675 | - if (max_len && (this->m->raw_val.length() >= max_len) && | ||
| 676 | - (this->m->state != st_token_ready)) { | 656 | + if (max_len && (this->raw_val.length() >= max_len) && |
| 657 | + (this->state != st_token_ready)) { | ||
| 677 | // terminate this token now | 658 | // terminate this token now |
| 678 | QTC::TC("qpdf", "QPDFTokenizer block long token"); | 659 | QTC::TC("qpdf", "QPDFTokenizer block long token"); |
| 679 | - this->m->type = tt_bad; | ||
| 680 | - this->m->state = st_token_ready; | ||
| 681 | - this->m->error_message = | 660 | + this->type = tt_bad; |
| 661 | + this->state = st_token_ready; | ||
| 662 | + this->error_message = | ||
| 682 | "exceeded allowable length while reading token"; | 663 | "exceeded allowable length while reading token"; |
| 683 | } | 664 | } |
| 684 | } | 665 | } |