Commit c08bb0ec0208683ecd0ad89c3d46d4d66b938fbd
1 parent: cef6425b
Remove QPDFTokenizer::Members
Showing 2 changed files with 201 additions and 233 deletions
include/qpdf/QPDFTokenizer.hh
| ... | ... | @@ -211,42 +211,29 @@ class QPDFTokenizer |
| 211 | 211 | st_token_ready |
| 212 | 212 | }; |
| 213 | 213 | |
| 214 | - class Members | |
| 215 | - { | |
| 216 | - friend class QPDFTokenizer; | |
| 217 | - | |
| 218 | - public: | |
| 219 | - QPDF_DLL | |
| 220 | - ~Members() = default; | |
| 221 | - | |
| 222 | - private: | |
| 223 | - Members(); | |
| 224 | - Members(Members const&) = delete; | |
| 225 | - void reset(); | |
| 226 | - | |
| 227 | - // Lexer state | |
| 228 | - state_e state; | |
| 229 | - | |
| 230 | - bool allow_eof; | |
| 231 | - bool include_ignorable; | |
| 232 | - | |
| 233 | - // Current token accumulation | |
| 234 | - token_type_e type; | |
| 235 | - std::string val; | |
| 236 | - std::string raw_val; | |
| 237 | - std::string error_message; | |
| 238 | - bool unread_char; | |
| 239 | - char char_to_unread; | |
| 240 | - size_t inline_image_bytes; | |
| 241 | - | |
| 242 | - // State for strings | |
| 243 | - int string_depth; | |
| 244 | - bool string_ignoring_newline; | |
| 245 | - char bs_num_register[4]; | |
| 246 | - bool last_char_was_bs; | |
| 247 | - bool last_char_was_cr; | |
| 248 | - }; | |
| 249 | - std::shared_ptr<Members> m; | |
| 214 | + void reset(); | |
| 215 | + | |
| 216 | + // Lexer state | |
| 217 | + state_e state; | |
| 218 | + | |
| 219 | + bool allow_eof; | |
| 220 | + bool include_ignorable; | |
| 221 | + | |
| 222 | + // Current token accumulation | |
| 223 | + token_type_e type; | |
| 224 | + std::string val; | |
| 225 | + std::string raw_val; | |
| 226 | + std::string error_message; | |
| 227 | + bool unread_char; | |
| 228 | + char char_to_unread; | |
| 229 | + size_t inline_image_bytes; | |
| 230 | + | |
| 231 | + // State for strings | |
| 232 | + int string_depth; | |
| 233 | + bool string_ignoring_newline; | |
| 234 | + char bs_num_register[4]; | |
| 235 | + bool last_char_was_bs; | |
| 236 | + bool last_char_was_cr; | |
| 250 | 237 | }; |
| 251 | 238 | |
| 252 | 239 | #endif // QPDFTOKENIZER_HH | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -73,15 +73,8 @@ QPDFWordTokenFinder::check() |
| 73 | 73 | return true; |
| 74 | 74 | } |
| 75 | 75 | |
| 76 | -QPDFTokenizer::Members::Members() : | |
| 77 | - allow_eof(false), | |
| 78 | - include_ignorable(false) | |
| 79 | -{ | |
| 80 | - reset(); | |
| 81 | -} | |
| 82 | - | |
| 83 | 76 | void |
| 84 | -QPDFTokenizer::Members::reset() | |
| 77 | +QPDFTokenizer::reset() | |
| 85 | 78 | { |
| 86 | 79 | state = st_top; |
| 87 | 80 | type = tt_bad; |
| ... | ... | @@ -110,20 +103,22 @@ QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : |
| 110 | 103 | } |
| 111 | 104 | |
| 112 | 105 | QPDFTokenizer::QPDFTokenizer() : |
| 113 | - m(new Members()) | |
| 106 | + allow_eof(false), | |
| 107 | + include_ignorable(false) | |
| 114 | 108 | { |
| 109 | + reset(); | |
| 115 | 110 | } |
| 116 | 111 | |
| 117 | 112 | void |
| 118 | 113 | QPDFTokenizer::allowEOF() |
| 119 | 114 | { |
| 120 | - this->m->allow_eof = true; | |
| 115 | + this->allow_eof = true; | |
| 121 | 116 | } |
| 122 | 117 | |
| 123 | 118 | void |
| 124 | 119 | QPDFTokenizer::includeIgnorable() |
| 125 | 120 | { |
| 126 | - this->m->include_ignorable = true; | |
| 121 | + this->include_ignorable = true; | |
| 127 | 122 | } |
| 128 | 123 | |
| 129 | 124 | bool |
| ... | ... | @@ -141,28 +136,27 @@ QPDFTokenizer::isDelimiter(char ch) |
| 141 | 136 | void |
| 142 | 137 | QPDFTokenizer::resolveLiteral() |
| 143 | 138 | { |
| 144 | - if ((this->m->val.length() > 0) && (this->m->val.at(0) == '/')) { | |
| 145 | - this->m->type = tt_name; | |
| 139 | + if ((this->val.length() > 0) && (this->val.at(0) == '/')) { | |
| 140 | + this->type = tt_name; | |
| 146 | 141 | // Deal with # in name token. Note: '/' by itself is a |
| 147 | 142 | // valid name, so don't strip leading /. That way we |
| 148 | 143 | // don't have to deal with the empty string as a name. |
| 149 | 144 | std::string nval = "/"; |
| 150 | - size_t len = this->m->val.length(); | |
| 145 | + size_t len = this->val.length(); | |
| 151 | 146 | for (size_t i = 1; i < len; ++i) { |
| 152 | - char ch = this->m->val.at(i); | |
| 147 | + char ch = this->val.at(i); | |
| 153 | 148 | if (ch == '#') { |
| 154 | - if ((i + 2 < len) && | |
| 155 | - QUtil::is_hex_digit(this->m->val.at(i + 1)) && | |
| 156 | - QUtil::is_hex_digit(this->m->val.at(i + 2))) { | |
| 149 | + if ((i + 2 < len) && QUtil::is_hex_digit(this->val.at(i + 1)) && | |
| 150 | + QUtil::is_hex_digit(this->val.at(i + 2))) { | |
| 157 | 151 | char num[3]; |
| 158 | - num[0] = this->m->val.at(i + 1); | |
| 159 | - num[1] = this->m->val.at(i + 2); | |
| 152 | + num[0] = this->val.at(i + 1); | |
| 153 | + num[1] = this->val.at(i + 2); | |
| 160 | 154 | num[2] = '\0'; |
| 161 | 155 | char ch2 = static_cast<char>(strtol(num, nullptr, 16)); |
| 162 | 156 | if (ch2 == '\0') { |
| 163 | - this->m->type = tt_bad; | |
| 157 | + this->type = tt_bad; | |
| 164 | 158 | QTC::TC("qpdf", "QPDFTokenizer null in name"); |
| 165 | - this->m->error_message = | |
| 159 | + this->error_message = | |
| 166 | 160 | "null character not allowed in name token"; |
| 167 | 161 | nval += "#00"; |
| 168 | 162 | } else { |
| ... | ... | @@ -171,7 +165,7 @@ QPDFTokenizer::resolveLiteral() |
| 171 | 165 | i += 2; |
| 172 | 166 | } else { |
| 173 | 167 | QTC::TC("qpdf", "QPDFTokenizer bad name"); |
| 174 | - this->m->error_message = | |
| 168 | + this->error_message = | |
| 175 | 169 | "name with stray # will not work with PDF >= 1.2"; |
| 176 | 170 | // Use null to encode a bad # -- this is reversed |
| 177 | 171 | // in QPDF_Name::normalizeName. |
| ... | ... | @@ -181,29 +175,29 @@ QPDFTokenizer::resolveLiteral() |
| 181 | 175 | nval.append(1, ch); |
| 182 | 176 | } |
| 183 | 177 | } |
| 184 | - this->m->val = nval; | |
| 185 | - } else if (QUtil::is_number(this->m->val.c_str())) { | |
| 186 | - if (this->m->val.find('.') != std::string::npos) { | |
| 187 | - this->m->type = tt_real; | |
| 178 | + this->val = nval; | |
| 179 | + } else if (QUtil::is_number(this->val.c_str())) { | |
| 180 | + if (this->val.find('.') != std::string::npos) { | |
| 181 | + this->type = tt_real; | |
| 188 | 182 | } else { |
| 189 | - this->m->type = tt_integer; | |
| 183 | + this->type = tt_integer; | |
| 190 | 184 | } |
| 191 | - } else if ((this->m->val == "true") || (this->m->val == "false")) { | |
| 192 | - this->m->type = tt_bool; | |
| 193 | - } else if (this->m->val == "null") { | |
| 194 | - this->m->type = tt_null; | |
| 185 | + } else if ((this->val == "true") || (this->val == "false")) { | |
| 186 | + this->type = tt_bool; | |
| 187 | + } else if (this->val == "null") { | |
| 188 | + this->type = tt_null; | |
| 195 | 189 | } else { |
| 196 | 190 | // I don't really know what it is, so leave it as tt_word. |
| 197 | 191 | // Lots of cases ($, #, etc.) other than actual words fall |
| 198 | 192 | // into this category, but that's okay at least for now. |
| 199 | - this->m->type = tt_word; | |
| 193 | + this->type = tt_word; | |
| 200 | 194 | } |
| 201 | 195 | } |
| 202 | 196 | |
| 203 | 197 | void |
| 204 | 198 | QPDFTokenizer::presentCharacter(char ch) |
| 205 | 199 | { |
| 206 | - if (this->m->state == st_token_ready) { | |
| 200 | + if (this->state == st_token_ready) { | |
| 207 | 201 | throw std::logic_error( |
| 208 | 202 | "INTERNAL ERROR: QPDF tokenizer presented character " |
| 209 | 203 | "while token is waiting"); |
| ... | ... | @@ -216,159 +210,153 @@ QPDFTokenizer::presentCharacter(char ch) |
| 216 | 210 | // the character that caused a state change in the new state. |
| 217 | 211 | |
| 218 | 212 | bool handled = true; |
| 219 | - if (this->m->state == st_top) { | |
| 213 | + if (this->state == st_top) { | |
| 220 | 214 | // Note: we specifically do not use ctype here. It is |
| 221 | 215 | // locale-dependent. |
| 222 | 216 | if (isSpace(ch)) { |
| 223 | - if (this->m->include_ignorable) { | |
| 224 | - this->m->state = st_in_space; | |
| 225 | - this->m->val += ch; | |
| 217 | + if (this->include_ignorable) { | |
| 218 | + this->state = st_in_space; | |
| 219 | + this->val += ch; | |
| 226 | 220 | } |
| 227 | 221 | } else if (ch == '%') { |
| 228 | - this->m->state = st_in_comment; | |
| 229 | - if (this->m->include_ignorable) { | |
| 230 | - this->m->val += ch; | |
| 222 | + this->state = st_in_comment; | |
| 223 | + if (this->include_ignorable) { | |
| 224 | + this->val += ch; | |
| 231 | 225 | } |
| 232 | 226 | } else if (ch == '(') { |
| 233 | - this->m->string_depth = 1; | |
| 234 | - this->m->string_ignoring_newline = false; | |
| 235 | - memset( | |
| 236 | - this->m->bs_num_register, | |
| 237 | - '\0', | |
| 238 | - sizeof(this->m->bs_num_register)); | |
| 239 | - this->m->last_char_was_bs = false; | |
| 240 | - this->m->last_char_was_cr = false; | |
| 241 | - this->m->state = st_in_string; | |
| 227 | + this->string_depth = 1; | |
| 228 | + this->string_ignoring_newline = false; | |
| 229 | + memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); | |
| 230 | + this->last_char_was_bs = false; | |
| 231 | + this->last_char_was_cr = false; | |
| 232 | + this->state = st_in_string; | |
| 242 | 233 | } else if (ch == '<') { |
| 243 | - this->m->state = st_lt; | |
| 234 | + this->state = st_lt; | |
| 244 | 235 | } else if (ch == '>') { |
| 245 | - this->m->state = st_gt; | |
| 236 | + this->state = st_gt; | |
| 246 | 237 | } else { |
| 247 | - this->m->val += ch; | |
| 238 | + this->val += ch; | |
| 248 | 239 | if (ch == ')') { |
| 249 | - this->m->type = tt_bad; | |
| 240 | + this->type = tt_bad; | |
| 250 | 241 | QTC::TC("qpdf", "QPDFTokenizer bad )"); |
| 251 | - this->m->error_message = "unexpected )"; | |
| 252 | - this->m->state = st_token_ready; | |
| 242 | + this->error_message = "unexpected )"; | |
| 243 | + this->state = st_token_ready; | |
| 253 | 244 | } else if (ch == '[') { |
| 254 | - this->m->type = tt_array_open; | |
| 255 | - this->m->state = st_token_ready; | |
| 245 | + this->type = tt_array_open; | |
| 246 | + this->state = st_token_ready; | |
| 256 | 247 | } else if (ch == ']') { |
| 257 | - this->m->type = tt_array_close; | |
| 258 | - this->m->state = st_token_ready; | |
| 248 | + this->type = tt_array_close; | |
| 249 | + this->state = st_token_ready; | |
| 259 | 250 | } else if (ch == '{') { |
| 260 | - this->m->type = tt_brace_open; | |
| 261 | - this->m->state = st_token_ready; | |
| 251 | + this->type = tt_brace_open; | |
| 252 | + this->state = st_token_ready; | |
| 262 | 253 | } else if (ch == '}') { |
| 263 | - this->m->type = tt_brace_close; | |
| 264 | - this->m->state = st_token_ready; | |
| 254 | + this->type = tt_brace_close; | |
| 255 | + this->state = st_token_ready; | |
| 265 | 256 | } else { |
| 266 | - this->m->state = st_literal; | |
| 257 | + this->state = st_literal; | |
| 267 | 258 | } |
| 268 | 259 | } |
| 269 | - } else if (this->m->state == st_in_space) { | |
| 260 | + } else if (this->state == st_in_space) { | |
| 270 | 261 | // We only enter this state if include_ignorable is true. |
| 271 | 262 | if (!isSpace(ch)) { |
| 272 | - this->m->type = tt_space; | |
| 273 | - this->m->unread_char = true; | |
| 274 | - this->m->char_to_unread = ch; | |
| 275 | - this->m->state = st_token_ready; | |
| 263 | + this->type = tt_space; | |
| 264 | + this->unread_char = true; | |
| 265 | + this->char_to_unread = ch; | |
| 266 | + this->state = st_token_ready; | |
| 276 | 267 | } else { |
| 277 | - this->m->val += ch; | |
| 268 | + this->val += ch; | |
| 278 | 269 | } |
| 279 | - } else if (this->m->state == st_in_comment) { | |
| 270 | + } else if (this->state == st_in_comment) { | |
| 280 | 271 | if ((ch == '\r') || (ch == '\n')) { |
| 281 | - if (this->m->include_ignorable) { | |
| 282 | - this->m->type = tt_comment; | |
| 283 | - this->m->unread_char = true; | |
| 284 | - this->m->char_to_unread = ch; | |
| 285 | - this->m->state = st_token_ready; | |
| 272 | + if (this->include_ignorable) { | |
| 273 | + this->type = tt_comment; | |
| 274 | + this->unread_char = true; | |
| 275 | + this->char_to_unread = ch; | |
| 276 | + this->state = st_token_ready; | |
| 286 | 277 | } else { |
| 287 | - this->m->state = st_top; | |
| 278 | + this->state = st_top; | |
| 288 | 279 | } |
| 289 | - } else if (this->m->include_ignorable) { | |
| 290 | - this->m->val += ch; | |
| 280 | + } else if (this->include_ignorable) { | |
| 281 | + this->val += ch; | |
| 291 | 282 | } |
| 292 | - } else if (this->m->state == st_lt) { | |
| 283 | + } else if (this->state == st_lt) { | |
| 293 | 284 | if (ch == '<') { |
| 294 | - this->m->val = "<<"; | |
| 295 | - this->m->type = tt_dict_open; | |
| 296 | - this->m->state = st_token_ready; | |
| 285 | + this->val = "<<"; | |
| 286 | + this->type = tt_dict_open; | |
| 287 | + this->state = st_token_ready; | |
| 297 | 288 | } else { |
| 298 | 289 | handled = false; |
| 299 | - this->m->state = st_in_hexstring; | |
| 290 | + this->state = st_in_hexstring; | |
| 300 | 291 | } |
| 301 | - } else if (this->m->state == st_gt) { | |
| 292 | + } else if (this->state == st_gt) { | |
| 302 | 293 | if (ch == '>') { |
| 303 | - this->m->val = ">>"; | |
| 304 | - this->m->type = tt_dict_close; | |
| 305 | - this->m->state = st_token_ready; | |
| 294 | + this->val = ">>"; | |
| 295 | + this->type = tt_dict_close; | |
| 296 | + this->state = st_token_ready; | |
| 306 | 297 | } else { |
| 307 | - this->m->val = ">"; | |
| 308 | - this->m->type = tt_bad; | |
| 298 | + this->val = ">"; | |
| 299 | + this->type = tt_bad; | |
| 309 | 300 | QTC::TC("qpdf", "QPDFTokenizer bad >"); |
| 310 | - this->m->error_message = "unexpected >"; | |
| 311 | - this->m->unread_char = true; | |
| 312 | - this->m->char_to_unread = ch; | |
| 313 | - this->m->state = st_token_ready; | |
| 301 | + this->error_message = "unexpected >"; | |
| 302 | + this->unread_char = true; | |
| 303 | + this->char_to_unread = ch; | |
| 304 | + this->state = st_token_ready; | |
| 314 | 305 | } |
| 315 | - } else if (this->m->state == st_in_string) { | |
| 316 | - if (this->m->string_ignoring_newline && (ch != '\n')) { | |
| 317 | - this->m->string_ignoring_newline = false; | |
| 306 | + } else if (this->state == st_in_string) { | |
| 307 | + if (this->string_ignoring_newline && (ch != '\n')) { | |
| 308 | + this->string_ignoring_newline = false; | |
| 318 | 309 | } |
| 319 | 310 | |
| 320 | - size_t bs_num_count = strlen(this->m->bs_num_register); | |
| 311 | + size_t bs_num_count = strlen(this->bs_num_register); | |
| 321 | 312 | bool ch_is_octal = ((ch >= '0') && (ch <= '7')); |
| 322 | 313 | if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) { |
| 323 | 314 | // We've accumulated \ddd. PDF Spec says to ignore |
| 324 | 315 | // high-order overflow. |
| 325 | - this->m->val += | |
| 326 | - static_cast<char>(strtol(this->m->bs_num_register, nullptr, 8)); | |
| 327 | - memset( | |
| 328 | - this->m->bs_num_register, | |
| 329 | - '\0', | |
| 330 | - sizeof(this->m->bs_num_register)); | |
| 316 | + this->val += | |
| 317 | + static_cast<char>(strtol(this->bs_num_register, nullptr, 8)); | |
| 318 | + memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); | |
| 331 | 319 | bs_num_count = 0; |
| 332 | 320 | } |
| 333 | 321 | |
| 334 | - if (this->m->string_ignoring_newline && (ch == '\n')) { | |
| 322 | + if (this->string_ignoring_newline && (ch == '\n')) { | |
| 335 | 323 | // ignore |
| 336 | - this->m->string_ignoring_newline = false; | |
| 324 | + this->string_ignoring_newline = false; | |
| 337 | 325 | } else if ( |
| 338 | - ch_is_octal && (this->m->last_char_was_bs || (bs_num_count > 0))) { | |
| 339 | - this->m->bs_num_register[bs_num_count++] = ch; | |
| 340 | - } else if (this->m->last_char_was_bs) { | |
| 326 | + ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) { | |
| 327 | + this->bs_num_register[bs_num_count++] = ch; | |
| 328 | + } else if (this->last_char_was_bs) { | |
| 341 | 329 | switch (ch) { |
| 342 | 330 | case 'n': |
| 343 | - this->m->val += '\n'; | |
| 331 | + this->val += '\n'; | |
| 344 | 332 | break; |
| 345 | 333 | |
| 346 | 334 | case 'r': |
| 347 | - this->m->val += '\r'; | |
| 335 | + this->val += '\r'; | |
| 348 | 336 | break; |
| 349 | 337 | |
| 350 | 338 | case 't': |
| 351 | - this->m->val += '\t'; | |
| 339 | + this->val += '\t'; | |
| 352 | 340 | break; |
| 353 | 341 | |
| 354 | 342 | case 'b': |
| 355 | - this->m->val += '\b'; | |
| 343 | + this->val += '\b'; | |
| 356 | 344 | break; |
| 357 | 345 | |
| 358 | 346 | case 'f': |
| 359 | - this->m->val += '\f'; | |
| 347 | + this->val += '\f'; | |
| 360 | 348 | break; |
| 361 | 349 | |
| 362 | 350 | case '\n': |
| 363 | 351 | break; |
| 364 | 352 | |
| 365 | 353 | case '\r': |
| 366 | - this->m->string_ignoring_newline = true; | |
| 354 | + this->string_ignoring_newline = true; | |
| 367 | 355 | break; |
| 368 | 356 | |
| 369 | 357 | default: |
| 370 | 358 | // PDF spec says backslash is ignored before anything else |
| 371 | - this->m->val += ch; | |
| 359 | + this->val += ch; | |
| 372 | 360 | break; |
| 373 | 361 | } |
| 374 | 362 | } else if (ch == '\\') { |
| ... | ... | @@ -379,28 +367,27 @@ QPDFTokenizer::presentCharacter(char ch) |
| 379 | 367 | "when ch == '\\'"); |
| 380 | 368 | } |
| 381 | 369 | } else if (ch == '(') { |
| 382 | - this->m->val += ch; | |
| 383 | - ++this->m->string_depth; | |
| 384 | - } else if ((ch == ')') && (--this->m->string_depth == 0)) { | |
| 385 | - this->m->type = tt_string; | |
| 386 | - this->m->state = st_token_ready; | |
| 370 | + this->val += ch; | |
| 371 | + ++this->string_depth; | |
| 372 | + } else if ((ch == ')') && (--this->string_depth == 0)) { | |
| 373 | + this->type = tt_string; | |
| 374 | + this->state = st_token_ready; | |
| 387 | 375 | } else if (ch == '\r') { |
| 388 | 376 | // CR by itself is converted to LF |
| 389 | - this->m->val += '\n'; | |
| 377 | + this->val += '\n'; | |
| 390 | 378 | } else if (ch == '\n') { |
| 391 | 379 | // CR LF is converted to LF |
| 392 | - if (!this->m->last_char_was_cr) { | |
| 393 | - this->m->val += ch; | |
| 380 | + if (!this->last_char_was_cr) { | |
| 381 | + this->val += ch; | |
| 394 | 382 | } |
| 395 | 383 | } else { |
| 396 | - this->m->val += ch; | |
| 384 | + this->val += ch; | |
| 397 | 385 | } |
| 398 | 386 | |
| 399 | - this->m->last_char_was_cr = | |
| 400 | - ((!this->m->string_ignoring_newline) && (ch == '\r')); | |
| 401 | - this->m->last_char_was_bs = | |
| 402 | - ((!this->m->last_char_was_bs) && (ch == '\\')); | |
| 403 | - } else if (this->m->state == st_literal) { | |
| 387 | + this->last_char_was_cr = | |
| 388 | + ((!this->string_ignoring_newline) && (ch == '\r')); | |
| 389 | + this->last_char_was_bs = ((!this->last_char_was_bs) && (ch == '\\')); | |
| 390 | + } else if (this->state == st_literal) { | |
| 404 | 391 | if (isDelimiter(ch)) { |
| 405 | 392 | // A C-locale whitespace character or delimiter terminates |
| 406 | 393 | // token. It is important to unread the whitespace |
| ... | ... | @@ -410,21 +397,21 @@ QPDFTokenizer::presentCharacter(char ch) |
| 410 | 397 | // though not on any files in the test suite as of this |
| 411 | 398 | // writing. |
| 412 | 399 | |
| 413 | - this->m->type = tt_word; | |
| 414 | - this->m->unread_char = true; | |
| 415 | - this->m->char_to_unread = ch; | |
| 416 | - this->m->state = st_token_ready; | |
| 400 | + this->type = tt_word; | |
| 401 | + this->unread_char = true; | |
| 402 | + this->char_to_unread = ch; | |
| 403 | + this->state = st_token_ready; | |
| 417 | 404 | } else { |
| 418 | - this->m->val += ch; | |
| 405 | + this->val += ch; | |
| 419 | 406 | } |
| 420 | - } else if (this->m->state == st_inline_image) { | |
| 421 | - this->m->val += ch; | |
| 422 | - size_t len = this->m->val.length(); | |
| 423 | - if (len == this->m->inline_image_bytes) { | |
| 407 | + } else if (this->state == st_inline_image) { | |
| 408 | + this->val += ch; | |
| 409 | + size_t len = this->val.length(); | |
| 410 | + if (len == this->inline_image_bytes) { | |
| 424 | 411 | QTC::TC("qpdf", "QPDFTokenizer found EI by byte count"); |
| 425 | - this->m->type = tt_inline_image; | |
| 426 | - this->m->inline_image_bytes = 0; | |
| 427 | - this->m->state = st_token_ready; | |
| 412 | + this->type = tt_inline_image; | |
| 413 | + this->inline_image_bytes = 0; | |
| 414 | + this->state = st_token_ready; | |
| 428 | 415 | } |
| 429 | 416 | } else { |
| 430 | 417 | handled = false; |
| ... | ... | @@ -432,83 +419,81 @@ QPDFTokenizer::presentCharacter(char ch) |
| 432 | 419 | |
| 433 | 420 | if (handled) { |
| 434 | 421 | // okay |
| 435 | - } else if (this->m->state == st_in_hexstring) { | |
| 422 | + } else if (this->state == st_in_hexstring) { | |
| 436 | 423 | if (ch == '>') { |
| 437 | - this->m->type = tt_string; | |
| 438 | - this->m->state = st_token_ready; | |
| 439 | - if (this->m->val.length() % 2) { | |
| 424 | + this->type = tt_string; | |
| 425 | + this->state = st_token_ready; | |
| 426 | + if (this->val.length() % 2) { | |
| 440 | 427 | // PDF spec says odd hexstrings have implicit |
| 441 | 428 | // trailing 0. |
| 442 | - this->m->val += '0'; | |
| 429 | + this->val += '0'; | |
| 443 | 430 | } |
| 444 | 431 | char num[3]; |
| 445 | 432 | num[2] = '\0'; |
| 446 | 433 | std::string nval; |
| 447 | - for (unsigned int i = 0; i < this->m->val.length(); i += 2) { | |
| 448 | - num[0] = this->m->val.at(i); | |
| 449 | - num[1] = this->m->val.at(i + 1); | |
| 434 | + for (unsigned int i = 0; i < this->val.length(); i += 2) { | |
| 435 | + num[0] = this->val.at(i); | |
| 436 | + num[1] = this->val.at(i + 1); | |
| 450 | 437 | char nch = static_cast<char>(strtol(num, nullptr, 16)); |
| 451 | 438 | nval += nch; |
| 452 | 439 | } |
| 453 | - this->m->val = nval; | |
| 440 | + this->val = nval; | |
| 454 | 441 | } else if (QUtil::is_hex_digit(ch)) { |
| 455 | - this->m->val += ch; | |
| 442 | + this->val += ch; | |
| 456 | 443 | } else if (isSpace(ch)) { |
| 457 | 444 | // ignore |
| 458 | 445 | } else { |
| 459 | - this->m->type = tt_bad; | |
| 446 | + this->type = tt_bad; | |
| 460 | 447 | QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); |
| 461 | - this->m->error_message = | |
| 448 | + this->error_message = | |
| 462 | 449 | std::string("invalid character (") + ch + ") in hexstring"; |
| 463 | - this->m->state = st_token_ready; | |
| 450 | + this->state = st_token_ready; | |
| 464 | 451 | } |
| 465 | 452 | } else { |
| 466 | 453 | throw std::logic_error( |
| 467 | 454 | "INTERNAL ERROR: invalid state while reading token"); |
| 468 | 455 | } |
| 469 | 456 | |
| 470 | - if ((this->m->state == st_token_ready) && (this->m->type == tt_word)) { | |
| 457 | + if ((this->state == st_token_ready) && (this->type == tt_word)) { | |
| 471 | 458 | resolveLiteral(); |
| 472 | 459 | } |
| 473 | 460 | |
| 474 | 461 | if (!(betweenTokens() || |
| 475 | - ((this->m->state == st_token_ready) && this->m->unread_char))) { | |
| 476 | - this->m->raw_val += orig_ch; | |
| 462 | + ((this->state == st_token_ready) && this->unread_char))) { | |
| 463 | + this->raw_val += orig_ch; | |
| 477 | 464 | } |
| 478 | 465 | } |
| 479 | 466 | |
| 480 | 467 | void |
| 481 | 468 | QPDFTokenizer::presentEOF() |
| 482 | 469 | { |
| 483 | - if (this->m->state == st_literal) { | |
| 470 | + if (this->state == st_literal) { | |
| 484 | 471 | QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); |
| 485 | 472 | resolveLiteral(); |
| 486 | - } else if ( | |
| 487 | - (this->m->include_ignorable) && (this->m->state == st_in_space)) { | |
| 488 | - this->m->type = tt_space; | |
| 489 | - } else if ( | |
| 490 | - (this->m->include_ignorable) && (this->m->state == st_in_comment)) { | |
| 491 | - this->m->type = tt_comment; | |
| 473 | + } else if ((this->include_ignorable) && (this->state == st_in_space)) { | |
| 474 | + this->type = tt_space; | |
| 475 | + } else if ((this->include_ignorable) && (this->state == st_in_comment)) { | |
| 476 | + this->type = tt_comment; | |
| 492 | 477 | } else if (betweenTokens()) { |
| 493 | - this->m->type = tt_eof; | |
| 494 | - } else if (this->m->state != st_token_ready) { | |
| 478 | + this->type = tt_eof; | |
| 479 | + } else if (this->state != st_token_ready) { | |
| 495 | 480 | QTC::TC("qpdf", "QPDFTokenizer EOF reading token"); |
| 496 | - this->m->type = tt_bad; | |
| 497 | - this->m->error_message = "EOF while reading token"; | |
| 481 | + this->type = tt_bad; | |
| 482 | + this->error_message = "EOF while reading token"; | |
| 498 | 483 | } |
| 499 | 484 | |
| 500 | - this->m->state = st_token_ready; | |
| 485 | + this->state = st_token_ready; | |
| 501 | 486 | } |
| 502 | 487 | |
| 503 | 488 | void |
| 504 | 489 | QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) |
| 505 | 490 | { |
| 506 | - if (this->m->state != st_top) { | |
| 491 | + if (this->state != st_top) { | |
| 507 | 492 | throw std::logic_error("QPDFTokenizer::expectInlineImage called" |
| 508 | 493 | " when tokenizer is in improper state"); |
| 509 | 494 | } |
| 510 | 495 | findEI(input); |
| 511 | - this->m->state = st_inline_image; | |
| 496 | + this->state = st_inline_image; | |
| 512 | 497 | } |
| 513 | 498 | |
| 514 | 499 | void |
| ... | ... | @@ -537,7 +522,7 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) |
| 537 | 522 | if (!input->findFirst("EI", input->tell(), 0, f)) { |
| 538 | 523 | break; |
| 539 | 524 | } |
| 540 | - this->m->inline_image_bytes = QIntC::to_size(input->tell() - pos - 2); | |
| 525 | + this->inline_image_bytes = QIntC::to_size(input->tell() - pos - 2); | |
| 541 | 526 | |
| 542 | 527 | QPDFTokenizer check; |
| 543 | 528 | bool found_bad = false; |
| ... | ... | @@ -610,19 +595,16 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) |
| 610 | 595 | bool |
| 611 | 596 | QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) |
| 612 | 597 | { |
| 613 | - bool ready = (this->m->state == st_token_ready); | |
| 614 | - unread_char = this->m->unread_char; | |
| 615 | - ch = this->m->char_to_unread; | |
| 598 | + bool ready = (this->state == st_token_ready); | |
| 599 | + unread_char = this->unread_char; | |
| 600 | + ch = this->char_to_unread; | |
| 616 | 601 | if (ready) { |
| 617 | - if (this->m->type == tt_bad) { | |
| 618 | - this->m->val = this->m->raw_val; | |
| 602 | + if (this->type == tt_bad) { | |
| 603 | + this->val = this->raw_val; | |
| 619 | 604 | } |
| 620 | - token = Token( | |
| 621 | - this->m->type, | |
| 622 | - this->m->val, | |
| 623 | - this->m->raw_val, | |
| 624 | - this->m->error_message); | |
| 625 | - this->m->reset(); | |
| 605 | + token = | |
| 606 | + Token(this->type, this->val, this->raw_val, this->error_message); | |
| 607 | + this->reset(); | |
| 626 | 608 | } |
| 627 | 609 | return ready; |
| 628 | 610 | } |
| ... | ... | @@ -631,10 +613,9 @@ bool |
| 631 | 613 | QPDFTokenizer::betweenTokens() |
| 632 | 614 | { |
| 633 | 615 | return ( |
| 634 | - (this->m->state == st_top) || | |
| 635 | - ((!this->m->include_ignorable) && | |
| 636 | - ((this->m->state == st_in_comment) || | |
| 637 | - (this->m->state == st_in_space)))); | |
| 616 | + (this->state == st_top) || | |
| 617 | + ((!this->include_ignorable) && | |
| 618 | + ((this->state == st_in_comment) || (this->state == st_in_space)))); | |
| 638 | 619 | } |
| 639 | 620 | |
| 640 | 621 | QPDFTokenizer::Token |
| ... | ... | @@ -655,12 +636,12 @@ QPDFTokenizer::readToken( |
| 655 | 636 | if (!presented_eof) { |
| 656 | 637 | presentEOF(); |
| 657 | 638 | presented_eof = true; |
| 658 | - if ((this->m->type == tt_eof) && (!this->m->allow_eof)) { | |
| 639 | + if ((this->type == tt_eof) && (!this->allow_eof)) { | |
| 659 | 640 | // Nothing in the qpdf library calls readToken |
| 660 | 641 | // without allowEOF anymore, so this case is not |
| 661 | 642 | // exercised. |
| 662 | - this->m->type = tt_bad; | |
| 663 | - this->m->error_message = "unexpected EOF"; | |
| 643 | + this->type = tt_bad; | |
| 644 | + this->error_message = "unexpected EOF"; | |
| 664 | 645 | offset = input->getLastOffset(); |
| 665 | 646 | } |
| 666 | 647 | } else { |
| ... | ... | @@ -672,13 +653,13 @@ QPDFTokenizer::readToken( |
| 672 | 653 | if (betweenTokens() && (input->getLastOffset() == offset)) { |
| 673 | 654 | ++offset; |
| 674 | 655 | } |
| 675 | - if (max_len && (this->m->raw_val.length() >= max_len) && | |
| 676 | - (this->m->state != st_token_ready)) { | |
| 656 | + if (max_len && (this->raw_val.length() >= max_len) && | |
| 657 | + (this->state != st_token_ready)) { | |
| 677 | 658 | // terminate this token now |
| 678 | 659 | QTC::TC("qpdf", "QPDFTokenizer block long token"); |
| 679 | - this->m->type = tt_bad; | |
| 680 | - this->m->state = st_token_ready; | |
| 681 | - this->m->error_message = | |
| 660 | + this->type = tt_bad; | |
| 661 | + this->state = st_token_ready; | |
| 662 | + this->error_message = | |
| 682 | 663 | "exceeded allowable length while reading token"; |
| 683 | 664 | } |
| 684 | 665 | } | ... | ... |