Commit 2699ecf13e8559b136ded1986bf18e1a0a51011f
1 parent
d9747486
Push QPDFTokenizer members into a nested structure
This is for protection against future ABI breaking changes.
Showing 2 changed files with 231 additions and 192 deletions
include/qpdf/QPDFTokenizer.hh
| @@ -165,31 +165,53 @@ class QPDFTokenizer | @@ -165,31 +165,53 @@ class QPDFTokenizer | ||
| 165 | size_t max_len = 0); | 165 | size_t max_len = 0); |
| 166 | 166 | ||
| 167 | private: | 167 | private: |
| 168 | - void reset(); | 168 | + // Do not implement copy or assignment |
| 169 | + QPDFTokenizer(QPDFTokenizer const&); | ||
| 170 | + QPDFTokenizer& operator=(QPDFTokenizer const&); | ||
| 171 | + | ||
| 169 | void resolveLiteral(); | 172 | void resolveLiteral(); |
| 170 | bool isSpace(char); | 173 | bool isSpace(char); |
| 171 | 174 | ||
| 172 | - // Lexer state | ||
| 173 | - enum { st_top, st_in_space, st_in_comment, st_in_string, st_lt, st_gt, | ||
| 174 | - st_literal, st_in_hexstring, st_token_ready } state; | ||
| 175 | - | ||
| 176 | - bool pound_special_in_name; | ||
| 177 | - bool allow_eof; | ||
| 178 | - bool include_ignorable; | ||
| 179 | - | ||
| 180 | - // Current token accumulation | ||
| 181 | - token_type_e type; | ||
| 182 | - std::string val; | ||
| 183 | - std::string raw_val; | ||
| 184 | - std::string error_message; | ||
| 185 | - bool unread_char; | ||
| 186 | - char char_to_unread; | ||
| 187 | - | ||
| 188 | - // State for strings | ||
| 189 | - int string_depth; | ||
| 190 | - bool string_ignoring_newline; | ||
| 191 | - char bs_num_register[4]; | ||
| 192 | - bool last_char_was_bs; | 175 | + enum state_e { |
| 176 | + st_top, st_in_space, st_in_comment, st_in_string, st_lt, st_gt, | ||
| 177 | + st_literal, st_in_hexstring, st_token_ready | ||
| 178 | + }; | ||
| 179 | + | ||
| 180 | + class Members | ||
| 181 | + { | ||
| 182 | + friend class QPDFTokenizer; | ||
| 183 | + | ||
| 184 | + public: | ||
| 185 | + QPDF_DLL | ||
| 186 | + ~Members(); | ||
| 187 | + | ||
| 188 | + private: | ||
| 189 | + Members(); | ||
| 190 | + Members(Members const&); | ||
| 191 | + void reset(); | ||
| 192 | + | ||
| 193 | + // Lexer state | ||
| 194 | + state_e state; | ||
| 195 | + | ||
| 196 | + bool pound_special_in_name; | ||
| 197 | + bool allow_eof; | ||
| 198 | + bool include_ignorable; | ||
| 199 | + | ||
| 200 | + // Current token accumulation | ||
| 201 | + token_type_e type; | ||
| 202 | + std::string val; | ||
| 203 | + std::string raw_val; | ||
| 204 | + std::string error_message; | ||
| 205 | + bool unread_char; | ||
| 206 | + char char_to_unread; | ||
| 207 | + | ||
| 208 | + // State for strings | ||
| 209 | + int string_depth; | ||
| 210 | + bool string_ignoring_newline; | ||
| 211 | + char bs_num_register[4]; | ||
| 212 | + bool last_char_was_bs; | ||
| 213 | + }; | ||
| 214 | + PointerHolder<Members> m; | ||
| 193 | }; | 215 | }; |
| 194 | 216 | ||
| 195 | #endif // __QPDFTOKENIZER_HH__ | 217 | #endif // __QPDFTOKENIZER_HH__ |
libqpdf/QPDFTokenizer.cc
| @@ -12,7 +12,7 @@ | @@ -12,7 +12,7 @@ | ||
| 12 | #include <string.h> | 12 | #include <string.h> |
| 13 | #include <cstdlib> | 13 | #include <cstdlib> |
| 14 | 14 | ||
| 15 | -QPDFTokenizer::QPDFTokenizer() : | 15 | +QPDFTokenizer::Members::Members() : |
| 16 | pound_special_in_name(true), | 16 | pound_special_in_name(true), |
| 17 | allow_eof(false), | 17 | allow_eof(false), |
| 18 | include_ignorable(false) | 18 | include_ignorable(false) |
| @@ -21,22 +21,46 @@ QPDFTokenizer::QPDFTokenizer() : | @@ -21,22 +21,46 @@ QPDFTokenizer::QPDFTokenizer() : | ||
| 21 | } | 21 | } |
| 22 | 22 | ||
| 23 | void | 23 | void |
| 24 | +QPDFTokenizer::Members::reset() | ||
| 25 | +{ | ||
| 26 | + state = st_top; | ||
| 27 | + type = tt_bad; | ||
| 28 | + val = ""; | ||
| 29 | + raw_val = ""; | ||
| 30 | + error_message = ""; | ||
| 31 | + unread_char = false; | ||
| 32 | + char_to_unread = '\0'; | ||
| 33 | + string_depth = 0; | ||
| 34 | + string_ignoring_newline = false; | ||
| 35 | + last_char_was_bs = false; | ||
| 36 | +} | ||
| 37 | + | ||
| 38 | +QPDFTokenizer::Members::~Members() | ||
| 39 | +{ | ||
| 40 | +} | ||
| 41 | + | ||
| 42 | +QPDFTokenizer::QPDFTokenizer() : | ||
| 43 | + m(new Members()) | ||
| 44 | +{ | ||
| 45 | +} | ||
| 46 | + | ||
| 47 | +void | ||
| 24 | QPDFTokenizer::allowPoundAnywhereInName() | 48 | QPDFTokenizer::allowPoundAnywhereInName() |
| 25 | { | 49 | { |
| 26 | QTC::TC("qpdf", "QPDFTokenizer allow pound anywhere in name"); | 50 | QTC::TC("qpdf", "QPDFTokenizer allow pound anywhere in name"); |
| 27 | - this->pound_special_in_name = false; | 51 | + this->m->pound_special_in_name = false; |
| 28 | } | 52 | } |
| 29 | 53 | ||
| 30 | void | 54 | void |
| 31 | QPDFTokenizer::allowEOF() | 55 | QPDFTokenizer::allowEOF() |
| 32 | { | 56 | { |
| 33 | - this->allow_eof = true; | 57 | + this->m->allow_eof = true; |
| 34 | } | 58 | } |
| 35 | 59 | ||
| 36 | void | 60 | void |
| 37 | QPDFTokenizer::includeIgnorable() | 61 | QPDFTokenizer::includeIgnorable() |
| 38 | { | 62 | { |
| 39 | - this->include_ignorable = true; | 63 | + this->m->include_ignorable = true; |
| 40 | } | 64 | } |
| 41 | 65 | ||
| 42 | bool | 66 | bool |
| @@ -46,34 +70,19 @@ QPDFTokenizer::isSpace(char ch) | @@ -46,34 +70,19 @@ QPDFTokenizer::isSpace(char ch) | ||
| 46 | } | 70 | } |
| 47 | 71 | ||
| 48 | void | 72 | void |
| 49 | -QPDFTokenizer::reset() | ||
| 50 | -{ | ||
| 51 | - state = st_top; | ||
| 52 | - type = tt_bad; | ||
| 53 | - val = ""; | ||
| 54 | - raw_val = ""; | ||
| 55 | - error_message = ""; | ||
| 56 | - unread_char = false; | ||
| 57 | - char_to_unread = '\0'; | ||
| 58 | - string_depth = 0; | ||
| 59 | - string_ignoring_newline = false; | ||
| 60 | - last_char_was_bs = false; | ||
| 61 | -} | ||
| 62 | - | ||
| 63 | -void | ||
| 64 | QPDFTokenizer::resolveLiteral() | 73 | QPDFTokenizer::resolveLiteral() |
| 65 | { | 74 | { |
| 66 | - if ((val.length() > 0) && (val.at(0) == '/')) | 75 | + if ((this->m->val.length() > 0) && (this->m->val.at(0) == '/')) |
| 67 | { | 76 | { |
| 68 | - type = tt_name; | 77 | + this->m->type = tt_name; |
| 69 | // Deal with # in name token. Note: '/' by itself is a | 78 | // Deal with # in name token. Note: '/' by itself is a |
| 70 | // valid name, so don't strip leading /. That way we | 79 | // valid name, so don't strip leading /. That way we |
| 71 | // don't have to deal with the empty string as a name. | 80 | // don't have to deal with the empty string as a name. |
| 72 | std::string nval = "/"; | 81 | std::string nval = "/"; |
| 73 | - char const* valstr = val.c_str() + 1; | 82 | + char const* valstr = this->m->val.c_str() + 1; |
| 74 | for (char const* p = valstr; *p; ++p) | 83 | for (char const* p = valstr; *p; ++p) |
| 75 | { | 84 | { |
| 76 | - if ((*p == '#') && this->pound_special_in_name) | 85 | + if ((*p == '#') && this->m->pound_special_in_name) |
| 77 | { | 86 | { |
| 78 | if (p[1] && p[2] && | 87 | if (p[1] && p[2] && |
| 79 | QUtil::is_hex_digit(p[1]) && QUtil::is_hex_digit(p[2])) | 88 | QUtil::is_hex_digit(p[1]) && QUtil::is_hex_digit(p[2])) |
| @@ -85,9 +94,9 @@ QPDFTokenizer::resolveLiteral() | @@ -85,9 +94,9 @@ QPDFTokenizer::resolveLiteral() | ||
| 85 | char ch = static_cast<char>(strtol(num, 0, 16)); | 94 | char ch = static_cast<char>(strtol(num, 0, 16)); |
| 86 | if (ch == '\0') | 95 | if (ch == '\0') |
| 87 | { | 96 | { |
| 88 | - type = tt_bad; | 97 | + this->m->type = tt_bad; |
| 89 | QTC::TC("qpdf", "QPDF_Tokenizer null in name"); | 98 | QTC::TC("qpdf", "QPDF_Tokenizer null in name"); |
| 90 | - error_message = | 99 | + this->m->error_message = |
| 91 | "null character not allowed in name token"; | 100 | "null character not allowed in name token"; |
| 92 | nval += "#00"; | 101 | nval += "#00"; |
| 93 | } | 102 | } |
| @@ -100,8 +109,8 @@ QPDFTokenizer::resolveLiteral() | @@ -100,8 +109,8 @@ QPDFTokenizer::resolveLiteral() | ||
| 100 | else | 109 | else |
| 101 | { | 110 | { |
| 102 | QTC::TC("qpdf", "QPDF_Tokenizer bad name"); | 111 | QTC::TC("qpdf", "QPDF_Tokenizer bad name"); |
| 103 | - type = tt_bad; | ||
| 104 | - error_message = "invalid name token"; | 112 | + this->m->type = tt_bad; |
| 113 | + this->m->error_message = "invalid name token"; | ||
| 105 | nval += *p; | 114 | nval += *p; |
| 106 | } | 115 | } |
| 107 | } | 116 | } |
| @@ -110,40 +119,40 @@ QPDFTokenizer::resolveLiteral() | @@ -110,40 +119,40 @@ QPDFTokenizer::resolveLiteral() | ||
| 110 | nval += *p; | 119 | nval += *p; |
| 111 | } | 120 | } |
| 112 | } | 121 | } |
| 113 | - val = nval; | 122 | + this->m->val = nval; |
| 114 | } | 123 | } |
| 115 | - else if (QUtil::is_number(val.c_str())) | 124 | + else if (QUtil::is_number(this->m->val.c_str())) |
| 116 | { | 125 | { |
| 117 | - if (val.find('.') != std::string::npos) | 126 | + if (this->m->val.find('.') != std::string::npos) |
| 118 | { | 127 | { |
| 119 | - type = tt_real; | 128 | + this->m->type = tt_real; |
| 120 | } | 129 | } |
| 121 | else | 130 | else |
| 122 | { | 131 | { |
| 123 | - type = tt_integer; | 132 | + this->m->type = tt_integer; |
| 124 | } | 133 | } |
| 125 | } | 134 | } |
| 126 | - else if ((val == "true") || (val == "false")) | 135 | + else if ((this->m->val == "true") || (this->m->val == "false")) |
| 127 | { | 136 | { |
| 128 | - type = tt_bool; | 137 | + this->m->type = tt_bool; |
| 129 | } | 138 | } |
| 130 | - else if (val == "null") | 139 | + else if (this->m->val == "null") |
| 131 | { | 140 | { |
| 132 | - type = tt_null; | 141 | + this->m->type = tt_null; |
| 133 | } | 142 | } |
| 134 | else | 143 | else |
| 135 | { | 144 | { |
| 136 | // I don't really know what it is, so leave it as tt_word. | 145 | // I don't really know what it is, so leave it as tt_word. |
| 137 | // Lots of cases ($, #, etc.) other than actual words fall | 146 | // Lots of cases ($, #, etc.) other than actual words fall |
| 138 | // into this category, but that's okay at least for now. | 147 | // into this category, but that's okay at least for now. |
| 139 | - type = tt_word; | 148 | + this->m->type = tt_word; |
| 140 | } | 149 | } |
| 141 | } | 150 | } |
| 142 | 151 | ||
| 143 | void | 152 | void |
| 144 | QPDFTokenizer::presentCharacter(char ch) | 153 | QPDFTokenizer::presentCharacter(char ch) |
| 145 | { | 154 | { |
| 146 | - if (state == st_token_ready) | 155 | + if (this->m->state == st_token_ready) |
| 147 | { | 156 | { |
| 148 | throw std::logic_error( | 157 | throw std::logic_error( |
| 149 | "INTERNAL ERROR: QPDF tokenizer presented character " | 158 | "INTERNAL ERROR: QPDF tokenizer presented character " |
| @@ -157,205 +166,210 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -157,205 +166,210 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 157 | // the character that caused a state change in the new state. | 166 | // the character that caused a state change in the new state. |
| 158 | 167 | ||
| 159 | bool handled = true; | 168 | bool handled = true; |
| 160 | - if (state == st_top) | 169 | + if (this->m->state == st_top) |
| 161 | { | 170 | { |
| 162 | // Note: we specifically do not use ctype here. It is | 171 | // Note: we specifically do not use ctype here. It is |
| 163 | // locale-dependent. | 172 | // locale-dependent. |
| 164 | if (isSpace(ch)) | 173 | if (isSpace(ch)) |
| 165 | { | 174 | { |
| 166 | - if (this->include_ignorable) | 175 | + if (this->m->include_ignorable) |
| 167 | { | 176 | { |
| 168 | - state = st_in_space; | ||
| 169 | - val += ch; | 177 | + this->m->state = st_in_space; |
| 178 | + this->m->val += ch; | ||
| 170 | } | 179 | } |
| 171 | } | 180 | } |
| 172 | else if (ch == '%') | 181 | else if (ch == '%') |
| 173 | { | 182 | { |
| 174 | - state = st_in_comment; | ||
| 175 | - if (this->include_ignorable) | 183 | + this->m->state = st_in_comment; |
| 184 | + if (this->m->include_ignorable) | ||
| 176 | { | 185 | { |
| 177 | - val += ch; | 186 | + this->m->val += ch; |
| 178 | } | 187 | } |
| 179 | } | 188 | } |
| 180 | else if (ch == '(') | 189 | else if (ch == '(') |
| 181 | { | 190 | { |
| 182 | - string_depth = 1; | ||
| 183 | - string_ignoring_newline = false; | ||
| 184 | - memset(bs_num_register, '\0', sizeof(bs_num_register)); | ||
| 185 | - last_char_was_bs = false; | ||
| 186 | - state = st_in_string; | 191 | + this->m->string_depth = 1; |
| 192 | + this->m->string_ignoring_newline = false; | ||
| 193 | + memset(this->m->bs_num_register, '\0', | ||
| 194 | + sizeof(this->m->bs_num_register)); | ||
| 195 | + this->m->last_char_was_bs = false; | ||
| 196 | + this->m->state = st_in_string; | ||
| 187 | } | 197 | } |
| 188 | else if (ch == '<') | 198 | else if (ch == '<') |
| 189 | { | 199 | { |
| 190 | - state = st_lt; | 200 | + this->m->state = st_lt; |
| 191 | } | 201 | } |
| 192 | else if (ch == '>') | 202 | else if (ch == '>') |
| 193 | { | 203 | { |
| 194 | - state = st_gt; | 204 | + this->m->state = st_gt; |
| 195 | } | 205 | } |
| 196 | else | 206 | else |
| 197 | { | 207 | { |
| 198 | - val += ch; | 208 | + this->m->val += ch; |
| 199 | if (ch == ')') | 209 | if (ch == ')') |
| 200 | { | 210 | { |
| 201 | - type = tt_bad; | 211 | + this->m->type = tt_bad; |
| 202 | QTC::TC("qpdf", "QPDF_Tokenizer bad )"); | 212 | QTC::TC("qpdf", "QPDF_Tokenizer bad )"); |
| 203 | - error_message = "unexpected )"; | ||
| 204 | - state = st_token_ready; | 213 | + this->m->error_message = "unexpected )"; |
| 214 | + this->m->state = st_token_ready; | ||
| 205 | } | 215 | } |
| 206 | else if (ch == '[') | 216 | else if (ch == '[') |
| 207 | { | 217 | { |
| 208 | - type = tt_array_open; | ||
| 209 | - state = st_token_ready; | 218 | + this->m->type = tt_array_open; |
| 219 | + this->m->state = st_token_ready; | ||
| 210 | } | 220 | } |
| 211 | else if (ch == ']') | 221 | else if (ch == ']') |
| 212 | { | 222 | { |
| 213 | - type = tt_array_close; | ||
| 214 | - state = st_token_ready; | 223 | + this->m->type = tt_array_close; |
| 224 | + this->m->state = st_token_ready; | ||
| 215 | } | 225 | } |
| 216 | else if (ch == '{') | 226 | else if (ch == '{') |
| 217 | { | 227 | { |
| 218 | - type = tt_brace_open; | ||
| 219 | - state = st_token_ready; | 228 | + this->m->type = tt_brace_open; |
| 229 | + this->m->state = st_token_ready; | ||
| 220 | } | 230 | } |
| 221 | else if (ch == '}') | 231 | else if (ch == '}') |
| 222 | { | 232 | { |
| 223 | - type = tt_brace_close; | ||
| 224 | - state = st_token_ready; | 233 | + this->m->type = tt_brace_close; |
| 234 | + this->m->state = st_token_ready; | ||
| 225 | } | 235 | } |
| 226 | else | 236 | else |
| 227 | { | 237 | { |
| 228 | - state = st_literal; | 238 | + this->m->state = st_literal; |
| 229 | } | 239 | } |
| 230 | } | 240 | } |
| 231 | } | 241 | } |
| 232 | - else if (state == st_in_space) | 242 | + else if (this->m->state == st_in_space) |
| 233 | { | 243 | { |
| 234 | // We only enter this state if include_ignorable is true. | 244 | // We only enter this state if include_ignorable is true. |
| 235 | if (! isSpace(ch)) | 245 | if (! isSpace(ch)) |
| 236 | { | 246 | { |
| 237 | - type = tt_space; | ||
| 238 | - unread_char = true; | ||
| 239 | - char_to_unread = ch; | ||
| 240 | - state = st_token_ready; | 247 | + this->m->type = tt_space; |
| 248 | + this->m->unread_char = true; | ||
| 249 | + this->m->char_to_unread = ch; | ||
| 250 | + this->m->state = st_token_ready; | ||
| 241 | } | 251 | } |
| 242 | else | 252 | else |
| 243 | { | 253 | { |
| 244 | - val += ch; | 254 | + this->m->val += ch; |
| 245 | } | 255 | } |
| 246 | } | 256 | } |
| 247 | - else if (state == st_in_comment) | 257 | + else if (this->m->state == st_in_comment) |
| 248 | { | 258 | { |
| 249 | if ((ch == '\r') || (ch == '\n')) | 259 | if ((ch == '\r') || (ch == '\n')) |
| 250 | { | 260 | { |
| 251 | - if (this->include_ignorable) | 261 | + if (this->m->include_ignorable) |
| 252 | { | 262 | { |
| 253 | - type = tt_comment; | ||
| 254 | - unread_char = true; | ||
| 255 | - char_to_unread = ch; | ||
| 256 | - state = st_token_ready; | 263 | + this->m->type = tt_comment; |
| 264 | + this->m->unread_char = true; | ||
| 265 | + this->m->char_to_unread = ch; | ||
| 266 | + this->m->state = st_token_ready; | ||
| 257 | } | 267 | } |
| 258 | else | 268 | else |
| 259 | { | 269 | { |
| 260 | - state = st_top; | 270 | + this->m->state = st_top; |
| 261 | } | 271 | } |
| 262 | } | 272 | } |
| 263 | - else if (this->include_ignorable) | 273 | + else if (this->m->include_ignorable) |
| 264 | { | 274 | { |
| 265 | - val += ch; | 275 | + this->m->val += ch; |
| 266 | } | 276 | } |
| 267 | } | 277 | } |
| 268 | - else if (state == st_lt) | 278 | + else if (this->m->state == st_lt) |
| 269 | { | 279 | { |
| 270 | if (ch == '<') | 280 | if (ch == '<') |
| 271 | { | 281 | { |
| 272 | - val = "<<"; | ||
| 273 | - type = tt_dict_open; | ||
| 274 | - state = st_token_ready; | 282 | + this->m->val = "<<"; |
| 283 | + this->m->type = tt_dict_open; | ||
| 284 | + this->m->state = st_token_ready; | ||
| 275 | } | 285 | } |
| 276 | else | 286 | else |
| 277 | { | 287 | { |
| 278 | handled = false; | 288 | handled = false; |
| 279 | - state = st_in_hexstring; | 289 | + this->m->state = st_in_hexstring; |
| 280 | } | 290 | } |
| 281 | } | 291 | } |
| 282 | - else if (state == st_gt) | 292 | + else if (this->m->state == st_gt) |
| 283 | { | 293 | { |
| 284 | if (ch == '>') | 294 | if (ch == '>') |
| 285 | { | 295 | { |
| 286 | - val = ">>"; | ||
| 287 | - type = tt_dict_close; | ||
| 288 | - state = st_token_ready; | 296 | + this->m->val = ">>"; |
| 297 | + this->m->type = tt_dict_close; | ||
| 298 | + this->m->state = st_token_ready; | ||
| 289 | } | 299 | } |
| 290 | else | 300 | else |
| 291 | { | 301 | { |
| 292 | - val = ">"; | ||
| 293 | - type = tt_bad; | 302 | + this->m->val = ">"; |
| 303 | + this->m->type = tt_bad; | ||
| 294 | QTC::TC("qpdf", "QPDF_Tokenizer bad >"); | 304 | QTC::TC("qpdf", "QPDF_Tokenizer bad >"); |
| 295 | - error_message = "unexpected >"; | ||
| 296 | - unread_char = true; | ||
| 297 | - char_to_unread = ch; | ||
| 298 | - state = st_token_ready; | 305 | + this->m->error_message = "unexpected >"; |
| 306 | + this->m->unread_char = true; | ||
| 307 | + this->m->char_to_unread = ch; | ||
| 308 | + this->m->state = st_token_ready; | ||
| 299 | } | 309 | } |
| 300 | } | 310 | } |
| 301 | - else if (state == st_in_string) | 311 | + else if (this->m->state == st_in_string) |
| 302 | { | 312 | { |
| 303 | - if (string_ignoring_newline && (! ((ch == '\r') || (ch == '\n')))) | 313 | + if (this->m->string_ignoring_newline && |
| 314 | + (! ((ch == '\r') || (ch == '\n')))) | ||
| 304 | { | 315 | { |
| 305 | - string_ignoring_newline = false; | 316 | + this->m->string_ignoring_newline = false; |
| 306 | } | 317 | } |
| 307 | 318 | ||
| 308 | - size_t bs_num_count = strlen(bs_num_register); | 319 | + size_t bs_num_count = strlen(this->m->bs_num_register); |
| 309 | bool ch_is_octal = ((ch >= '0') && (ch <= '7')); | 320 | bool ch_is_octal = ((ch >= '0') && (ch <= '7')); |
| 310 | if ((bs_num_count == 3) || ((bs_num_count > 0) && (! ch_is_octal))) | 321 | if ((bs_num_count == 3) || ((bs_num_count > 0) && (! ch_is_octal))) |
| 311 | { | 322 | { |
| 312 | // We've accumulated \ddd. PDF Spec says to ignore | 323 | // We've accumulated \ddd. PDF Spec says to ignore |
| 313 | // high-order overflow. | 324 | // high-order overflow. |
| 314 | - val += static_cast<char>(strtol(bs_num_register, 0, 8)); | ||
| 315 | - memset(bs_num_register, '\0', sizeof(bs_num_register)); | 325 | + this->m->val += static_cast<char>( |
| 326 | + strtol(this->m->bs_num_register, 0, 8)); | ||
| 327 | + memset(this->m->bs_num_register, '\0', | ||
| 328 | + sizeof(this->m->bs_num_register)); | ||
| 316 | bs_num_count = 0; | 329 | bs_num_count = 0; |
| 317 | } | 330 | } |
| 318 | 331 | ||
| 319 | - if (string_ignoring_newline && ((ch == '\r') || (ch == '\n'))) | 332 | + if (this->m->string_ignoring_newline && ((ch == '\r') || (ch == '\n'))) |
| 320 | { | 333 | { |
| 321 | // ignore | 334 | // ignore |
| 322 | } | 335 | } |
| 323 | - else if (ch_is_octal && (last_char_was_bs || (bs_num_count > 0))) | 336 | + else if (ch_is_octal && |
| 337 | + (this->m->last_char_was_bs || (bs_num_count > 0))) | ||
| 324 | { | 338 | { |
| 325 | - bs_num_register[bs_num_count++] = ch; | 339 | + this->m->bs_num_register[bs_num_count++] = ch; |
| 326 | } | 340 | } |
| 327 | - else if (last_char_was_bs) | 341 | + else if (this->m->last_char_was_bs) |
| 328 | { | 342 | { |
| 329 | switch (ch) | 343 | switch (ch) |
| 330 | { | 344 | { |
| 331 | case 'n': | 345 | case 'n': |
| 332 | - val += '\n'; | 346 | + this->m->val += '\n'; |
| 333 | break; | 347 | break; |
| 334 | 348 | ||
| 335 | case 'r': | 349 | case 'r': |
| 336 | - val += '\r'; | 350 | + this->m->val += '\r'; |
| 337 | break; | 351 | break; |
| 338 | 352 | ||
| 339 | case 't': | 353 | case 't': |
| 340 | - val += '\t'; | 354 | + this->m->val += '\t'; |
| 341 | break; | 355 | break; |
| 342 | 356 | ||
| 343 | case 'b': | 357 | case 'b': |
| 344 | - val += '\b'; | 358 | + this->m->val += '\b'; |
| 345 | break; | 359 | break; |
| 346 | 360 | ||
| 347 | case 'f': | 361 | case 'f': |
| 348 | - val += '\f'; | 362 | + this->m->val += '\f'; |
| 349 | break; | 363 | break; |
| 350 | 364 | ||
| 351 | case '\r': | 365 | case '\r': |
| 352 | case '\n': | 366 | case '\n': |
| 353 | - string_ignoring_newline = true; | 367 | + this->m->string_ignoring_newline = true; |
| 354 | break; | 368 | break; |
| 355 | 369 | ||
| 356 | default: | 370 | default: |
| 357 | // PDF spec says backslash is ignored before anything else | 371 | // PDF spec says backslash is ignored before anything else |
| 358 | - val += ch; | 372 | + this->m->val += ch; |
| 359 | break; | 373 | break; |
| 360 | } | 374 | } |
| 361 | } | 375 | } |
| @@ -371,22 +385,23 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -371,22 +385,23 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 371 | } | 385 | } |
| 372 | else if (ch == '(') | 386 | else if (ch == '(') |
| 373 | { | 387 | { |
| 374 | - val += ch; | ||
| 375 | - ++string_depth; | 388 | + this->m->val += ch; |
| 389 | + ++this->m->string_depth; | ||
| 376 | } | 390 | } |
| 377 | - else if ((ch == ')') && (--string_depth == 0)) | 391 | + else if ((ch == ')') && (--this->m->string_depth == 0)) |
| 378 | { | 392 | { |
| 379 | - type = tt_string; | ||
| 380 | - state = st_token_ready; | 393 | + this->m->type = tt_string; |
| 394 | + this->m->state = st_token_ready; | ||
| 381 | } | 395 | } |
| 382 | else | 396 | else |
| 383 | { | 397 | { |
| 384 | - val += ch; | 398 | + this->m->val += ch; |
| 385 | } | 399 | } |
| 386 | 400 | ||
| 387 | - last_char_was_bs = ((! last_char_was_bs) && (ch == '\\')); | 401 | + this->m->last_char_was_bs = |
| 402 | + ((! this->m->last_char_was_bs) && (ch == '\\')); | ||
| 388 | } | 403 | } |
| 389 | - else if (state == st_literal) | 404 | + else if (this->m->state == st_literal) |
| 390 | { | 405 | { |
| 391 | if (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0) | 406 | if (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0) |
| 392 | { | 407 | { |
| @@ -398,14 +413,14 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -398,14 +413,14 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 398 | // though not on any files in the test suite as of this | 413 | // though not on any files in the test suite as of this |
| 399 | // writing. | 414 | // writing. |
| 400 | 415 | ||
| 401 | - type = tt_word; | ||
| 402 | - unread_char = true; | ||
| 403 | - char_to_unread = ch; | ||
| 404 | - state = st_token_ready; | 416 | + this->m->type = tt_word; |
| 417 | + this->m->unread_char = true; | ||
| 418 | + this->m->char_to_unread = ch; | ||
| 419 | + this->m->state = st_token_ready; | ||
| 405 | } | 420 | } |
| 406 | else | 421 | else |
| 407 | { | 422 | { |
| 408 | - val += ch; | 423 | + this->m->val += ch; |
| 409 | } | 424 | } |
| 410 | } | 425 | } |
| 411 | else | 426 | else |
| @@ -418,33 +433,33 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -418,33 +433,33 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 418 | { | 433 | { |
| 419 | // okay | 434 | // okay |
| 420 | } | 435 | } |
| 421 | - else if (state == st_in_hexstring) | 436 | + else if (this->m->state == st_in_hexstring) |
| 422 | { | 437 | { |
| 423 | if (ch == '>') | 438 | if (ch == '>') |
| 424 | { | 439 | { |
| 425 | - type = tt_string; | ||
| 426 | - state = st_token_ready; | ||
| 427 | - if (val.length() % 2) | 440 | + this->m->type = tt_string; |
| 441 | + this->m->state = st_token_ready; | ||
| 442 | + if (this->m->val.length() % 2) | ||
| 428 | { | 443 | { |
| 429 | // PDF spec says odd hexstrings have implicit | 444 | // PDF spec says odd hexstrings have implicit |
| 430 | // trailing 0. | 445 | // trailing 0. |
| 431 | - val += '0'; | 446 | + this->m->val += '0'; |
| 432 | } | 447 | } |
| 433 | char num[3]; | 448 | char num[3]; |
| 434 | num[2] = '\0'; | 449 | num[2] = '\0'; |
| 435 | std::string nval; | 450 | std::string nval; |
| 436 | - for (unsigned int i = 0; i < val.length(); i += 2) | 451 | + for (unsigned int i = 0; i < this->m->val.length(); i += 2) |
| 437 | { | 452 | { |
| 438 | - num[0] = val.at(i); | ||
| 439 | - num[1] = val.at(i+1); | 453 | + num[0] = this->m->val.at(i); |
| 454 | + num[1] = this->m->val.at(i+1); | ||
| 440 | char nch = static_cast<char>(strtol(num, 0, 16)); | 455 | char nch = static_cast<char>(strtol(num, 0, 16)); |
| 441 | nval += nch; | 456 | nval += nch; |
| 442 | } | 457 | } |
| 443 | - val = nval; | 458 | + this->m->val = nval; |
| 444 | } | 459 | } |
| 445 | else if (QUtil::is_hex_digit(ch)) | 460 | else if (QUtil::is_hex_digit(ch)) |
| 446 | { | 461 | { |
| 447 | - val += ch; | 462 | + this->m->val += ch; |
| 448 | } | 463 | } |
| 449 | else if (isSpace(ch)) | 464 | else if (isSpace(ch)) |
| 450 | { | 465 | { |
| @@ -452,11 +467,11 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -452,11 +467,11 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 452 | } | 467 | } |
| 453 | else | 468 | else |
| 454 | { | 469 | { |
| 455 | - type = tt_bad; | 470 | + this->m->type = tt_bad; |
| 456 | QTC::TC("qpdf", "QPDF_Tokenizer bad hexstring character"); | 471 | QTC::TC("qpdf", "QPDF_Tokenizer bad hexstring character"); |
| 457 | - error_message = std::string("invalid character (") + | 472 | + this->m->error_message = std::string("invalid character (") + |
| 458 | ch + ") in hexstring"; | 473 | ch + ") in hexstring"; |
| 459 | - state = st_token_ready; | 474 | + this->m->state = st_token_ready; |
| 460 | } | 475 | } |
| 461 | } | 476 | } |
| 462 | else | 477 | else |
| @@ -465,61 +480,63 @@ QPDFTokenizer::presentCharacter(char ch) | @@ -465,61 +480,63 @@ QPDFTokenizer::presentCharacter(char ch) | ||
| 465 | "INTERNAL ERROR: invalid state while reading token"); | 480 | "INTERNAL ERROR: invalid state while reading token"); |
| 466 | } | 481 | } |
| 467 | 482 | ||
| 468 | - if ((state == st_token_ready) && (type == tt_word)) | 483 | + if ((this->m->state == st_token_ready) && (this->m->type == tt_word)) |
| 469 | { | 484 | { |
| 470 | resolveLiteral(); | 485 | resolveLiteral(); |
| 471 | } | 486 | } |
| 472 | 487 | ||
| 473 | - if (! (betweenTokens() || ((state == st_token_ready) && unread_char))) | 488 | + if (! (betweenTokens() || |
| 489 | + ((this->m->state == st_token_ready) && this->m->unread_char))) | ||
| 474 | { | 490 | { |
| 475 | - this->raw_val += orig_ch; | 491 | + this->m->raw_val += orig_ch; |
| 476 | } | 492 | } |
| 477 | } | 493 | } |
| 478 | 494 | ||
| 479 | void | 495 | void |
| 480 | QPDFTokenizer::presentEOF() | 496 | QPDFTokenizer::presentEOF() |
| 481 | { | 497 | { |
| 482 | - if (state == st_literal) | 498 | + if (this->m->state == st_literal) |
| 483 | { | 499 | { |
| 484 | QTC::TC("qpdf", "QPDF_Tokenizer EOF reading appendable token"); | 500 | QTC::TC("qpdf", "QPDF_Tokenizer EOF reading appendable token"); |
| 485 | resolveLiteral(); | 501 | resolveLiteral(); |
| 486 | } | 502 | } |
| 487 | - else if ((this->include_ignorable) && (state == st_in_space)) | 503 | + else if ((this->m->include_ignorable) && (this->m->state == st_in_space)) |
| 488 | { | 504 | { |
| 489 | - type = tt_space; | 505 | + this->m->type = tt_space; |
| 490 | } | 506 | } |
| 491 | - else if ((this->include_ignorable) && (state == st_in_comment)) | 507 | + else if ((this->m->include_ignorable) && (this->m->state == st_in_comment)) |
| 492 | { | 508 | { |
| 493 | - type = tt_comment; | 509 | + this->m->type = tt_comment; |
| 494 | } | 510 | } |
| 495 | else if (betweenTokens()) | 511 | else if (betweenTokens()) |
| 496 | { | 512 | { |
| 497 | - type = tt_eof; | 513 | + this->m->type = tt_eof; |
| 498 | } | 514 | } |
| 499 | - else if (state != st_token_ready) | 515 | + else if (this->m->state != st_token_ready) |
| 500 | { | 516 | { |
| 501 | QTC::TC("qpdf", "QPDF_Tokenizer EOF reading token"); | 517 | QTC::TC("qpdf", "QPDF_Tokenizer EOF reading token"); |
| 502 | - type = tt_bad; | ||
| 503 | - error_message = "EOF while reading token"; | 518 | + this->m->type = tt_bad; |
| 519 | + this->m->error_message = "EOF while reading token"; | ||
| 504 | } | 520 | } |
| 505 | 521 | ||
| 506 | - state = st_token_ready; | 522 | + this->m->state = st_token_ready; |
| 507 | } | 523 | } |
| 508 | 524 | ||
| 509 | bool | 525 | bool |
| 510 | QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) | 526 | QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) |
| 511 | { | 527 | { |
| 512 | - bool ready = (this->state == st_token_ready); | ||
| 513 | - unread_char = this->unread_char; | ||
| 514 | - ch = this->char_to_unread; | 528 | + bool ready = (this->m->state == st_token_ready); |
| 529 | + unread_char = this->m->unread_char; | ||
| 530 | + ch = this->m->char_to_unread; | ||
| 515 | if (ready) | 531 | if (ready) |
| 516 | { | 532 | { |
| 517 | - if (type == tt_bad) | 533 | + if (this->m->type == tt_bad) |
| 518 | { | 534 | { |
| 519 | - val = raw_val; | 535 | + this->m->val = this->m->raw_val; |
| 520 | } | 536 | } |
| 521 | - token = Token(type, val, raw_val, error_message); | ||
| 522 | - reset(); | 537 | + token = Token(this->m->type, this->m->val, |
| 538 | + this->m->raw_val, this->m->error_message); | ||
| 539 | + this->m->reset(); | ||
| 523 | } | 540 | } |
| 524 | return ready; | 541 | return ready; |
| 525 | } | 542 | } |
| @@ -527,10 +544,10 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) | @@ -527,10 +544,10 @@ QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) | ||
| 527 | bool | 544 | bool |
| 528 | QPDFTokenizer::betweenTokens() | 545 | QPDFTokenizer::betweenTokens() |
| 529 | { | 546 | { |
| 530 | - return ((state == st_top) || | ||
| 531 | - ((! this->include_ignorable) && | ||
| 532 | - ((state == st_in_comment) || | ||
| 533 | - (state == st_in_space)))); | 547 | + return ((this->m->state == st_top) || |
| 548 | + ((! this->m->include_ignorable) && | ||
| 549 | + ((this->m->state == st_in_comment) || | ||
| 550 | + (this->m->state == st_in_space)))); | ||
| 534 | } | 551 | } |
| 535 | 552 | ||
| 536 | QPDFTokenizer::Token | 553 | QPDFTokenizer::Token |
| @@ -553,11 +570,11 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, | @@ -553,11 +570,11 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, | ||
| 553 | { | 570 | { |
| 554 | presentEOF(); | 571 | presentEOF(); |
| 555 | presented_eof = true; | 572 | presented_eof = true; |
| 556 | - if ((type == tt_eof) && (! this->allow_eof)) | 573 | + if ((this->m->type == tt_eof) && (! this->m->allow_eof)) |
| 557 | { | 574 | { |
| 558 | QTC::TC("qpdf", "QPDF_Tokenizer EOF when not allowed"); | 575 | QTC::TC("qpdf", "QPDF_Tokenizer EOF when not allowed"); |
| 559 | - type = tt_bad; | ||
| 560 | - error_message = "unexpected EOF"; | 576 | + this->m->type = tt_bad; |
| 577 | + this->m->error_message = "unexpected EOF"; | ||
| 561 | offset = input->getLastOffset(); | 578 | offset = input->getLastOffset(); |
| 562 | } | 579 | } |
| 563 | } | 580 | } |
| @@ -574,14 +591,14 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, | @@ -574,14 +591,14 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, | ||
| 574 | { | 591 | { |
| 575 | ++offset; | 592 | ++offset; |
| 576 | } | 593 | } |
| 577 | - if (max_len && (raw_val.length() >= max_len) && | ||
| 578 | - (this->state != st_token_ready)) | 594 | + if (max_len && (this->m->raw_val.length() >= max_len) && |
| 595 | + (this->m->state != st_token_ready)) | ||
| 579 | { | 596 | { |
| 580 | // terminate this token now | 597 | // terminate this token now |
| 581 | QTC::TC("qpdf", "QPDFTokenizer block long token"); | 598 | QTC::TC("qpdf", "QPDFTokenizer block long token"); |
| 582 | - this->type = tt_bad; | ||
| 583 | - this->state = st_token_ready; | ||
| 584 | - error_message = | 599 | + this->m->type = tt_bad; |
| 600 | + this->m->state = st_token_ready; | ||
| 601 | + this->m->error_message = | ||
| 585 | "exceeded allowable length while reading token"; | 602 | "exceeded allowable length while reading token"; |
| 586 | } | 603 | } |
| 587 | } | 604 | } |