Commit aa95f26a753d93e4c89b7790a9543da27017e197
1 parent
dad20c2c
oleobj: parse OleNativeStream and OleObject from stream
Both can now be parsed from either a bytes array or a stream
Showing
1 changed file
with
62 additions
and
25 deletions
oletools/oleobj.py
| @@ -159,6 +159,8 @@ assert struct_uint32.size == 4 # make sure it matches 4 bytes | @@ -159,6 +159,8 @@ assert struct_uint32.size == 4 # make sure it matches 4 bytes | ||
| 159 | struct_uint16 = struct.Struct('<H') | 159 | struct_uint16 = struct.Struct('<H') |
| 160 | assert struct_uint16.size == 2 # make sure it matches 2 bytes | 160 | assert struct_uint16.size == 2 # make sure it matches 2 bytes |
| 161 | 161 | ||
| 162 | +# max length of a zero-terminated ansi string. Not sure what this really is | ||
| 163 | +STR_MAX_LEN = 1024 | ||
| 162 | 164 | ||
| 163 | # === FUNCTIONS ============================================================== | 165 | # === FUNCTIONS ============================================================== |
| 164 | 166 | ||
| @@ -166,34 +168,42 @@ def read_uint32(data, index): | @@ -166,34 +168,42 @@ def read_uint32(data, index): | ||
| 166 | """ | 168 | """ |
| 167 | Read an unsigned integer from the first 32 bits of data. | 169 | Read an unsigned integer from the first 32 bits of data. |
| 168 | 170 | ||
| 169 | - :param data: bytes string containing the data to be extracted. | ||
| 170 | - :param index: index to start reading from. | 171 | + :param data: bytes string or stream containing the data to be extracted. |
| 172 | + :param index: index to start reading from or None if data is stream. | ||
| 171 | :return: tuple (value, index) containing the read value (int), | 173 | :return: tuple (value, index) containing the read value (int), |
| 172 | and the index to continue reading next time. | 174 | and the index to continue reading next time. |
| 173 | """ | 175 | """ |
| 174 | - value = struct_uint32.unpack(data[index:index+4])[0] | ||
| 175 | - return (value, index+4) | 176 | + if index is None: |
| 177 | + value = struct_uint32.unpack(data.read(4))[0] | ||
| 178 | + else: | ||
| 179 | + value = struct_uint32.unpack(data[index:index+4])[0] | ||
| 180 | + index += 4 | ||
| 181 | + return (value, index) | ||
| 176 | 182 | ||
| 177 | 183 | ||
| 178 | def read_uint16(data, index): | 184 | def read_uint16(data, index): |
| 179 | """ | 185 | """ |
| 180 | Read an unsigned integer from the 16 bits of data following index. | 186 | Read an unsigned integer from the 16 bits of data following index. |
| 181 | 187 | ||
| 182 | - :param data: bytes string containing the data to be extracted. | ||
| 183 | - :param index: index to start reading from. | 188 | + :param data: bytes string or stream containing the data to be extracted. |
| 189 | + :param index: index to start reading from or None if data is stream | ||
| 184 | :return: tuple (value, index) containing the read value (int), | 190 | :return: tuple (value, index) containing the read value (int), |
| 185 | and the index to continue reading next time. | 191 | and the index to continue reading next time. |
| 186 | """ | 192 | """ |
| 187 | - value = struct_uint16.unpack(data[index:index+2])[0] | ||
| 188 | - return (value, index+2) | 193 | + if index is None: |
| 194 | + value = struct_uint16.unpack(data.read(2))[0] | ||
| 195 | + else: | ||
| 196 | + value = struct_uint16.unpack(data[index:index+2])[0] | ||
| 197 | + index += 2 | ||
| 198 | + return (value, index) | ||
| 189 | 199 | ||
| 190 | 200 | ||
| 191 | def read_LengthPrefixedAnsiString(data, index): | 201 | def read_LengthPrefixedAnsiString(data, index): |
| 192 | """ | 202 | """ |
| 193 | Read a length-prefixed ANSI string from data. | 203 | Read a length-prefixed ANSI string from data. |
| 194 | 204 | ||
| 195 | - :param data: bytes string containing the data to be extracted. | ||
| 196 | - :param index: index in data where string size starts | 205 | + :param data: bytes string or stream containing the data to be extracted. |
| 196 | - :param index: index in data where string size starts | 206 | + :param index: index in data where string size starts or None if data is stream |
| 197 | :return: tuple (value, index) containing the read value (bytes string), | 207 | :return: tuple (value, index) containing the read value (bytes string), |
| 198 | and the index to start reading from next time. | 208 | and the index to start reading from next time. |
| 199 | """ | 209 | """ |
| @@ -202,24 +212,41 @@ def read_LengthPrefixedAnsiString(data, index): | @@ -202,24 +212,41 @@ def read_LengthPrefixedAnsiString(data, index): | ||
| 202 | if length == 0: | 212 | if length == 0: |
| 203 | return ('', index) | 213 | return ('', index) |
| 204 | # extract the string without the last null character | 214 | # extract the string without the last null character |
| 205 | - ansi_string = data[index:index+length-1] | 215 | + if index is None: |
| 216 | + ansi_string = data.read(length-1) | ||
| 217 | + null_char = data.read(1) | ||
| 218 | + else: | ||
| 219 | + ansi_string = data[index:index+length-1] | ||
| 220 | + null_char = data[index+length] | ||
| 221 | + index += length | ||
| 206 | # TODO: only in strict mode: | 222 | # TODO: only in strict mode: |
| 207 | # check the presence of the null char: | 223 | # check the presence of the null char: |
| 208 | - assert data[index+length] == NULL_CHAR | ||
| 209 | - return (ansi_string, index+length) | 224 | + assert null_char == NULL_CHAR |
| 225 | + return (ansi_string, index) | ||
| 210 | 226 | ||
| 211 | 227 | ||
| 212 | def read_zero_terminated_ansi_string(data, index): | 228 | def read_zero_terminated_ansi_string(data, index): |
| 213 | """ | 229 | """ |
| 214 | Read a zero-terminated ANSI string from data | 230 | Read a zero-terminated ANSI string from data |
| 215 | 231 | ||
| 216 | - :param data: bytes string containing an ansi string | ||
| 216 | - :param data: bytes string containing an ansi string | 232 | + Guessing that max length is STR_MAX_LEN (1024) bytes. |
| 233 | + | ||
| 234 | + :param data: bytes string or stream containing an ansi string | ||
| 235 | + :param index: index at which the string should start or None if data is stream | ||
| 218 | :return: tuple (string, index) containing the read string (bytes string), | 236 | :return: tuple (string, index) containing the read string (bytes string), |
| 219 | and the index to start reading from next time. | 237 | and the index to start reading from next time. |
| 220 | """ | 238 | """ |
| 221 | - end_idx = data.find(b'\x00', index) | ||
| 222 | - return data[index:end_idx], end_idx+1 # return index after the 0-byte | 239 | + if index is None: |
| 240 | + result = [] | ||
| 241 | + for count in xrange(STR_MAX_LEN): | ||
| 242 | + char = data.read(1) | ||
| 243 | + if char == b'\x00': | ||
| 244 | + return b''.join(result), index | ||
| 245 | + result.append(char) | ||
| 246 | + raise ValueError('found no string-terminating zero-byte!') | ||
| 247 | + else: # data is byte array, can just search | ||
| 248 | + end_idx = data.index(b'\x00', index, index+STR_MAX_LEN) | ||
| 249 | + return data[index:end_idx], end_idx+1 # return index after the 0-byte | ||
| 223 | 250 | ||
| 224 | 251 | ||
| 225 | # === CLASSES ================================================================ | 252 | # === CLASSES ================================================================ |
| @@ -240,8 +267,9 @@ class OleNativeStream (object): | @@ -240,8 +267,9 @@ class OleNativeStream (object): | ||
| 240 | Constructor for OleNativeStream. | 267 | Constructor for OleNativeStream. |
| 241 | If bindata is provided, it will be parsed using the parse() method. | 268 | If bindata is provided, it will be parsed using the parse() method. |
| 242 | 269 | ||
| 243 | - :param bindata: bytes, OLENativeStream structure containing an OLE object | ||
| 243 | - :param bindata: bytes, OLENativeStream structure containing an OLE object | 270 | + :param bindata: forwarded to parse(), see documentation there |
| 271 | + :param package: bool, set to True when extracting from an OLE Package | ||
| 272 | + object | ||
| 245 | """ | 273 | """ |
| 246 | self.filename = None | 274 | self.filename = None |
| 247 | self.src_path = None | 275 | self.src_path = None |
| @@ -261,16 +289,22 @@ class OleNativeStream (object): | @@ -261,16 +289,22 @@ class OleNativeStream (object): | ||
| 261 | to extract the OLE object it contains. | 289 | to extract the OLE object it contains. |
| 262 | (see MS-OLEDS 2.3.6 OLENativeStream) | 290 | (see MS-OLEDS 2.3.6 OLENativeStream) |
| 263 | 291 | ||
| 264 | - :param data: bytes, OLENativeStream structure containing an OLE object | ||
| 265 | - :return: | 292 | + :param data: bytes array or stream, containing OLENativeStream |
| 293 | + structure containing an OLE object | ||
| 294 | + :return: None | ||
| 266 | """ | 295 | """ |
| 267 | # TODO: strict mode to raise exceptions when values are incorrect | 296 | # TODO: strict mode to raise exceptions when values are incorrect |
| 268 | # (permissive mode by default) | 297 | # (permissive mode by default) |
| 298 | + if hasattr(data, 'read'): | ||
| 299 | + index = None # marker for read_* functions to expect stream | ||
| 300 | + else: | ||
| 301 | + index = 0 # marker for read_* functions to expect array | ||
| 302 | + | ||
| 269 | # An OLE Package object does not have the native data size field | 303 | # An OLE Package object does not have the native data size field |
| 270 | - index = 0 | ||
| 271 | if not self.package: | 304 | if not self.package: |
| 272 | self.native_data_size, index = read_uint32(data, index) | 305 | self.native_data_size, index = read_uint32(data, index) |
| 273 | - log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size)) | 306 | + log.debug('OLE native data size = {0:08X} ({0} bytes)' |
| 307 | + .format(self.native_data_size)) | ||
| 274 | # I thought this might be an OLE type specifier ??? | 308 | # I thought this might be an OLE type specifier ??? |
| 275 | self.unknown_short, index = read_uint16(data, index) | 309 | self.unknown_short, index = read_uint16(data, index) |
| 276 | self.filename, index = read_zero_terminated_ansi_string(data, index) | 310 | self.filename, index = read_zero_terminated_ansi_string(data, index) |
| @@ -284,10 +318,13 @@ class OleNativeStream (object): | @@ -284,10 +318,13 @@ class OleNativeStream (object): | ||
| 284 | # size of the rest of the data | 318 | # size of the rest of the data |
| 285 | try: | 319 | try: |
| 286 | self.actual_size, index = read_uint32(data, index) | 320 | self.actual_size, index = read_uint32(data, index) |
| 287 | - self.data = data[index:index+self.actual_size] | 321 | + if index is None: # data is a bytes stream |
| 322 | + self.data = data | ||
| 323 | + else: # data is a bytes array | ||
| 324 | + self.data = data[index:index+self.actual_size] | ||
| 288 | # TODO: exception when size > remaining data | 325 | # TODO: exception when size > remaining data |
| 289 | # TODO: SLACK DATA | 326 | # TODO: SLACK DATA |
| 290 | - except IOError: # data is not embedded but only linked to | 327 | + except IOError, struct.error: # no data to read actual_size |
| 291 | logging.debug('data is not embedded but only a link') | 328 | logging.debug('data is not embedded but only a link') |
| 292 | self.actual_size = 0 | 329 | self.actual_size = 0 |
| 293 | self.data = None | 330 | self.data = None |