Commit aa95f26a753d93e4c89b7790a9543da27017e197
1 parent: dad20c2c
oleobj: parse OleNativeStream and OleObject from stream
Can parse both now from bytes array or stream
Showing 1 changed file with 62 additions and 25 deletions
oletools/oleobj.py
| ... | ... | @@ -159,6 +159,8 @@ assert struct_uint32.size == 4 # make sure it matches 4 bytes |
| 159 | 159 | struct_uint16 = struct.Struct('<H') |
| 160 | 160 | assert struct_uint16.size == 2 # make sure it matches 2 bytes |
| 161 | 161 | |
| 162 | +# max length of a zero-terminated ansi string. Not sure what this really is | |
| 163 | +STR_MAX_LEN = 1024 | |
| 162 | 164 | |
| 163 | 165 | # === FUNCTIONS ============================================================== |
| 164 | 166 | |
| ... | ... | @@ -166,34 +168,42 @@ def read_uint32(data, index): |
| 166 | 168 | """ |
| 167 | 169 | Read an unsigned integer from the first 32 bits of data. |
| 168 | 170 | |
| 169 | - :param data: bytes string containing the data to be extracted. | |
| 170 | - :param index: index to start reading from. | |
| 171 | + :param data: bytes string or stream containing the data to be extracted. | |
| 172 | + :param index: index to start reading from or None if data is stream. | |
| 171 | 173 | :return: tuple (value, index) containing the read value (int), |
| 172 | 174 | and the index to continue reading next time. |
| 173 | 175 | """ |
| 174 | - value = struct_uint32.unpack(data[index:index+4])[0] | |
| 175 | - return (value, index+4) | |
| 176 | + if index is None: | |
| 177 | + value = struct_uint32.unpack(data.read(4))[0] | |
| 178 | + else: | |
| 179 | + value = struct_uint32.unpack(data[index:index+4])[0] | |
| 180 | + index += 4 | |
| 181 | + return (value, index) | |
| 176 | 182 | |
| 177 | 183 | |
| 178 | 184 | def read_uint16(data, index): |
| 179 | 185 | """ |
| 180 | 186 | Read an unsigned integer from the 16 bits of data following index. |
| 181 | 187 | |
| 182 | - :param data: bytes string containing the data to be extracted. | |
| 183 | - :param index: index to start reading from. | |
| 188 | + :param data: bytes string or stream containing the data to be extracted. | |
| 189 | + :param index: index to start reading from or None if data is stream | |
| 184 | 190 | :return: tuple (value, index) containing the read value (int), |
| 185 | 191 | and the index to continue reading next time. |
| 186 | 192 | """ |
| 187 | - value = struct_uint16.unpack(data[index:index+2])[0] | |
| 188 | - return (value, index+2) | |
| 193 | + if index is None: | |
| 194 | + value = struct_uint16.unpack(data.read(2))[0] | |
| 195 | + else: | |
| 196 | + value = struct_uint16.unpack(data[index:index+2])[0] | |
| 197 | + index += 2 | |
| 198 | + return (value, index) | |
| 189 | 199 | |
| 190 | 200 | |
| 191 | 201 | def read_LengthPrefixedAnsiString(data, index): |
| 192 | 202 | """ |
| 193 | 203 | Read a length-prefixed ANSI string from data. |
| 194 | 204 | |
| 195 | - :param data: bytes string containing the data to be extracted. | |
| 196 | - :param index: index in data where string size starts | |
| 205 | + :param data: bytes string or stream containing the data to be extracted. | |
| 206 | + :param index: index in data where string size starts or None if data is stream | |
| 197 | 207 | :return: tuple (value, index) containing the read value (bytes string), |
| 198 | 208 | and the index to start reading from next time. |
| 199 | 209 | """ |
| ... | ... | @@ -202,24 +212,41 @@ def read_LengthPrefixedAnsiString(data, index): |
| 202 | 212 | if length == 0: |
| 203 | 213 | return ('', index) |
| 204 | 214 | # extract the string without the last null character |
| 205 | - ansi_string = data[index:index+length-1] | |
| 215 | + if index is None: | |
| 216 | + ansi_string = data.read(length-1) | |
| 217 | + null_char = data.read(1) | |
| 218 | + else: | |
| 219 | + ansi_string = data[index:index+length-1] | |
| 220 | + null_char = data[index+length] | |
| 221 | + index += length | |
| 206 | 222 | # TODO: only in strict mode: |
| 207 | 223 | # check the presence of the null char: |
| 208 | - assert data[index+length] == NULL_CHAR | |
| 209 | - return (ansi_string, index+length) | |
| 224 | + assert null_char == NULL_CHAR | |
| 225 | + return (ansi_string, index) | |
| 210 | 226 | |
| 211 | 227 | |
| 212 | 228 | def read_zero_terminated_ansi_string(data, index): |
| 213 | 229 | """ |
| 214 | 230 | Read a zero-terminated ANSI string from data |
| 215 | 231 | |
| 216 | - :param data: bytes string containing an ansi string | |
| 217 | - :param index: index at which the string should start | |
| 232 | + Guessing that max length is STR_MAX_LEN (1024) bytes. | |
| 233 | + | |
| 234 | + :param data: bytes string or stream containing an ansi string | |
| 235 | + :param index: index at which the string should start or None if data is stream | |
| 218 | 236 | :return: tuple (string, index) containing the read string (bytes string), |
| 219 | 237 | and the index to start reading from next time. |
| 220 | 238 | """ |
| 221 | - end_idx = data.find(b'\x00', index) | |
| 222 | - return data[index:end_idx], end_idx+1 # return index after the 0-byte | |
| 239 | + if index is None: | |
| 240 | + result = [] | |
| 241 | + for count in xrange(STR_MAX_LEN): | |
| 242 | + char = data.read(1) | |
| 243 | + if char == b'\x00': | |
| 244 | + return b''.join(result), index | |
| 245 | + result.append(char) | |
| 246 | + raise ValueError('found no string-terminating zero-byte!') | |
| 247 | + else: # data is byte array, can just search | |
| 248 | + end_idx = data.index(b'\x00', index, index+STR_MAX_LEN) | |
| 249 | + return data[index:end_idx], end_idx+1 # return index after the 0-byte | |
| 223 | 250 | |
| 224 | 251 | |
| 225 | 252 | # === CLASSES ================================================================ |
| ... | ... | @@ -240,8 +267,9 @@ class OleNativeStream (object): |
| 240 | 267 | Constructor for OleNativeStream. |
| 241 | 268 | If bindata is provided, it will be parsed using the parse() method. |
| 242 | 269 | |
| 243 | - :param bindata: bytes, OLENativeStream structure containing an OLE object | |
| 244 | - :param package: bool, set to True when extracting from an OLE Package object | |
| 270 | + :param bindata: forwarded to parse, see docu there | |
| 271 | + :param package: bool, set to True when extracting from an OLE Package | |
| 272 | + object | |
| 245 | 273 | """ |
| 246 | 274 | self.filename = None |
| 247 | 275 | self.src_path = None |
| ... | ... | @@ -261,16 +289,22 @@ class OleNativeStream (object): |
| 261 | 289 | to extract the OLE object it contains. |
| 262 | 290 | (see MS-OLEDS 2.3.6 OLENativeStream) |
| 263 | 291 | |
| 264 | - :param data: bytes, OLENativeStream structure containing an OLE object | |
| 265 | - :return: | |
| 292 | + :param data: bytes array or stream, containing OLENativeStream | |
| 293 | + structure containing an OLE object | |
| 294 | + :return: None | |
| 266 | 295 | """ |
| 267 | 296 | # TODO: strict mode to raise exceptions when values are incorrect |
| 268 | 297 | # (permissive mode by default) |
| 298 | + if hasattr(data, 'read'): | |
| 299 | + index = None # marker for read_* functions to expect stream | |
| 300 | + else: | |
| 301 | + index = 0 # marker for read_* functions to expect array | |
| 302 | + | |
| 269 | 303 | # An OLE Package object does not have the native data size field |
| 270 | - index = 0 | |
| 271 | 304 | if not self.package: |
| 272 | 305 | self.native_data_size, index = read_uint32(data, index) |
| 273 | - log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size)) | |
| 306 | + log.debug('OLE native data size = {0:08X} ({0} bytes)' | |
| 307 | + .format(self.native_data_size)) | |
| 274 | 308 | # I thought this might be an OLE type specifier ??? |
| 275 | 309 | self.unknown_short, index = read_uint16(data, index) |
| 276 | 310 | self.filename, index = read_zero_terminated_ansi_string(data, index) |
| ... | ... | @@ -284,10 +318,13 @@ class OleNativeStream (object): |
| 284 | 318 | # size of the rest of the data |
| 285 | 319 | try: |
| 286 | 320 | self.actual_size, index = read_uint32(data, index) |
| 287 | - self.data = data[index:index+self.actual_size] | |
| 321 | + if index is None: # data is a bytes stream | |
| 322 | + self.data = data | |
| 323 | + else: # data is a bytes array | |
| 324 | + self.data = data[index:index+self.actual_size] | |
| 288 | 325 | # TODO: exception when size > remaining data |
| 289 | 326 | # TODO: SLACK DATA |
| 290 | - except IOError: # data is not embedded but only linked to | |
| 327 | + except IOError, struct.error: # no data to read actual_size | |
| 291 | 328 | logging.debug('data is not embedded but only a link') |
| 292 | 329 | self.actual_size = 0 |
| 293 | 330 | self.data = None | ... | ... |