Commit b8c80db73981adc32332f076a07b0fbbfcb6a181
1 parent: bde819d8
oleobj: fixed to extract OLE objects embedded into MS Office OLE files
Showing 2 changed files with 23 additions and 34 deletions
oletools/oleobj.py
| @@ -45,8 +45,9 @@ http://www.decalage.info/python/oletools | @@ -45,8 +45,9 @@ http://www.decalage.info/python/oletools | ||
| 45 | # 2016-06 PL: - added main and process_file (not working yet) | 45 | # 2016-06 PL: - added main and process_file (not working yet) |
| 46 | # 2016-07-18 v0.48 SL: - added Python 3.5 support | 46 | # 2016-07-18 v0.48 SL: - added Python 3.5 support |
| 47 | # 2016-07-19 PL: - fixed Python 2.6-7 support | 47 | # 2016-07-19 PL: - fixed Python 2.6-7 support |
| 48 | +# 2016-11-17 v0.51 PL: - fixed OLE native object extraction | ||
| 48 | 49 | ||
| 49 | -__version__ = '0.48' | 50 | +__version__ = '0.51' |
| 50 | 51 | ||
| 51 | #------------------------------------------------------------------------------ | 52 | #------------------------------------------------------------------------------ |
| 52 | # TODO: | 53 | # TODO: |
| @@ -198,12 +199,13 @@ class OleNativeStream (object): | @@ -198,12 +199,13 @@ class OleNativeStream (object): | ||
| 198 | TYPE_EMBEDDED = 0x02 | 199 | TYPE_EMBEDDED = 0x02 |
| 199 | 200 | ||
| 200 | 201 | ||
| 201 | - def __init__(self, bindata=None): | 202 | + def __init__(self, bindata=None, package=False): |
| 202 | """ | 203 | """ |
| 203 | Constructor for OleNativeStream. | 204 | Constructor for OleNativeStream. |
| 204 | If bindata is provided, it will be parsed using the parse() method. | 205 | If bindata is provided, it will be parsed using the parse() method. |
| 205 | 206 | ||
| 206 | :param bindata: bytes, OLENativeStream structure containing an OLE object | 207 | :param bindata: bytes, OLENativeStream structure containing an OLE object |
| 208 | + :param package: bool, set to True when extracting from an OLE Package object | ||
| 207 | """ | 209 | """ |
| 208 | self.filename = None | 210 | self.filename = None |
| 209 | self.src_path = None | 211 | self.src_path = None |
| @@ -213,6 +215,7 @@ class OleNativeStream (object): | @@ -213,6 +215,7 @@ class OleNativeStream (object): | ||
| 213 | self.temp_path = None | 215 | self.temp_path = None |
| 214 | self.actual_size = None | 216 | self.actual_size = None |
| 215 | self.data = None | 217 | self.data = None |
| 218 | + self.package = package | ||
| 216 | if bindata is not None: | 219 | if bindata is not None: |
| 217 | self.parse(data=bindata) | 220 | self.parse(data=bindata) |
| 218 | 221 | ||
| @@ -227,9 +230,11 @@ class OleNativeStream (object): | @@ -227,9 +230,11 @@ class OleNativeStream (object): | ||
| 227 | """ | 230 | """ |
| 228 | # TODO: strict mode to raise exceptions when values are incorrect | 231 | # TODO: strict mode to raise exceptions when values are incorrect |
| 229 | # (permissive mode by default) | 232 | # (permissive mode by default) |
| 230 | - # self.native_data_size = struct.unpack('<L', data[0:4])[0] | ||
| 231 | - # data = data[4:] | ||
| 232 | - # log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size)) | 233 | + # An OLE Package object does not have the native data size field |
| 234 | + if not self.package: | ||
| 235 | + self.native_data_size = struct.unpack('<L', data[0:4])[0] | ||
| 236 | + data = data[4:] | ||
| 237 | + log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size)) | ||
| 233 | # I thought this might be an OLE type specifier ??? | 238 | # I thought this might be an OLE type specifier ??? |
| 234 | self.unknown_short, data = read_uint16(data) | 239 | self.unknown_short, data = read_uint16(data) |
| 235 | self.filename, data = data.split(b'\x00', 1) | 240 | self.filename, data = data.split(b'\x00', 1) |
| @@ -349,31 +354,14 @@ def process_file(container, filename, data, output_dir=None): | @@ -349,31 +354,14 @@ def process_file(container, filename, data, output_dir=None): | ||
| 349 | ole = olefile.OleFileIO(data) | 354 | ole = olefile.OleFileIO(data) |
| 350 | index = 1 | 355 | index = 1 |
| 351 | for stream in ole.listdir(): | 356 | for stream in ole.listdir(): |
| 352 | - objdata = ole.openstream(stream).read() | ||
| 353 | - stream_path = '/'.join(stream) | ||
| 354 | - log.debug('Checking stream %r' % stream_path) | ||
| 355 | - obj = OleObject() | ||
| 356 | - try: | ||
| 357 | - obj.parse(objdata) | ||
| 358 | - print('extract file embedded in OLE object from stream %r:' % stream_path) | ||
| 359 | - print('format_id = %d' % obj.format_id) | ||
| 360 | - print('class name = %r' % obj.class_name) | ||
| 361 | - print('data size = %d' % obj.data_size) | ||
| 362 | - # set a file extension according to the class name: | ||
| 363 | - class_name = obj.class_name.lower() | ||
| 364 | - if class_name.startswith('word'): | ||
| 365 | - ext = 'doc' | ||
| 366 | - elif class_name.startswith('package'): | ||
| 367 | - ext = 'package' | ||
| 368 | - else: | ||
| 369 | - ext = 'bin' | ||
| 370 | - | ||
| 371 | - fname = '%s_object_%03d.%s' % (fname_prefix, index, ext) | ||
| 372 | - print ('saving to file %s' % fname) | ||
| 373 | - open(fname, 'wb').write(obj.data) | ||
| 374 | - if obj.class_name.lower() == 'package': | 357 | + if stream[-1] == '\x01Ole10Native': |
| 358 | + objdata = ole.openstream(stream).read() | ||
| 359 | + stream_path = '/'.join(stream) | ||
| 360 | + log.debug('Checking stream %r' % stream_path) | ||
| 361 | + try: | ||
| 362 | + print('extract file embedded in OLE object from stream %r:' % stream_path) | ||
| 375 | print ('Parsing OLE Package') | 363 | print ('Parsing OLE Package') |
| 376 | - opkg = OleNativeStream(bindata=obj.data) | 364 | + opkg = OleNativeStream(bindata=objdata) |
| 377 | print ('Filename = %r' % opkg.filename) | 365 | print ('Filename = %r' % opkg.filename) |
| 378 | print ('Source path = %r' % opkg.src_path) | 366 | print ('Source path = %r' % opkg.src_path) |
| 379 | print ('Temp path = %r' % opkg.temp_path) | 367 | print ('Temp path = %r' % opkg.temp_path) |
| @@ -384,9 +372,9 @@ def process_file(container, filename, data, output_dir=None): | @@ -384,9 +372,9 @@ def process_file(container, filename, data, output_dir=None): | ||
| 384 | fname = '%s_object_%03d.noname' % (fname_prefix, index) | 372 | fname = '%s_object_%03d.noname' % (fname_prefix, index) |
| 385 | print ('saving to file %s' % fname) | 373 | print ('saving to file %s' % fname) |
| 386 | open(fname, 'wb').write(opkg.data) | 374 | open(fname, 'wb').write(opkg.data) |
| 387 | - index += 1 | ||
| 388 | - except: | ||
| 389 | - log.debug('*** Not an OLE 1.0 Object') | 375 | + index += 1 |
| 376 | + except: | ||
| 377 | + log.debug('*** Not an OLE 1.0 Object') | ||
| 390 | 378 | ||
| 391 | 379 | ||
| 392 | 380 |
oletools/rtfobj.py
| @@ -64,8 +64,9 @@ http://www.decalage.info/python/oletools | @@ -64,8 +64,9 @@ http://www.decalage.info/python/oletools | ||
| 64 | # 2016-08-08 PL: - added option -s to save objects to files | 64 | # 2016-08-08 PL: - added option -s to save objects to files |
| 65 | # 2016-08-09 PL: - fixed issue #78, improved regex | 65 | # 2016-08-09 PL: - fixed issue #78, improved regex |
| 66 | # 2016-09-06 PL: - fixed issue #83, backward compatible API | 66 | # 2016-09-06 PL: - fixed issue #83, backward compatible API |
| 67 | +# 2016-11-17 v0.51 PL: - updated call to oleobj.OleNativeStream | ||
| 67 | 68 | ||
| 68 | -__version__ = '0.50' | 69 | +__version__ = '0.51' |
| 69 | 70 | ||
| 70 | # ------------------------------------------------------------------------------ | 71 | # ------------------------------------------------------------------------------ |
| 71 | # TODO: | 72 | # TODO: |
| @@ -552,7 +553,7 @@ class RtfObjParser(RtfParser): | @@ -552,7 +553,7 @@ class RtfObjParser(RtfParser): | ||
| 552 | rtfobj.oledata = obj.data | 553 | rtfobj.oledata = obj.data |
| 553 | rtfobj.is_ole = True | 554 | rtfobj.is_ole = True |
| 554 | if obj.class_name.lower() == 'package': | 555 | if obj.class_name.lower() == 'package': |
| 555 | - opkg = OleNativeStream(bindata=obj.data) | 556 | + opkg = OleNativeStream(bindata=obj.data, package=True) |
| 556 | rtfobj.filename = opkg.filename | 557 | rtfobj.filename = opkg.filename |
| 557 | rtfobj.src_path = opkg.src_path | 558 | rtfobj.src_path = opkg.src_path |
| 558 | rtfobj.temp_path = opkg.temp_path | 559 | rtfobj.temp_path = opkg.temp_path |