Commit b8c80db73981adc32332f076a07b0fbbfcb6a181
1 parent: bde819d8
oleobj: fixed to extract OLE objects embedded into MS Office OLE files
Showing 2 changed files with 23 additions and 34 deletions
oletools/oleobj.py
| ... | ... | @@ -45,8 +45,9 @@ http://www.decalage.info/python/oletools |
| 45 | 45 | # 2016-06 PL: - added main and process_file (not working yet) |
| 46 | 46 | # 2016-07-18 v0.48 SL: - added Python 3.5 support |
| 47 | 47 | # 2016-07-19 PL: - fixed Python 2.6-7 support |
| 48 | +# 2016-11-17 v0.51 PL: - fixed OLE native object extraction | |
| 48 | 49 | |
| 49 | -__version__ = '0.48' | |
| 50 | +__version__ = '0.51' | |
| 50 | 51 | |
| 51 | 52 | #------------------------------------------------------------------------------ |
| 52 | 53 | # TODO: |
| ... | ... | @@ -198,12 +199,13 @@ class OleNativeStream (object): |
| 198 | 199 | TYPE_EMBEDDED = 0x02 |
| 199 | 200 | |
| 200 | 201 | |
| 201 | - def __init__(self, bindata=None): | |
| 202 | + def __init__(self, bindata=None, package=False): | |
| 202 | 203 | """ |
| 203 | 204 | Constructor for OleNativeStream. |
| 204 | 205 | If bindata is provided, it will be parsed using the parse() method. |
| 205 | 206 | |
| 206 | 207 | :param bindata: bytes, OLENativeStream structure containing an OLE object |
| 208 | + :param package: bool, set to True when extracting from an OLE Package object | |
| 207 | 209 | """ |
| 208 | 210 | self.filename = None |
| 209 | 211 | self.src_path = None |
| ... | ... | @@ -213,6 +215,7 @@ class OleNativeStream (object): |
| 213 | 215 | self.temp_path = None |
| 214 | 216 | self.actual_size = None |
| 215 | 217 | self.data = None |
| 218 | + self.package = package | |
| 216 | 219 | if bindata is not None: |
| 217 | 220 | self.parse(data=bindata) |
| 218 | 221 | |
| ... | ... | @@ -227,9 +230,11 @@ class OleNativeStream (object): |
| 227 | 230 | """ |
| 228 | 231 | # TODO: strict mode to raise exceptions when values are incorrect |
| 229 | 232 | # (permissive mode by default) |
| 230 | - # self.native_data_size = struct.unpack('<L', data[0:4])[0] | |
| 231 | - # data = data[4:] | |
| 232 | - # log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size)) | |
| 233 | + # An OLE Package object does not have the native data size field | |
| 234 | + if not self.package: | |
| 235 | + self.native_data_size = struct.unpack('<L', data[0:4])[0] | |
| 236 | + data = data[4:] | |
| 237 | + log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size)) | |
| 233 | 238 | # I thought this might be an OLE type specifier ??? |
| 234 | 239 | self.unknown_short, data = read_uint16(data) |
| 235 | 240 | self.filename, data = data.split(b'\x00', 1) |
| ... | ... | @@ -349,31 +354,14 @@ def process_file(container, filename, data, output_dir=None): |
| 349 | 354 | ole = olefile.OleFileIO(data) |
| 350 | 355 | index = 1 |
| 351 | 356 | for stream in ole.listdir(): |
| 352 | - objdata = ole.openstream(stream).read() | |
| 353 | - stream_path = '/'.join(stream) | |
| 354 | - log.debug('Checking stream %r' % stream_path) | |
| 355 | - obj = OleObject() | |
| 356 | - try: | |
| 357 | - obj.parse(objdata) | |
| 358 | - print('extract file embedded in OLE object from stream %r:' % stream_path) | |
| 359 | - print('format_id = %d' % obj.format_id) | |
| 360 | - print('class name = %r' % obj.class_name) | |
| 361 | - print('data size = %d' % obj.data_size) | |
| 362 | - # set a file extension according to the class name: | |
| 363 | - class_name = obj.class_name.lower() | |
| 364 | - if class_name.startswith('word'): | |
| 365 | - ext = 'doc' | |
| 366 | - elif class_name.startswith('package'): | |
| 367 | - ext = 'package' | |
| 368 | - else: | |
| 369 | - ext = 'bin' | |
| 370 | - | |
| 371 | - fname = '%s_object_%03d.%s' % (fname_prefix, index, ext) | |
| 372 | - print ('saving to file %s' % fname) | |
| 373 | - open(fname, 'wb').write(obj.data) | |
| 374 | - if obj.class_name.lower() == 'package': | |
| 357 | + if stream[-1] == '\x01Ole10Native': | |
| 358 | + objdata = ole.openstream(stream).read() | |
| 359 | + stream_path = '/'.join(stream) | |
| 360 | + log.debug('Checking stream %r' % stream_path) | |
| 361 | + try: | |
| 362 | + print('extract file embedded in OLE object from stream %r:' % stream_path) | |
| 375 | 363 | print ('Parsing OLE Package') |
| 376 | - opkg = OleNativeStream(bindata=obj.data) | |
| 364 | + opkg = OleNativeStream(bindata=objdata) | |
| 377 | 365 | print ('Filename = %r' % opkg.filename) |
| 378 | 366 | print ('Source path = %r' % opkg.src_path) |
| 379 | 367 | print ('Temp path = %r' % opkg.temp_path) |
| ... | ... | @@ -384,9 +372,9 @@ def process_file(container, filename, data, output_dir=None): |
| 384 | 372 | fname = '%s_object_%03d.noname' % (fname_prefix, index) |
| 385 | 373 | print ('saving to file %s' % fname) |
| 386 | 374 | open(fname, 'wb').write(opkg.data) |
| 387 | - index += 1 | |
| 388 | - except: | |
| 389 | - log.debug('*** Not an OLE 1.0 Object') | |
| 375 | + index += 1 | |
| 376 | + except: | |
| 377 | + log.debug('*** Not an OLE 1.0 Object') | |
| 390 | 378 | |
| 391 | 379 | |
| 392 | 380 | ... | ... |
oletools/rtfobj.py
| ... | ... | @@ -64,8 +64,9 @@ http://www.decalage.info/python/oletools |
| 64 | 64 | # 2016-08-08 PL: - added option -s to save objects to files |
| 65 | 65 | # 2016-08-09 PL: - fixed issue #78, improved regex |
| 66 | 66 | # 2016-09-06 PL: - fixed issue #83, backward compatible API |
| 67 | +# 2016-11-17 v0.51 PL: - updated call to oleobj.OleNativeStream | |
| 67 | 68 | |
| 68 | -__version__ = '0.50' | |
| 69 | +__version__ = '0.51' | |
| 69 | 70 | |
| 70 | 71 | # ------------------------------------------------------------------------------ |
| 71 | 72 | # TODO: |
| ... | ... | @@ -552,7 +553,7 @@ class RtfObjParser(RtfParser): |
| 552 | 553 | rtfobj.oledata = obj.data |
| 553 | 554 | rtfobj.is_ole = True |
| 554 | 555 | if obj.class_name.lower() == 'package': |
| 555 | - opkg = OleNativeStream(bindata=obj.data) | |
| 556 | + opkg = OleNativeStream(bindata=obj.data, package=True) | |
| 556 | 557 | rtfobj.filename = opkg.filename |
| 557 | 558 | rtfobj.src_path = opkg.src_path |
| 558 | 559 | rtfobj.temp_path = opkg.temp_path | ... | ... |