Commit b8c80db73981adc32332f076a07b0fbbfcb6a181

Authored by decalage2
1 parent bde819d8

oleobj: fixed extraction of OLE objects embedded in MS Office OLE files

oletools/oleobj.py
@@ -45,8 +45,9 @@ http://www.decalage.info/python/oletools @@ -45,8 +45,9 @@ http://www.decalage.info/python/oletools
45 # 2016-06 PL: - added main and process_file (not working yet) 45 # 2016-06 PL: - added main and process_file (not working yet)
46 # 2016-07-18 v0.48 SL: - added Python 3.5 support 46 # 2016-07-18 v0.48 SL: - added Python 3.5 support
47 # 2016-07-19 PL: - fixed Python 2.6-7 support 47 # 2016-07-19 PL: - fixed Python 2.6-7 support
  48 +# 2016-11-17 v0.51 PL: - fixed OLE native object extraction
48 49
49 -__version__ = '0.48' 50 +__version__ = '0.51'
50 51
51 #------------------------------------------------------------------------------ 52 #------------------------------------------------------------------------------
52 # TODO: 53 # TODO:
@@ -198,12 +199,13 @@ class OleNativeStream (object): @@ -198,12 +199,13 @@ class OleNativeStream (object):
198 TYPE_EMBEDDED = 0x02 199 TYPE_EMBEDDED = 0x02
199 200
200 201
201 - def __init__(self, bindata=None): 202 + def __init__(self, bindata=None, package=False):
202 """ 203 """
203 Constructor for OleNativeStream. 204 Constructor for OleNativeStream.
204 If bindata is provided, it will be parsed using the parse() method. 205 If bindata is provided, it will be parsed using the parse() method.
205 206
206 :param bindata: bytes, OLENativeStream structure containing an OLE object 207 :param bindata: bytes, OLENativeStream structure containing an OLE object
  208 + :param package: bool, set to True when extracting from an OLE Package object
207 """ 209 """
208 self.filename = None 210 self.filename = None
209 self.src_path = None 211 self.src_path = None
@@ -213,6 +215,7 @@ class OleNativeStream (object): @@ -213,6 +215,7 @@ class OleNativeStream (object):
213 self.temp_path = None 215 self.temp_path = None
214 self.actual_size = None 216 self.actual_size = None
215 self.data = None 217 self.data = None
  218 + self.package = package
216 if bindata is not None: 219 if bindata is not None:
217 self.parse(data=bindata) 220 self.parse(data=bindata)
218 221
@@ -227,9 +230,11 @@ class OleNativeStream (object): @@ -227,9 +230,11 @@ class OleNativeStream (object):
227 """ 230 """
228 # TODO: strict mode to raise exceptions when values are incorrect 231 # TODO: strict mode to raise exceptions when values are incorrect
229 # (permissive mode by default) 232 # (permissive mode by default)
230 - # self.native_data_size = struct.unpack('<L', data[0:4])[0]  
231 - # data = data[4:]  
232 - # log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size)) 233 + # An OLE Package object does not have the native data size field
  234 + if not self.package:
  235 + self.native_data_size = struct.unpack('<L', data[0:4])[0]
  236 + data = data[4:]
  237 + log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size))
233 # I thought this might be an OLE type specifier ??? 238 # I thought this might be an OLE type specifier ???
234 self.unknown_short, data = read_uint16(data) 239 self.unknown_short, data = read_uint16(data)
235 self.filename, data = data.split(b'\x00', 1) 240 self.filename, data = data.split(b'\x00', 1)
@@ -349,31 +354,14 @@ def process_file(container, filename, data, output_dir=None): @@ -349,31 +354,14 @@ def process_file(container, filename, data, output_dir=None):
349 ole = olefile.OleFileIO(data) 354 ole = olefile.OleFileIO(data)
350 index = 1 355 index = 1
351 for stream in ole.listdir(): 356 for stream in ole.listdir():
352 - objdata = ole.openstream(stream).read()  
353 - stream_path = '/'.join(stream)  
354 - log.debug('Checking stream %r' % stream_path)  
355 - obj = OleObject()  
356 - try:  
357 - obj.parse(objdata)  
358 - print('extract file embedded in OLE object from stream %r:' % stream_path)  
359 - print('format_id = %d' % obj.format_id)  
360 - print('class name = %r' % obj.class_name)  
361 - print('data size = %d' % obj.data_size)  
362 - # set a file extension according to the class name:  
363 - class_name = obj.class_name.lower()  
364 - if class_name.startswith('word'):  
365 - ext = 'doc'  
366 - elif class_name.startswith('package'):  
367 - ext = 'package'  
368 - else:  
369 - ext = 'bin'  
370 -  
371 - fname = '%s_object_%03d.%s' % (fname_prefix, index, ext)  
372 - print ('saving to file %s' % fname)  
373 - open(fname, 'wb').write(obj.data)  
374 - if obj.class_name.lower() == 'package': 357 + if stream[-1] == '\x01Ole10Native':
  358 + objdata = ole.openstream(stream).read()
  359 + stream_path = '/'.join(stream)
  360 + log.debug('Checking stream %r' % stream_path)
  361 + try:
  362 + print('extract file embedded in OLE object from stream %r:' % stream_path)
375 print ('Parsing OLE Package') 363 print ('Parsing OLE Package')
376 - opkg = OleNativeStream(bindata=obj.data) 364 + opkg = OleNativeStream(bindata=objdata)
377 print ('Filename = %r' % opkg.filename) 365 print ('Filename = %r' % opkg.filename)
378 print ('Source path = %r' % opkg.src_path) 366 print ('Source path = %r' % opkg.src_path)
379 print ('Temp path = %r' % opkg.temp_path) 367 print ('Temp path = %r' % opkg.temp_path)
@@ -384,9 +372,9 @@ def process_file(container, filename, data, output_dir=None): @@ -384,9 +372,9 @@ def process_file(container, filename, data, output_dir=None):
384 fname = '%s_object_%03d.noname' % (fname_prefix, index) 372 fname = '%s_object_%03d.noname' % (fname_prefix, index)
385 print ('saving to file %s' % fname) 373 print ('saving to file %s' % fname)
386 open(fname, 'wb').write(opkg.data) 374 open(fname, 'wb').write(opkg.data)
387 - index += 1  
388 - except:  
389 - log.debug('*** Not an OLE 1.0 Object') 375 + index += 1
  376 + except:
  377 + log.debug('*** Not an OLE 1.0 Object')
390 378
391 379
392 380
oletools/rtfobj.py
@@ -64,8 +64,9 @@ http://www.decalage.info/python/oletools @@ -64,8 +64,9 @@ http://www.decalage.info/python/oletools
64 # 2016-08-08 PL: - added option -s to save objects to files 64 # 2016-08-08 PL: - added option -s to save objects to files
65 # 2016-08-09 PL: - fixed issue #78, improved regex 65 # 2016-08-09 PL: - fixed issue #78, improved regex
66 # 2016-09-06 PL: - fixed issue #83, backward compatible API 66 # 2016-09-06 PL: - fixed issue #83, backward compatible API
  67 +# 2016-11-17 v0.51 PL: - updated call to oleobj.OleNativeStream
67 68
68 -__version__ = '0.50' 69 +__version__ = '0.51'
69 70
70 # ------------------------------------------------------------------------------ 71 # ------------------------------------------------------------------------------
71 # TODO: 72 # TODO:
@@ -552,7 +553,7 @@ class RtfObjParser(RtfParser): @@ -552,7 +553,7 @@ class RtfObjParser(RtfParser):
552 rtfobj.oledata = obj.data 553 rtfobj.oledata = obj.data
553 rtfobj.is_ole = True 554 rtfobj.is_ole = True
554 if obj.class_name.lower() == 'package': 555 if obj.class_name.lower() == 'package':
555 - opkg = OleNativeStream(bindata=obj.data) 556 + opkg = OleNativeStream(bindata=obj.data, package=True)
556 rtfobj.filename = opkg.filename 557 rtfobj.filename = opkg.filename
557 rtfobj.src_path = opkg.src_path 558 rtfobj.src_path = opkg.src_path
558 rtfobj.temp_path = opkg.temp_path 559 rtfobj.temp_path = opkg.temp_path