Commit b8c80db73981adc32332f076a07b0fbbfcb6a181

Authored by decalage2
1 parent bde819d8

oleobj: fixed extraction of OLE objects embedded in MS Office OLE files

oletools/oleobj.py
... ... @@ -45,8 +45,9 @@ http://www.decalage.info/python/oletools
45 45 # 2016-06 PL: - added main and process_file (not working yet)
46 46 # 2016-07-18 v0.48 SL: - added Python 3.5 support
47 47 # 2016-07-19 PL: - fixed Python 2.6-7 support
  48 +# 2016-11-17 v0.51 PL: - fixed OLE native object extraction
48 49  
49   -__version__ = '0.48'
  50 +__version__ = '0.51'
50 51  
51 52 #------------------------------------------------------------------------------
52 53 # TODO:
... ... @@ -198,12 +199,13 @@ class OleNativeStream (object):
198 199 TYPE_EMBEDDED = 0x02
199 200  
200 201  
201   - def __init__(self, bindata=None):
  202 + def __init__(self, bindata=None, package=False):
202 203 """
203 204 Constructor for OleNativeStream.
204 205 If bindata is provided, it will be parsed using the parse() method.
205 206  
206 207 :param bindata: bytes, OLENativeStream structure containing an OLE object
  208 + :param package: bool, set to True when extracting from an OLE Package object
207 209 """
208 210 self.filename = None
209 211 self.src_path = None
... ... @@ -213,6 +215,7 @@ class OleNativeStream (object):
213 215 self.temp_path = None
214 216 self.actual_size = None
215 217 self.data = None
  218 + self.package = package
216 219 if bindata is not None:
217 220 self.parse(data=bindata)
218 221  
... ... @@ -227,9 +230,11 @@ class OleNativeStream (object):
227 230 """
228 231 # TODO: strict mode to raise exceptions when values are incorrect
229 232 # (permissive mode by default)
230   - # self.native_data_size = struct.unpack('<L', data[0:4])[0]
231   - # data = data[4:]
232   - # log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size))
  233 + # An OLE Package object does not have the native data size field
  234 + if not self.package:
  235 + self.native_data_size = struct.unpack('<L', data[0:4])[0]
  236 + data = data[4:]
  237 + log.debug('OLE native data size = {0:08X} ({0} bytes)'.format(self.native_data_size))
233 238 # I thought this might be an OLE type specifier ???
234 239 self.unknown_short, data = read_uint16(data)
235 240 self.filename, data = data.split(b'\x00', 1)
... ... @@ -349,31 +354,14 @@ def process_file(container, filename, data, output_dir=None):
349 354 ole = olefile.OleFileIO(data)
350 355 index = 1
351 356 for stream in ole.listdir():
352   - objdata = ole.openstream(stream).read()
353   - stream_path = '/'.join(stream)
354   - log.debug('Checking stream %r' % stream_path)
355   - obj = OleObject()
356   - try:
357   - obj.parse(objdata)
358   - print('extract file embedded in OLE object from stream %r:' % stream_path)
359   - print('format_id = %d' % obj.format_id)
360   - print('class name = %r' % obj.class_name)
361   - print('data size = %d' % obj.data_size)
362   - # set a file extension according to the class name:
363   - class_name = obj.class_name.lower()
364   - if class_name.startswith('word'):
365   - ext = 'doc'
366   - elif class_name.startswith('package'):
367   - ext = 'package'
368   - else:
369   - ext = 'bin'
370   -
371   - fname = '%s_object_%03d.%s' % (fname_prefix, index, ext)
372   - print ('saving to file %s' % fname)
373   - open(fname, 'wb').write(obj.data)
374   - if obj.class_name.lower() == 'package':
  357 + if stream[-1] == '\x01Ole10Native':
  358 + objdata = ole.openstream(stream).read()
  359 + stream_path = '/'.join(stream)
  360 + log.debug('Checking stream %r' % stream_path)
  361 + try:
  362 + print('extract file embedded in OLE object from stream %r:' % stream_path)
375 363 print ('Parsing OLE Package')
376   - opkg = OleNativeStream(bindata=obj.data)
  364 + opkg = OleNativeStream(bindata=objdata)
377 365 print ('Filename = %r' % opkg.filename)
378 366 print ('Source path = %r' % opkg.src_path)
379 367 print ('Temp path = %r' % opkg.temp_path)
... ... @@ -384,9 +372,9 @@ def process_file(container, filename, data, output_dir=None):
384 372 fname = '%s_object_%03d.noname' % (fname_prefix, index)
385 373 print ('saving to file %s' % fname)
386 374 open(fname, 'wb').write(opkg.data)
387   - index += 1
388   - except:
389   - log.debug('*** Not an OLE 1.0 Object')
  375 + index += 1
  376 + except:
  377 + log.debug('*** Not an OLE 1.0 Object')
390 378  
391 379  
392 380  
... ...
oletools/rtfobj.py
... ... @@ -64,8 +64,9 @@ http://www.decalage.info/python/oletools
64 64 # 2016-08-08 PL: - added option -s to save objects to files
65 65 # 2016-08-09 PL: - fixed issue #78, improved regex
66 66 # 2016-09-06 PL: - fixed issue #83, backward compatible API
  67 +# 2016-11-17 v0.51 PL: - updated call to oleobj.OleNativeStream
67 68  
68   -__version__ = '0.50'
  69 +__version__ = '0.51'
69 70  
70 71 # ------------------------------------------------------------------------------
71 72 # TODO:
... ... @@ -552,7 +553,7 @@ class RtfObjParser(RtfParser):
552 553 rtfobj.oledata = obj.data
553 554 rtfobj.is_ole = True
554 555 if obj.class_name.lower() == 'package':
555   - opkg = OleNativeStream(bindata=obj.data)
  556 + opkg = OleNativeStream(bindata=obj.data, package=True)
556 557 rtfobj.filename = opkg.filename
557 558 rtfobj.src_path = opkg.src_path
558 559 rtfobj.temp_path = opkg.temp_path
... ...