From cda797574d2076115cc8547c9ccc74aa5664a991 Mon Sep 17 00:00:00 2001 From: Philippe Lagadec Date: Thu, 19 Mar 2015 08:49:56 +0100 Subject: [PATCH] changed line endings from CRLF to LF in all scripts to improve Linux/Unix compatibility --- oletools/ezhexviewer.py | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------------- oletools/olebrowse.py | 284 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------------------------------------------------------------------- oletools/oleid.py | 592 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- oletools/olemeta.py | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------- oletools/oletimes.py | 198 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------- oletools/pyxswf.py | 282 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------------------------------------------------- oletools/rtfobj.py | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------------- 7 files changed, 961 insertions(+), 961 deletions(-) diff --git a/oletools/ezhexviewer.py b/oletools/ezhexviewer.py index af158d1..094722d 100644 --- a/oletools/ezhexviewer.py +++ b/oletools/ezhexviewer.py @@ -1,105 +1,105 @@ -#!/usr/bin/env python -""" -ezhexviewer.py - -A simple hexadecimal viewer based on easygui. It should work on any platform -with Python 2.x. - -Usage: ezhexviewer.py [file] - -Usage in a python application: - - import ezhexviewer - ezhexviewer.hexview_file(filename) - ezhexviewer.hexview_data(data) - - -ezhexviewer project website: http://www.decalage.info/python/ezhexviewer - -ezhexviewer is copyright (c) 2012, Philippe Lagadec (http://www.decalage.info) -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -""" - -__version__ = '0.01' - -#------------------------------------------------------------------------------ -# CHANGELOG: -# 2012-09-17 v0.01 PL: - first version -# 2012-10-04 v0.02 PL: - added license - -#------------------------------------------------------------------------------ -# TODO: -# + options to set title and msg - - -from thirdparty.easygui import easygui -import sys - -#------------------------------------------------------------------------------ -# The following code (hexdump3 only) is a modified version of the hex dumper -# recipe published on ASPN by Sebastien Keim and Raymond Hattinger under the -# PSF license. I added the startindex parameter. -# see http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/142812 -# PSF license: http://docs.python.org/license.html -# Copyright (c) 2001-2012 Python Software Foundation; All Rights Reserved - -FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)]) - -def hexdump3(src, length=8, startindex=0): - """ - Returns a hexadecimal dump of a binary string. - length: number of bytes per row. - startindex: index of 1st byte. - """ - result=[] - for i in xrange(0, len(src), length): - s = src[i:i+length] - hexa = ' '.join(["%02X"%ord(x) for x in s]) - printable = s.translate(FILTER) - result.append("%08X %-*s %s\n" % (i+startindex, length*3, hexa, printable)) - return ''.join(result) - -# end of PSF-licensed code. -#------------------------------------------------------------------------------ - - -def hexview_data (data, msg='', title='ezhexviewer', length=16, startindex=0): - hex = hexdump3(data, length=length, startindex=startindex) - easygui.codebox(msg=msg, title=title, text=hex) - - -def hexview_file (filename, msg='', title='ezhexviewer', length=16, startindex=0): - data = open(filename, 'rb').read() - hexview_data(data, msg=msg, title=title, length=length, startindex=startindex) - - -if __name__ == '__main__': - try: - filename = sys.argv[1] - except: - filename = easygui.fileopenbox() - if filename: - try: - hexview_file(filename, msg='File: %s' % filename) - except: - easygui.exceptionbox(msg='Error:', title='ezhexviewer') +#!/usr/bin/env python +""" +ezhexviewer.py + +A simple hexadecimal viewer based on easygui. It should work on any platform +with Python 2.x. + +Usage: ezhexviewer.py [file] + +Usage in a python application: + + import ezhexviewer + ezhexviewer.hexview_file(filename) + ezhexviewer.hexview_data(data) + + +ezhexviewer project website: http://www.decalage.info/python/ezhexviewer + +ezhexviewer is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info) +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +__version__ = '0.02' + +#------------------------------------------------------------------------------ +# CHANGELOG: +# 2012-09-17 v0.01 PL: - first version +# 2012-10-04 v0.02 PL: - added license + +#------------------------------------------------------------------------------ +# TODO: +# + options to set title and msg + + +from thirdparty.easygui import easygui +import sys + +#------------------------------------------------------------------------------ +# The following code (hexdump3 only) is a modified version of the hex dumper +# recipe published on ASPN by Sebastien Keim and Raymond Hattinger under the +# PSF license. I added the startindex parameter. +# see http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/142812 +# PSF license: http://docs.python.org/license.html +# Copyright (c) 2001-2012 Python Software Foundation; All Rights Reserved + +FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)]) + +def hexdump3(src, length=8, startindex=0): + """ + Returns a hexadecimal dump of a binary string. + length: number of bytes per row. + startindex: index of 1st byte. + """ + result=[] + for i in xrange(0, len(src), length): + s = src[i:i+length] + hexa = ' '.join(["%02X"%ord(x) for x in s]) + printable = s.translate(FILTER) + result.append("%08X %-*s %s\n" % (i+startindex, length*3, hexa, printable)) + return ''.join(result) + +# end of PSF-licensed code. +#------------------------------------------------------------------------------ + + +def hexview_data (data, msg='', title='ezhexviewer', length=16, startindex=0): + hex = hexdump3(data, length=length, startindex=startindex) + easygui.codebox(msg=msg, title=title, text=hex) + + +def hexview_file (filename, msg='', title='ezhexviewer', length=16, startindex=0): + data = open(filename, 'rb').read() + hexview_data(data, msg=msg, title=title, length=length, startindex=startindex) + + +if __name__ == '__main__': + try: + filename = sys.argv[1] + except: + filename = easygui.fileopenbox() + if filename: + try: + hexview_file(filename, msg='File: %s' % filename) + except: + easygui.exceptionbox(msg='Error:', title='ezhexviewer') diff --git a/oletools/olebrowse.py b/oletools/olebrowse.py index 6f4a63a..ba2e3dc 100644 --- a/oletools/olebrowse.py +++ b/oletools/olebrowse.py @@ -1,142 +1,142 @@ -#!/usr/bin/env python -""" -olebrowse.py - -A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to -view and extract individual data streams. - -Usage: olebrowse.py [file] - -olebrowse project website: http://www.decalage.info/python/olebrowse - -olebrowse is part of the python-oletools package: -http://www.decalage.info/python/oletools - -olebrowse is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info) -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -""" - -__version__ = '0.02' - -#------------------------------------------------------------------------------ -# CHANGELOG: -# 2012-09-17 v0.01 PL: - first version -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL - -#------------------------------------------------------------------------------ -# TODO: -# - menu option to open another file -# - menu option to display properties -# - menu option to run other oletools, external tools such as OfficeCat? -# - for a stream, display info: size, path, etc -# - stream info: magic, entropy, ... ? - -import optparse, sys, os -from thirdparty.easygui import easygui -import thirdparty.olefile as olefile -import ezhexviewer - -ABOUT = '~ About olebrowse' -QUIT = '~ Quit' - - -def about (): - """ - Display information about this tool - """ - easygui.textbox(title='About olebrowse', text=__doc__) - - -def browse_stream (ole, stream): - """ - Browse a stream (hex view or save to file) - """ - #print 'stream:', stream - while True: - msg ='Select an action for the stream "%s", or press Esc to exit' % repr(stream) - actions = [ - 'Hex view', -## 'Text view', -## 'Repr view', - 'Save stream to file', - '~ Back to main menu', - ] - action = easygui.choicebox(msg, title='olebrowse', choices=actions) - if action is None or 'Back' in action: - break - elif action.startswith('Hex'): - data = ole.openstream(stream).getvalue() - ezhexviewer.hexview_data(data, msg='Stream: %s' % stream, title='olebrowse') -## elif action.startswith('Text'): -## data = ole.openstream(stream).getvalue() -## easygui.codebox(title='Text view - %s' % stream, text=data) -## elif action.startswith('Repr'): -## data = ole.openstream(stream).getvalue() -## easygui.codebox(title='Repr view - %s' % stream, text=repr(data)) - elif action.startswith('Save'): - data = ole.openstream(stream).getvalue() - fname = easygui.filesavebox(default='stream.bin') - if fname is not None: - f = open(fname, 'wb') - f.write(data) - f.close() - easygui.msgbox('stream saved to file %s' % fname) - - - -def main(): - """ - Main function - """ - try: - filename = sys.argv[1] - except: - filename = easygui.fileopenbox() - try: - ole = olefile.OleFileIO(filename) - listdir = ole.listdir() - streams = [] - for direntry in listdir: - #print direntry - streams.append('/'.join(direntry)) - streams.append(ABOUT) - streams.append(QUIT) - stream = True - while stream is not None: - msg ="Select a stream, or press Esc to exit" - title = "olebrowse" - stream = easygui.choicebox(msg, title, streams) - if stream is None or stream == QUIT: - break - if stream == ABOUT: - about() - else: - browse_stream(ole, stream) - except: - easygui.exceptionbox() - - - - -if __name__ == '__main__': - main() +#!/usr/bin/env python +""" +olebrowse.py + +A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to +view and extract individual data streams. + +Usage: olebrowse.py [file] + +olebrowse project website: http://www.decalage.info/python/olebrowse + +olebrowse is part of the python-oletools package: +http://www.decalage.info/python/oletools + +olebrowse is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info) +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +__version__ = '0.02' + +#------------------------------------------------------------------------------ +# CHANGELOG: +# 2012-09-17 v0.01 PL: - first version +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL + +#------------------------------------------------------------------------------ +# TODO: +# - menu option to open another file +# - menu option to display properties +# - menu option to run other oletools, external tools such as OfficeCat? +# - for a stream, display info: size, path, etc +# - stream info: magic, entropy, ... ? + +import optparse, sys, os +from thirdparty.easygui import easygui +import thirdparty.olefile as olefile +import ezhexviewer + +ABOUT = '~ About olebrowse' +QUIT = '~ Quit' + + +def about (): + """ + Display information about this tool + """ + easygui.textbox(title='About olebrowse', text=__doc__) + + +def browse_stream (ole, stream): + """ + Browse a stream (hex view or save to file) + """ + #print 'stream:', stream + while True: + msg ='Select an action for the stream "%s", or press Esc to exit' % repr(stream) + actions = [ + 'Hex view', +## 'Text view', +## 'Repr view', + 'Save stream to file', + '~ Back to main menu', + ] + action = easygui.choicebox(msg, title='olebrowse', choices=actions) + if action is None or 'Back' in action: + break + elif action.startswith('Hex'): + data = ole.openstream(stream).getvalue() + ezhexviewer.hexview_data(data, msg='Stream: %s' % stream, title='olebrowse') +## elif action.startswith('Text'): +## data = ole.openstream(stream).getvalue() +## easygui.codebox(title='Text view - %s' % stream, text=data) +## elif action.startswith('Repr'): +## data = ole.openstream(stream).getvalue() +## easygui.codebox(title='Repr view - %s' % stream, text=repr(data)) + elif action.startswith('Save'): + data = ole.openstream(stream).getvalue() + fname = easygui.filesavebox(default='stream.bin') + if fname is not None: + f = open(fname, 'wb') + f.write(data) + f.close() + easygui.msgbox('stream saved to file %s' % fname) + + + +def main(): + """ + Main function + """ + try: + filename = sys.argv[1] + except: + filename = easygui.fileopenbox() + try: + ole = olefile.OleFileIO(filename) + listdir = ole.listdir() + streams = [] + for direntry in listdir: + #print direntry + streams.append('/'.join(direntry)) + streams.append(ABOUT) + streams.append(QUIT) + stream = True + while stream is not None: + msg ="Select a stream, or press Esc to exit" + title = "olebrowse" + stream = easygui.choicebox(msg, title, streams) + if stream is None or stream == QUIT: + break + if stream == ABOUT: + about() + else: + browse_stream(ole, stream) + except: + easygui.exceptionbox() + + + + +if __name__ == '__main__': + main() diff --git a/oletools/oleid.py b/oletools/oleid.py index 8f63381..6e88736 100644 --- a/oletools/oleid.py +++ b/oletools/oleid.py @@ -1,296 +1,296 @@ -#!/usr/bin/env python -""" -oleid.py - -oleid is a script to analyze OLE files such as MS Office documents (e.g. Word, -Excel), to detect specific characteristics that could potentially indicate that -the file is suspicious or malicious, in terms of security (e.g. malware). -For example it can detect VBA macros, embedded Flash objects, fragmentation. -The results can be displayed or returned as XML for further processing. - -Usage: oleid.py - -oleid project website: http://www.decalage.info/python/oleid - -oleid is part of the python-oletools package: -http://www.decalage.info/python/oletools -""" - -#=== LICENSE ================================================================= - -# oleid is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info) -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without modification, -# are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -#------------------------------------------------------------------------------ -# CHANGELOG: -# 2012-10-29 v0.01 PL: - first version -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL -# - improved usage display with -h -# 2014-11-30 v0.03 PL: - improved output with prettytable - -__version__ = '0.03' - - -#------------------------------------------------------------------------------ -# TODO: -# + extract relevant metadata: codepage, author, application, timestamps, etc -# - detect RTF and OpenXML -# - fragmentation -# - OLE package -# - entropy -# - detect PE header? -# - detect NOPs? -# - list type of each object in object pool? -# - criticality for each indicator?: info, low, medium, high -# - support wildcards with glob? -# - verbose option -# - csv, xml output - - -#=== IMPORTS ================================================================= - -import optparse, sys, os, re, zlib, struct -import thirdparty.olefile as olefile -from thirdparty.prettytable import prettytable - - -#=== FUNCTIONS =============================================================== - -def detect_flash (data): - """ - Detect Flash objects (SWF files) within a binary string of data - return a list of (start_index, length, compressed) tuples, or [] if nothing - found. - - Code inspired from xxxswf.py by Alexander Hanel (but significantly reworked) - http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html - """ - #TODO: report - found = [] - for match in re.finditer('CWS|FWS', data): - start = match.start() - if start+8 > len(data): - # header size larger than remaining data, this is not a SWF - continue - #TODO: one struct.unpack should be simpler - # Read Header - header = data[start:start+3] - # Read Version - ver = struct.unpack(' 20: - continue - # Read SWF Size - size = struct.unpack(' len(data) or size < 1024: - # declared size larger than remaining data, this is not a SWF - # or declared size too small for a usual SWF - continue - # Read SWF into buffer. If compressed read uncompressed size. - swf = data[start:start+size] - compressed = False - if 'CWS' in header: - compressed = True - # compressed SWF: data after header (8 bytes) until the end is - # compressed with zlib. Attempt to decompress it to check if it is - # valid - compressed_data = swf[8:] - try: - zlib.decompress(compressed_data) - except: - continue - # else we don't check anything at this stage, we only assume it is a - # valid SWF. So there might be false positives for uncompressed SWF. - found.append((start, size, compressed)) - #print 'Found SWF start=%x, length=%d' % (start, size) - return found - - -#=== CLASSES ================================================================= - -class Indicator (object): - - def __init__(self, _id, value=None, _type=bool, name=None, description=None): - self.id = _id - self.value = value - self.type = _type - self.name = name - if name == None: - self.name = _id - self.description = description - - -class OleID: - - def __init__(self, filename): - self.filename = filename - self.indicators = [] - - def check(self): - # check if it is actually an OLE file: - oleformat = Indicator('ole_format', True, name='OLE format') - self.indicators.append(oleformat) - if not olefile.isOleFile(self.filename): - oleformat.value = False - return self.indicators - # parse file: - self.ole = olefile.OleFileIO(self.filename) - # checks: - self.check_properties() - self.check_encrypted() - self.check_word() - self.check_excel() - self.check_powerpoint() - self.check_visio() - self.check_ObjectPool() - self.check_flash() - self.ole.close() - return self.indicators - - def check_properties (self): - suminfo = Indicator('has_suminfo', False, name='Has SummaryInformation stream') - self.indicators.append(suminfo) - appname = Indicator('appname', 'unknown', _type=str, name='Application name') - self.indicators.append(appname) - self.suminfo = {} - # check stream SummaryInformation - if self.ole.exists("\x05SummaryInformation"): - suminfo.value = True - self.suminfo = self.ole.getproperties("\x05SummaryInformation") - # check application name: - appname.value = self.suminfo.get(0x12, 'unknown') - - def check_encrypted (self): - # we keep the pointer to the indicator, can be modified by other checks: - self.encrypted = Indicator('encrypted', False, name='Encrypted') - self.indicators.append(self.encrypted) - # check if bit 1 of security field = 1: - # (this field may be missing for Powerpoint2000, for example) - if 0x13 in self.suminfo: - if self.suminfo[0x13] & 1: - self.encrypted.value = True - - def check_word (self): - word = Indicator('word', False, name='Word Document', - description='Contains a WordDocument stream, very likely to be a Microsoft Word Document.') - self.indicators.append(word) - self.macros = Indicator('vba_macros', False, name='VBA Macros') - self.indicators.append(self.macros) - if self.ole.exists('WordDocument'): - word.value = True - # check for Word-specific encryption flag: - s = self.ole.openstream(["WordDocument"]) - # pass header 10 bytes - s.read(10) - # read flag structure: - temp16 = struct.unpack("H", s.read(2))[0] - fEncrypted = (temp16 & 0x0100) >> 8 - if fEncrypted: - self.encrypted.value = True - s.close() - # check for VBA macros: - if self.ole.exists('Macros'): - self.macros.value = True - - def check_excel (self): - excel = Indicator('excel', False, name='Excel Workbook', - description='Contains a Workbook or Book stream, very likely to be a Microsoft Excel Workbook.') - self.indicators.append(excel) - #self.macros = Indicator('vba_macros', False, name='VBA Macros') - #self.indicators.append(self.macros) - if self.ole.exists('Workbook') or self.ole.exists('Book'): - excel.value = True - # check for VBA macros: - if self.ole.exists('_VBA_PROJECT_CUR'): - self.macros.value = True - - def check_powerpoint (self): - ppt = Indicator('ppt', False, name='PowerPoint Presentation', - description='Contains a PowerPoint Document stream, very likely to be a Microsoft PowerPoint Presentation.') - self.indicators.append(ppt) - if self.ole.exists('PowerPoint Document'): - ppt.value = True - - def check_visio (self): - visio = Indicator('visio', False, name='Visio Drawing', - description='Contains a VisioDocument stream, very likely to be a Microsoft Visio Drawing.') - self.indicators.append(visio) - if self.ole.exists('VisioDocument'): - visio.value = True - - def check_ObjectPool (self): - objpool = Indicator('ObjectPool', False, name='ObjectPool', - description='Contains an ObjectPool stream, very likely to contain embedded OLE objects or files.') - self.indicators.append(objpool) - if self.ole.exists('ObjectPool'): - objpool.value = True - - - def check_flash (self): - flash = Indicator('flash', 0, _type=int, name='Flash objects', - description='Number of embedded Flash objects (SWF files) detected in OLE streams. Not 100% accurate, there may be false positives.') - self.indicators.append(flash) - for stream in self.ole.listdir(): - data = self.ole.openstream(stream).read() - found = detect_flash(data) - # just add to the count of Flash objects: - flash.value += len(found) - #print stream, found - - -#=== MAIN ================================================================= - -def main(): - usage = 'usage: %prog [options] ' - parser = optparse.OptionParser(usage=__doc__ + '\n' + usage) -## parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream') - - (options, args) = parser.parse_args() - - # Print help if no argurments are passed - if len(args) == 0: - parser.print_help() - return - - for filename in args: - print '\nFilename:', filename - oleid = OleID(filename) - indicators = oleid.check() - - #TODO: add description - #TODO: highlight suspicious indicators - t = prettytable.PrettyTable(['Indicator', 'Value']) - t.align = 'l' - t.max_width = 39 - #t.border = False - - for indicator in indicators: - #print '%s: %s' % (indicator.name, indicator.value) - t.add_row((indicator.name, indicator.value)) - - print t - -if __name__ == '__main__': - main() +#!/usr/bin/env python +""" +oleid.py + +oleid is a script to analyze OLE files such as MS Office documents (e.g. Word, +Excel), to detect specific characteristics that could potentially indicate that +the file is suspicious or malicious, in terms of security (e.g. malware). +For example it can detect VBA macros, embedded Flash objects, fragmentation. +The results can be displayed or returned as XML for further processing. + +Usage: oleid.py + +oleid project website: http://www.decalage.info/python/oleid + +oleid is part of the python-oletools package: +http://www.decalage.info/python/oletools +""" + +#=== LICENSE ================================================================= + +# oleid is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#------------------------------------------------------------------------------ +# CHANGELOG: +# 2012-10-29 v0.01 PL: - first version +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL +# - improved usage display with -h +# 2014-11-30 v0.03 PL: - improved output with prettytable + +__version__ = '0.03' + + +#------------------------------------------------------------------------------ +# TODO: +# + extract relevant metadata: codepage, author, application, timestamps, etc +# - detect RTF and OpenXML +# - fragmentation +# - OLE package +# - entropy +# - detect PE header? +# - detect NOPs? +# - list type of each object in object pool? +# - criticality for each indicator?: info, low, medium, high +# - support wildcards with glob? +# - verbose option +# - csv, xml output + + +#=== IMPORTS ================================================================= + +import optparse, sys, os, re, zlib, struct +import thirdparty.olefile as olefile +from thirdparty.prettytable import prettytable + + +#=== FUNCTIONS =============================================================== + +def detect_flash (data): + """ + Detect Flash objects (SWF files) within a binary string of data + return a list of (start_index, length, compressed) tuples, or [] if nothing + found. + + Code inspired from xxxswf.py by Alexander Hanel (but significantly reworked) + http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html + """ + #TODO: report + found = [] + for match in re.finditer('CWS|FWS', data): + start = match.start() + if start+8 > len(data): + # header size larger than remaining data, this is not a SWF + continue + #TODO: one struct.unpack should be simpler + # Read Header + header = data[start:start+3] + # Read Version + ver = struct.unpack(' 20: + continue + # Read SWF Size + size = struct.unpack(' len(data) or size < 1024: + # declared size larger than remaining data, this is not a SWF + # or declared size too small for a usual SWF + continue + # Read SWF into buffer. If compressed read uncompressed size. + swf = data[start:start+size] + compressed = False + if 'CWS' in header: + compressed = True + # compressed SWF: data after header (8 bytes) until the end is + # compressed with zlib. Attempt to decompress it to check if it is + # valid + compressed_data = swf[8:] + try: + zlib.decompress(compressed_data) + except: + continue + # else we don't check anything at this stage, we only assume it is a + # valid SWF. So there might be false positives for uncompressed SWF. + found.append((start, size, compressed)) + #print 'Found SWF start=%x, length=%d' % (start, size) + return found + + +#=== CLASSES ================================================================= + +class Indicator (object): + + def __init__(self, _id, value=None, _type=bool, name=None, description=None): + self.id = _id + self.value = value + self.type = _type + self.name = name + if name == None: + self.name = _id + self.description = description + + +class OleID: + + def __init__(self, filename): + self.filename = filename + self.indicators = [] + + def check(self): + # check if it is actually an OLE file: + oleformat = Indicator('ole_format', True, name='OLE format') + self.indicators.append(oleformat) + if not olefile.isOleFile(self.filename): + oleformat.value = False + return self.indicators + # parse file: + self.ole = olefile.OleFileIO(self.filename) + # checks: + self.check_properties() + self.check_encrypted() + self.check_word() + self.check_excel() + self.check_powerpoint() + self.check_visio() + self.check_ObjectPool() + self.check_flash() + self.ole.close() + return self.indicators + + def check_properties (self): + suminfo = Indicator('has_suminfo', False, name='Has SummaryInformation stream') + self.indicators.append(suminfo) + appname = Indicator('appname', 'unknown', _type=str, name='Application name') + self.indicators.append(appname) + self.suminfo = {} + # check stream SummaryInformation + if self.ole.exists("\x05SummaryInformation"): + suminfo.value = True + self.suminfo = self.ole.getproperties("\x05SummaryInformation") + # check application name: + appname.value = self.suminfo.get(0x12, 'unknown') + + def check_encrypted (self): + # we keep the pointer to the indicator, can be modified by other checks: + self.encrypted = Indicator('encrypted', False, name='Encrypted') + self.indicators.append(self.encrypted) + # check if bit 1 of security field = 1: + # (this field may be missing for Powerpoint2000, for example) + if 0x13 in self.suminfo: + if self.suminfo[0x13] & 1: + self.encrypted.value = True + + def check_word (self): + word = Indicator('word', False, name='Word Document', + description='Contains a WordDocument stream, very likely to be a Microsoft Word Document.') + self.indicators.append(word) + self.macros = Indicator('vba_macros', False, name='VBA Macros') + self.indicators.append(self.macros) + if self.ole.exists('WordDocument'): + word.value = True + # check for Word-specific encryption flag: + s = self.ole.openstream(["WordDocument"]) + # pass header 10 bytes + s.read(10) + # read flag structure: + temp16 = struct.unpack("H", s.read(2))[0] + fEncrypted = (temp16 & 0x0100) >> 8 + if fEncrypted: + self.encrypted.value = True + s.close() + # check for VBA macros: + if self.ole.exists('Macros'): + self.macros.value = True + + def check_excel (self): + excel = Indicator('excel', False, name='Excel Workbook', + description='Contains a Workbook or Book stream, very likely to be a Microsoft Excel Workbook.') + self.indicators.append(excel) + #self.macros = Indicator('vba_macros', False, name='VBA Macros') + #self.indicators.append(self.macros) + if self.ole.exists('Workbook') or self.ole.exists('Book'): + excel.value = True + # check for VBA macros: + if self.ole.exists('_VBA_PROJECT_CUR'): + self.macros.value = True + + def check_powerpoint (self): + ppt = Indicator('ppt', False, name='PowerPoint Presentation', + description='Contains a PowerPoint Document stream, very likely to be a Microsoft PowerPoint Presentation.') + self.indicators.append(ppt) + if self.ole.exists('PowerPoint Document'): + ppt.value = True + + def check_visio (self): + visio = Indicator('visio', False, name='Visio Drawing', + description='Contains a VisioDocument stream, very likely to be a Microsoft Visio Drawing.') + self.indicators.append(visio) + if self.ole.exists('VisioDocument'): + visio.value = True + + def check_ObjectPool (self): + objpool = Indicator('ObjectPool', False, name='ObjectPool', + description='Contains an ObjectPool stream, very likely to contain embedded OLE objects or files.') + self.indicators.append(objpool) + if self.ole.exists('ObjectPool'): + objpool.value = True + + + def check_flash (self): + flash = Indicator('flash', 0, _type=int, name='Flash objects', + description='Number of embedded Flash objects (SWF files) detected in OLE streams. Not 100% accurate, there may be false positives.') + self.indicators.append(flash) + for stream in self.ole.listdir(): + data = self.ole.openstream(stream).read() + found = detect_flash(data) + # just add to the count of Flash objects: + flash.value += len(found) + #print stream, found + + +#=== MAIN ================================================================= + +def main(): + usage = 'usage: %prog [options] ' + parser = optparse.OptionParser(usage=__doc__ + '\n' + usage) +## parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream') + + (options, args) = parser.parse_args() + + # Print help if no argurments are passed + if len(args) == 0: + parser.print_help() + return + + for filename in args: + print '\nFilename:', filename + oleid = OleID(filename) + indicators = oleid.check() + + #TODO: add description + #TODO: highlight suspicious indicators + t = prettytable.PrettyTable(['Indicator', 'Value']) + t.align = 'l' + t.max_width = 39 + #t.border = False + + for indicator in indicators: + #print '%s: %s' % (indicator.name, indicator.value) + t.add_row((indicator.name, indicator.value)) + + print t + +if __name__ == '__main__': + main() diff --git a/oletools/olemeta.py b/oletools/olemeta.py index 1729976..1240a98 100644 --- a/oletools/olemeta.py +++ b/oletools/olemeta.py @@ -1,73 +1,73 @@ -#!/usr/bin/env python -""" -olemeta.py - -olemeta is a script to parse OLE files such as MS Office documents (e.g. Word, -Excel), to extract all standard properties present in the OLE file. - -Usage: olemeta.py - -olemeta project website: http://www.decalage.info/python/olemeta - -olemeta is part of the python-oletools package: -http://www.decalage.info/python/oletools -""" - -#=== LICENSE ================================================================= - -# olemeta is copyright (c) 2013-2014, Philippe Lagadec (http://www.decalage.info) -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without modification, -# are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#------------------------------------------------------------------------------ -# CHANGELOG: -# 2013-07-24 v0.01 PL: - first version -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL -# - improved usage display - -__version__ = '0.02' - -#------------------------------------------------------------------------------ -# TODO: -# + optparse -# + nicer output: table with fixed columns, datetime, etc -# + CSV output -# + option to only show available properties (by default) - -#=== IMPORTS ================================================================= - -import sys -import thirdparty.olefile as olefile - - -#=== MAIN ================================================================= - -try: - ole = olefile.OleFileIO(sys.argv[1]) -except IndexError: - sys.exit(__doc__) - -# parse and display metadata: -meta = ole.get_metadata() -meta.dump() - -ole.close() +#!/usr/bin/env python +""" +olemeta.py + +olemeta is a script to parse OLE files such as MS Office documents (e.g. Word, +Excel), to extract all standard properties present in the OLE file. + +Usage: olemeta.py + +olemeta project website: http://www.decalage.info/python/olemeta + +olemeta is part of the python-oletools package: +http://www.decalage.info/python/oletools +""" + +#=== LICENSE ================================================================= + +# olemeta is copyright (c) 2013-2015, Philippe Lagadec (http://www.decalage.info) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#------------------------------------------------------------------------------ +# CHANGELOG: +# 2013-07-24 v0.01 PL: - first version +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL +# - improved usage display + +__version__ = '0.02' + +#------------------------------------------------------------------------------ +# TODO: +# + optparse +# + nicer output: table with fixed columns, datetime, etc +# + CSV output +# + option to only show available properties (by default) + +#=== IMPORTS ================================================================= + +import sys +import thirdparty.olefile as olefile + + +#=== MAIN ================================================================= + +try: + ole = olefile.OleFileIO(sys.argv[1]) +except IndexError: + sys.exit(__doc__) + +# parse and display metadata: +meta = ole.get_metadata() +meta.dump() + +ole.close() diff --git a/oletools/oletimes.py b/oletools/oletimes.py index 67e4841..3321e89 100644 --- a/oletools/oletimes.py +++ b/oletools/oletimes.py @@ -1,99 +1,99 @@ -#!/usr/bin/env python -""" -oletimes.py - -oletimes is a script to parse OLE files such as MS Office documents (e.g. Word, -Excel), to extract creation and modification times of all streams and storages -in the OLE file. - -Usage: oletimes.py - -oletimes project website: http://www.decalage.info/python/oletimes - -oletimes is part of the python-oletools package: -http://www.decalage.info/python/oletools -""" - -#=== LICENSE ================================================================= - -# oletimes is copyright (c) 2013-2014, Philippe Lagadec (http://www.decalage.info) -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without modification, -# are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -#------------------------------------------------------------------------------ -# CHANGELOG: -# 2013-07-24 v0.01 PL: - first version -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL -# - improved usage display -# 2014-11-30 v0.03 PL: - improved output with prettytable - -__version__ = '0.03' - -#------------------------------------------------------------------------------ -# TODO: -# + optparse -# + nicer output: table with fixed columns, datetime, etc -# + CSV output -# + option to only show available timestamps (by default?) - -#=== IMPORTS ================================================================= - -import sys, datetime -import thirdparty.olefile as olefile -from thirdparty.prettytable import prettytable - - -#=== MAIN ================================================================= - -try: - ole = olefile.OleFileIO(sys.argv[1]) -except IndexError: - sys.exit(__doc__) - -def dt2str (dt): - """ - Convert a datetime object to a string for display, without microseconds - - :param dt: datetime.datetime object, or None - :return: str, or None - """ - if dt is None: - return None - dt = dt.replace(microsecond = 0) - return str(dt) - -t = prettytable.PrettyTable(['Stream/Storage name', 'Modification Time', 'Creation Time']) -t.align = 'l' -t.max_width = 26 -#t.border = False - -#print'- Root mtime=%s ctime=%s' % (ole.root.getmtime(), ole.root.getctime()) -t.add_row(('Root', dt2str(ole.root.getmtime()), dt2str(ole.root.getctime()))) - -for obj in ole.listdir(streams=True, storages=True): - #print '- %s: mtime=%s ctime=%s' % (repr('/'.join(obj)), ole.getmtime(obj), ole.getctime(obj)) - t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj)))) - -print t - -ole.close() +#!/usr/bin/env python +""" +oletimes.py + +oletimes is a script to parse OLE files such as MS Office documents (e.g. Word, +Excel), to extract creation and modification times of all streams and storages +in the OLE file. + +Usage: oletimes.py + +oletimes project website: http://www.decalage.info/python/oletimes + +oletimes is part of the python-oletools package: +http://www.decalage.info/python/oletools +""" + +#=== LICENSE ================================================================= + +# oletimes is copyright (c) 2013-2015, Philippe Lagadec (http://www.decalage.info) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#------------------------------------------------------------------------------ +# CHANGELOG: +# 2013-07-24 v0.01 PL: - first version +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL +# - improved usage display +# 2014-11-30 v0.03 PL: - improved output with prettytable + +__version__ = '0.03' + +#------------------------------------------------------------------------------ +# TODO: +# + optparse +# + nicer output: table with fixed columns, datetime, etc +# + CSV output +# + option to only show available timestamps (by default?) + +#=== IMPORTS ================================================================= + +import sys, datetime +import thirdparty.olefile as olefile +from thirdparty.prettytable import prettytable + + +#=== MAIN ================================================================= + +try: + ole = olefile.OleFileIO(sys.argv[1]) +except IndexError: + sys.exit(__doc__) + +def dt2str (dt): + """ + Convert a datetime object to a string for display, without microseconds + + :param dt: datetime.datetime object, or None + :return: str, or None + """ + if dt is None: + return None + dt = dt.replace(microsecond = 0) + return str(dt) + +t = prettytable.PrettyTable(['Stream/Storage name', 'Modification Time', 'Creation Time']) +t.align = 'l' +t.max_width = 26 +#t.border = False + +#print'- Root mtime=%s ctime=%s' % (ole.root.getmtime(), ole.root.getctime()) +t.add_row(('Root', dt2str(ole.root.getmtime()), dt2str(ole.root.getctime()))) + +for obj in ole.listdir(streams=True, storages=True): + #print '- %s: mtime=%s ctime=%s' % (repr('/'.join(obj)), ole.getmtime(obj), ole.getctime(obj)) + t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj)))) + +print t + +ole.close() diff --git a/oletools/pyxswf.py b/oletools/pyxswf.py index ba21be4..9eb90d7 100644 --- a/oletools/pyxswf.py +++ b/oletools/pyxswf.py @@ -1,141 +1,141 @@ -#!/usr/bin/env python -""" -pyxswf.py - -pyxswf is a script to detect, extract and analyze Flash objects (SWF) that may -be embedded in files such as MS Office documents (e.g. Word, Excel), -which is especially useful for malware analysis. - -pyxswf is an extension to xxxswf.py published by Alexander Hanel on -http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html -Compared to xxxswf, it can extract streams from MS Office documents by parsing -their OLE structure properly (-o option), which is necessary when streams are -fragmented. -Stream fragmentation is a known obfuscation technique, as explained on -http://www.breakingpointsystems.com/resources/blog/evasion-with-ole2-fragmentation/ - -It can also extract Flash objects from RTF documents, by parsing embedded -objects encoded in hexadecimal format (-f option). - -pyxswf project website: http://www.decalage.info/python/pyxswf - -pyxswf is part of the python-oletools package: -http://www.decalage.info/python/oletools -""" - -#=== LICENSE ================================================================= - -# pyxswf is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info) -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without modification, -# are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#------------------------------------------------------------------------------ -# CHANGELOG: -# 2012-09-17 v0.01 PL: - first version -# 2012-11-09 v0.02 PL: - added RTF embedded objects extraction -# 2014-11-29 v0.03 PL: - use olefile instead of OleFileIO_PL -# - improved usage display with -h - -__version__ = '0.03' - -#------------------------------------------------------------------------------ -# TODO: -# + add support for LZMA-compressed flash files (ZWS header) -# references: http://blog.malwaretracker.com/2014/01/cve-2013-5331-evaded-av-by-using.html -# http://code.metager.de/source/xref/adobe/flash/crossbridge/tools/swf-info.py -# http://room32.dyndns.org/forums/showthread.php?766-SWFCompression -# sample code: http://room32.dyndns.org/SWFCompression.py -# - check if file is OLE -# - support -r - - -#=== IMPORTS ================================================================= - -import optparse, sys, os, rtfobj, StringIO -from thirdparty.xxxswf import xxxswf -import thirdparty.olefile as olefile - - -#=== MAIN ================================================================= - -def main(): - # Scenarios: - # Scan file for SWF(s) - # Scan file for SWF(s) and extract them - # Scan file for SWF(s) and scan them with Yara - # Scan file for SWF(s), extract them and scan with Yara - # Scan directory recursively for files that contain SWF(s) - # Scan directory recursively for files that contain SWF(s) and extract them - - usage = 'usage: %prog [options] ' - parser = optparse.OptionParser(usage=__doc__ + '\n' + usage) - parser.add_option('-x', '--extract', action='store_true', dest='extract', help='Extracts the embedded SWF(s), names it MD5HASH.swf & saves it in the working dir. No addition args needed') - parser.add_option('-y', '--yara', action='store_true', dest='yara', help='Scans the SWF(s) with yara. If the SWF(s) is compressed it will be deflated. No addition args needed') - parser.add_option('-s', '--md5scan', action='store_true', dest='md5scan', help='Scans the SWF(s) for MD5 signatures. Please see func checkMD5 to define hashes. No addition args needed') - parser.add_option('-H', '--header', action='store_true', dest='header', help='Displays the SWFs file header. No addition args needed') - parser.add_option('-d', '--decompress', action='store_true', dest='decompress', help='Deflates compressed SWFS(s)') - parser.add_option('-r', '--recdir', dest='PATH', type='string', help='Will recursively scan a directory for files that contain SWFs. Must provide path in quotes') - parser.add_option('-c', '--compress', action='store_true', dest='compress', help='Compresses the SWF using Zlib') - - parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream') - parser.add_option('-f', '--rtf', action='store_true', dest='rtf', help='Parse an RTF file to look for SWF in each embedded object') - - - (options, args) = parser.parse_args() - - # Print help if no arguments are passed - if len(args) == 0: - parser.print_help() - return - - # OLE MODE: - if options.ole: - for filename in args: - ole = olefile.OleFileIO(filename) - for direntry in ole.direntries: - if direntry is not None and direntry.entry_type == olefile.STGTY_STREAM: - f = ole._open(direntry.isectStart, direntry.size) - # check if data contains the SWF magic: FWS or CWS - data = f.getvalue() - if 'FWS' in data or 'CWS' in data: - print 'OLE stream: %s' % repr(direntry.name) - # call xxxswf to scan or extract Flash files: - xxxswf.disneyland(f, direntry.name, options) - f.close() - ole.close() - - # RTF MODE: - elif options.rtf: - for filename in args: - for index, data in rtfobj.rtf_iter_objects(filename): - if 'FWS' in data or 'CWS' in data: - print 'RTF embedded object size %d at index %08X' % (len(data), index) - f = StringIO.StringIO(data) - name = 'RTF_embedded_object_%08X' % index - # call xxxswf to scan or extract Flash files: - xxxswf.disneyland(f, name, options) - - else: - xxxswf.main() - -if __name__ == '__main__': - main() +#!/usr/bin/env python +""" +pyxswf.py + +pyxswf is a script to detect, extract and analyze Flash objects (SWF) that may +be embedded in files such as MS Office documents (e.g. Word, Excel), +which is especially useful for malware analysis. + +pyxswf is an extension to xxxswf.py published by Alexander Hanel on +http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html +Compared to xxxswf, it can extract streams from MS Office documents by parsing +their OLE structure properly (-o option), which is necessary when streams are +fragmented. +Stream fragmentation is a known obfuscation technique, as explained on +http://www.breakingpointsystems.com/resources/blog/evasion-with-ole2-fragmentation/ + +It can also extract Flash objects from RTF documents, by parsing embedded +objects encoded in hexadecimal format (-f option). + +pyxswf project website: http://www.decalage.info/python/pyxswf + +pyxswf is part of the python-oletools package: +http://www.decalage.info/python/oletools +""" + +#=== LICENSE ================================================================= + +# pyxswf is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#------------------------------------------------------------------------------ +# CHANGELOG: +# 2012-09-17 v0.01 PL: - first version +# 2012-11-09 v0.02 PL: - added RTF embedded objects extraction +# 2014-11-29 v0.03 PL: - use olefile instead of OleFileIO_PL +# - improved usage display with -h + +__version__ = '0.03' + +#------------------------------------------------------------------------------ +# TODO: +# + add support for LZMA-compressed flash files (ZWS header) +# references: http://blog.malwaretracker.com/2014/01/cve-2013-5331-evaded-av-by-using.html +# http://code.metager.de/source/xref/adobe/flash/crossbridge/tools/swf-info.py +# http://room32.dyndns.org/forums/showthread.php?766-SWFCompression +# sample code: http://room32.dyndns.org/SWFCompression.py +# - check if file is OLE +# - support -r + + +#=== IMPORTS ================================================================= + +import optparse, sys, os, rtfobj, StringIO +from thirdparty.xxxswf import xxxswf +import thirdparty.olefile as olefile + + +#=== MAIN ================================================================= + +def main(): + # Scenarios: + # Scan file for SWF(s) + # Scan file for SWF(s) and extract them + # Scan file for SWF(s) and scan them with Yara + # Scan file for SWF(s), extract them and scan with Yara + # Scan directory recursively for files that contain SWF(s) + # Scan directory recursively for files that contain SWF(s) and extract them + + usage = 'usage: %prog [options] ' + parser = optparse.OptionParser(usage=__doc__ + '\n' + usage) + parser.add_option('-x', '--extract', action='store_true', dest='extract', help='Extracts the embedded SWF(s), names it MD5HASH.swf & saves it in the working dir. No addition args needed') + parser.add_option('-y', '--yara', action='store_true', dest='yara', help='Scans the SWF(s) with yara. If the SWF(s) is compressed it will be deflated. No addition args needed') + parser.add_option('-s', '--md5scan', action='store_true', dest='md5scan', help='Scans the SWF(s) for MD5 signatures. Please see func checkMD5 to define hashes. No addition args needed') + parser.add_option('-H', '--header', action='store_true', dest='header', help='Displays the SWFs file header. No addition args needed') + parser.add_option('-d', '--decompress', action='store_true', dest='decompress', help='Deflates compressed SWFS(s)') + parser.add_option('-r', '--recdir', dest='PATH', type='string', help='Will recursively scan a directory for files that contain SWFs. Must provide path in quotes') + parser.add_option('-c', '--compress', action='store_true', dest='compress', help='Compresses the SWF using Zlib') + + parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream') + parser.add_option('-f', '--rtf', action='store_true', dest='rtf', help='Parse an RTF file to look for SWF in each embedded object') + + + (options, args) = parser.parse_args() + + # Print help if no arguments are passed + if len(args) == 0: + parser.print_help() + return + + # OLE MODE: + if options.ole: + for filename in args: + ole = olefile.OleFileIO(filename) + for direntry in ole.direntries: + if direntry is not None and direntry.entry_type == olefile.STGTY_STREAM: + f = ole._open(direntry.isectStart, direntry.size) + # check if data contains the SWF magic: FWS or CWS + data = f.getvalue() + if 'FWS' in data or 'CWS' in data: + print 'OLE stream: %s' % repr(direntry.name) + # call xxxswf to scan or extract Flash files: + xxxswf.disneyland(f, direntry.name, options) + f.close() + ole.close() + + # RTF MODE: + elif options.rtf: + for filename in args: + for index, data in rtfobj.rtf_iter_objects(filename): + if 'FWS' in data or 'CWS' in data: + print 'RTF embedded object size %d at index %08X' % (len(data), index) + f = StringIO.StringIO(data) + name = 'RTF_embedded_object_%08X' % index + # call xxxswf to scan or extract Flash files: + xxxswf.disneyland(f, name, options) + + else: + xxxswf.main() + +if __name__ == '__main__': + main() diff --git a/oletools/rtfobj.py b/oletools/rtfobj.py index 24f5580..0e882bb 100644 --- a/oletools/rtfobj.py +++ b/oletools/rtfobj.py @@ -1,105 +1,105 @@ -#!/usr/bin/env python -""" -rtfobj.py - Philippe Lagadec 2013-04-02 - -rtfobj is a Python module to extract embedded objects from RTF files, such as -OLE ojects. It can be used as a Python library or a command-line tool. - -Usage: rtfobj.py - -rtfobj project website: http://www.decalage.info/python/rtfobj - -rtfobj is part of the python-oletools package: -http://www.decalage.info/python/oletools -""" - -#=== LICENSE ================================================================= - -# rtfobj is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info) -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without modification, -# are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -#------------------------------------------------------------------------------ -# CHANGELOG: -# 2012-11-09 v0.01 PL: - first version -# 2013-04-02 v0.02 PL: - fixed bug in main - -__version__ = '0.02' - -#------------------------------------------------------------------------------ -# TODO: -# - improve regex pattern for better performance? -# - allow semicolon within hex, as found in this sample: -# http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html - -#=== IMPORTS ================================================================= - -import re, sys, string, binascii - - -#=== CONSTANTS================================================================= - -# REGEX pattern to extract embedded OLE objects in hexadecimal format: -# alphanum digit: [0-9A-Fa-f] -# hex char = two alphanum digits: [0-9A-Fa-f]{2} -# several hex chars, at least 4: (?:[0-9A-Fa-f]{2}){4,} -# at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s* -PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' -# improved pattern, allowing semicolons within hex: -#PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' - -# a dummy translation table for str.translate, which does not change anythying: -TRANSTABLE_NOCHANGE = string.maketrans('', '') - - -#=== FUNCTIONS ================================================================= - -def rtf_iter_objects (filename, min_size=32): - """ - Open a RTF file, extract each embedded object encoded in hexadecimal of - size > min_size, yield the index of the object in the RTF file and its data - in binary format. - This is an iterator. - """ - data = open(filename, 'rb').read() - for m in re.finditer(PATTERN, data): - found = m.group(0) - # remove all whitespace and line feeds: - #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE - found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v') - found = binascii.unhexlify(found) - #print repr(found) - if len(found)>min_size: - yield m.start(), found - - -#=== MAIN ================================================================= - -if __name__ == '__main__': - if len(sys.argv)<2: - sys.exit(__doc__) - for index, data in rtf_iter_objects(sys.argv[1]): - print 'found object size %d at index %08X' % (len(data), index) - fname = 'object_%08X.bin' % index - print 'saving to file %s' % fname - open(fname, 'wb').write(data) +#!/usr/bin/env python +""" +rtfobj.py - Philippe Lagadec 2013-04-02 + +rtfobj is a Python module to extract embedded objects from RTF files, such as +OLE ojects. It can be used as a Python library or a command-line tool. + +Usage: rtfobj.py + +rtfobj project website: http://www.decalage.info/python/rtfobj + +rtfobj is part of the python-oletools package: +http://www.decalage.info/python/oletools +""" + +#=== LICENSE ================================================================= + +# rtfobj is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#------------------------------------------------------------------------------ +# CHANGELOG: +# 2012-11-09 v0.01 PL: - first version +# 2013-04-02 v0.02 PL: - fixed bug in main + +__version__ = '0.02' + +#------------------------------------------------------------------------------ +# TODO: +# - improve regex pattern for better performance? +# - allow semicolon within hex, as found in this sample: +# http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html + +#=== IMPORTS ================================================================= + +import re, sys, string, binascii + + +#=== CONSTANTS================================================================= + +# REGEX pattern to extract embedded OLE objects in hexadecimal format: +# alphanum digit: [0-9A-Fa-f] +# hex char = two alphanum digits: [0-9A-Fa-f]{2} +# several hex chars, at least 4: (?:[0-9A-Fa-f]{2}){4,} +# at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s* +PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' +# improved pattern, allowing semicolons within hex: +#PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' + +# a dummy translation table for str.translate, which does not change anythying: +TRANSTABLE_NOCHANGE = string.maketrans('', '') + + +#=== FUNCTIONS ================================================================= + +def rtf_iter_objects (filename, min_size=32): + """ + Open a RTF file, extract each embedded object encoded in hexadecimal of + size > min_size, yield the index of the object in the RTF file and its data + in binary format. + This is an iterator. + """ + data = open(filename, 'rb').read() + for m in re.finditer(PATTERN, data): + found = m.group(0) + # remove all whitespace and line feeds: + #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE + found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v') + found = binascii.unhexlify(found) + #print repr(found) + if len(found)>min_size: + yield m.start(), found + + +#=== MAIN ================================================================= + +if __name__ == '__main__': + if len(sys.argv)<2: + sys.exit(__doc__) + for index, data in rtf_iter_objects(sys.argv[1]): + print 'found object size %d at index %08X' % (len(data), index) + fname = 'object_%08X.bin' % index + print 'saving to file %s' % fname + open(fname, 'wb').write(data) -- libgit2 0.21.4