Commit cda797574d2076115cc8547c9ccc74aa5664a991
1 parent: a4ffb743
changed line endings from CRLF to LF in all scripts to improve Linux/Unix compatibility
Showing 7 changed files with 961 additions and 961 deletions
oletools/ezhexviewer.py
| 1 | -#!/usr/bin/env python | |
| 2 | -""" | |
| 3 | -ezhexviewer.py | |
| 4 | - | |
| 5 | -A simple hexadecimal viewer based on easygui. It should work on any platform | |
| 6 | -with Python 2.x. | |
| 7 | - | |
| 8 | -Usage: ezhexviewer.py [file] | |
| 9 | - | |
| 10 | -Usage in a python application: | |
| 11 | - | |
| 12 | - import ezhexviewer | |
| 13 | - ezhexviewer.hexview_file(filename) | |
| 14 | - ezhexviewer.hexview_data(data) | |
| 15 | - | |
| 16 | - | |
| 17 | -ezhexviewer project website: http://www.decalage.info/python/ezhexviewer | |
| 18 | - | |
| 19 | -ezhexviewer is copyright (c) 2012, Philippe Lagadec (http://www.decalage.info) | |
| 20 | -All rights reserved. | |
| 21 | - | |
| 22 | -Redistribution and use in source and binary forms, with or without modification, | |
| 23 | -are permitted provided that the following conditions are met: | |
| 24 | - | |
| 25 | - * Redistributions of source code must retain the above copyright notice, this | |
| 26 | - list of conditions and the following disclaimer. | |
| 27 | - * Redistributions in binary form must reproduce the above copyright notice, | |
| 28 | - this list of conditions and the following disclaimer in the documentation | |
| 29 | - and/or other materials provided with the distribution. | |
| 30 | - | |
| 31 | -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 32 | -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 33 | -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 34 | -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 35 | -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 36 | -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 37 | -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 38 | -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 39 | -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 40 | -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 41 | -""" | |
| 42 | - | |
| 43 | -__version__ = '0.01' | |
| 44 | - | |
| 45 | -#------------------------------------------------------------------------------ | |
| 46 | -# CHANGELOG: | |
| 47 | -# 2012-09-17 v0.01 PL: - first version | |
| 48 | -# 2012-10-04 v0.02 PL: - added license | |
| 49 | - | |
| 50 | -#------------------------------------------------------------------------------ | |
| 51 | -# TODO: | |
| 52 | -# + options to set title and msg | |
| 53 | - | |
| 54 | - | |
| 55 | -from thirdparty.easygui import easygui | |
| 56 | -import sys | |
| 57 | - | |
| 58 | -#------------------------------------------------------------------------------ | |
| 59 | -# The following code (hexdump3 only) is a modified version of the hex dumper | |
| 60 | -# recipe published on ASPN by Sebastien Keim and Raymond Hattinger under the | |
| 61 | -# PSF license. I added the startindex parameter. | |
| 62 | -# see http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/142812 | |
| 63 | -# PSF license: http://docs.python.org/license.html | |
| 64 | -# Copyright (c) 2001-2012 Python Software Foundation; All Rights Reserved | |
| 65 | - | |
| 66 | -FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)]) | |
| 67 | - | |
| 68 | -def hexdump3(src, length=8, startindex=0): | |
| 69 | - """ | |
| 70 | - Returns a hexadecimal dump of a binary string. | |
| 71 | - length: number of bytes per row. | |
| 72 | - startindex: index of 1st byte. | |
| 73 | - """ | |
| 74 | - result=[] | |
| 75 | - for i in xrange(0, len(src), length): | |
| 76 | - s = src[i:i+length] | |
| 77 | - hexa = ' '.join(["%02X"%ord(x) for x in s]) | |
| 78 | - printable = s.translate(FILTER) | |
| 79 | - result.append("%08X %-*s %s\n" % (i+startindex, length*3, hexa, printable)) | |
| 80 | - return ''.join(result) | |
| 81 | - | |
| 82 | -# end of PSF-licensed code. | |
| 83 | -#------------------------------------------------------------------------------ | |
| 84 | - | |
| 85 | - | |
| 86 | -def hexview_data (data, msg='', title='ezhexviewer', length=16, startindex=0): | |
| 87 | - hex = hexdump3(data, length=length, startindex=startindex) | |
| 88 | - easygui.codebox(msg=msg, title=title, text=hex) | |
| 89 | - | |
| 90 | - | |
| 91 | -def hexview_file (filename, msg='', title='ezhexviewer', length=16, startindex=0): | |
| 92 | - data = open(filename, 'rb').read() | |
| 93 | - hexview_data(data, msg=msg, title=title, length=length, startindex=startindex) | |
| 94 | - | |
| 95 | - | |
| 96 | -if __name__ == '__main__': | |
| 97 | - try: | |
| 98 | - filename = sys.argv[1] | |
| 99 | - except: | |
| 100 | - filename = easygui.fileopenbox() | |
| 101 | - if filename: | |
| 102 | - try: | |
| 103 | - hexview_file(filename, msg='File: %s' % filename) | |
| 104 | - except: | |
| 105 | - easygui.exceptionbox(msg='Error:', title='ezhexviewer') | |
| 1 | +#!/usr/bin/env python | |
| 2 | +""" | |
| 3 | +ezhexviewer.py | |
| 4 | + | |
| 5 | +A simple hexadecimal viewer based on easygui. It should work on any platform | |
| 6 | +with Python 2.x. | |
| 7 | + | |
| 8 | +Usage: ezhexviewer.py [file] | |
| 9 | + | |
| 10 | +Usage in a python application: | |
| 11 | + | |
| 12 | + import ezhexviewer | |
| 13 | + ezhexviewer.hexview_file(filename) | |
| 14 | + ezhexviewer.hexview_data(data) | |
| 15 | + | |
| 16 | + | |
| 17 | +ezhexviewer project website: http://www.decalage.info/python/ezhexviewer | |
| 18 | + | |
| 19 | +ezhexviewer is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info) | |
| 20 | +All rights reserved. | |
| 21 | + | |
| 22 | +Redistribution and use in source and binary forms, with or without modification, | |
| 23 | +are permitted provided that the following conditions are met: | |
| 24 | + | |
| 25 | + * Redistributions of source code must retain the above copyright notice, this | |
| 26 | + list of conditions and the following disclaimer. | |
| 27 | + * Redistributions in binary form must reproduce the above copyright notice, | |
| 28 | + this list of conditions and the following disclaimer in the documentation | |
| 29 | + and/or other materials provided with the distribution. | |
| 30 | + | |
| 31 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 32 | +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 33 | +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 34 | +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 35 | +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 36 | +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 37 | +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 38 | +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 39 | +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 40 | +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 41 | +""" | |
| 42 | + | |
| 43 | +__version__ = '0.02' | |
| 44 | + | |
| 45 | +#------------------------------------------------------------------------------ | |
| 46 | +# CHANGELOG: | |
| 47 | +# 2012-09-17 v0.01 PL: - first version | |
| 48 | +# 2012-10-04 v0.02 PL: - added license | |
| 49 | + | |
| 50 | +#------------------------------------------------------------------------------ | |
| 51 | +# TODO: | |
| 52 | +# + options to set title and msg | |
| 53 | + | |
| 54 | + | |
| 55 | +from thirdparty.easygui import easygui | |
| 56 | +import sys | |
| 57 | + | |
| 58 | +#------------------------------------------------------------------------------ | |
| 59 | +# The following code (hexdump3 only) is a modified version of the hex dumper | |
| 60 | +# recipe published on ASPN by Sebastien Keim and Raymond Hattinger under the | |
| 61 | +# PSF license. I added the startindex parameter. | |
| 62 | +# see http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/142812 | |
| 63 | +# PSF license: http://docs.python.org/license.html | |
| 64 | +# Copyright (c) 2001-2012 Python Software Foundation; All Rights Reserved | |
| 65 | + | |
| 66 | +FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)]) | |
| 67 | + | |
| 68 | +def hexdump3(src, length=8, startindex=0): | |
| 69 | + """ | |
| 70 | + Returns a hexadecimal dump of a binary string. | |
| 71 | + length: number of bytes per row. | |
| 72 | + startindex: index of 1st byte. | |
| 73 | + """ | |
| 74 | + result=[] | |
| 75 | + for i in xrange(0, len(src), length): | |
| 76 | + s = src[i:i+length] | |
| 77 | + hexa = ' '.join(["%02X"%ord(x) for x in s]) | |
| 78 | + printable = s.translate(FILTER) | |
| 79 | + result.append("%08X %-*s %s\n" % (i+startindex, length*3, hexa, printable)) | |
| 80 | + return ''.join(result) | |
| 81 | + | |
| 82 | +# end of PSF-licensed code. | |
| 83 | +#------------------------------------------------------------------------------ | |
| 84 | + | |
| 85 | + | |
| 86 | +def hexview_data (data, msg='', title='ezhexviewer', length=16, startindex=0): | |
| 87 | + hex = hexdump3(data, length=length, startindex=startindex) | |
| 88 | + easygui.codebox(msg=msg, title=title, text=hex) | |
| 89 | + | |
| 90 | + | |
| 91 | +def hexview_file (filename, msg='', title='ezhexviewer', length=16, startindex=0): | |
| 92 | + data = open(filename, 'rb').read() | |
| 93 | + hexview_data(data, msg=msg, title=title, length=length, startindex=startindex) | |
| 94 | + | |
| 95 | + | |
| 96 | +if __name__ == '__main__': | |
| 97 | + try: | |
| 98 | + filename = sys.argv[1] | |
| 99 | + except: | |
| 100 | + filename = easygui.fileopenbox() | |
| 101 | + if filename: | |
| 102 | + try: | |
| 103 | + hexview_file(filename, msg='File: %s' % filename) | |
| 104 | + except: | |
| 105 | + easygui.exceptionbox(msg='Error:', title='ezhexviewer') | ... | ... |
oletools/olebrowse.py
| 1 | -#!/usr/bin/env python | |
| 2 | -""" | |
| 3 | -olebrowse.py | |
| 4 | - | |
| 5 | -A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to | |
| 6 | -view and extract individual data streams. | |
| 7 | - | |
| 8 | -Usage: olebrowse.py [file] | |
| 9 | - | |
| 10 | -olebrowse project website: http://www.decalage.info/python/olebrowse | |
| 11 | - | |
| 12 | -olebrowse is part of the python-oletools package: | |
| 13 | -http://www.decalage.info/python/oletools | |
| 14 | - | |
| 15 | -olebrowse is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info) | |
| 16 | -All rights reserved. | |
| 17 | - | |
| 18 | -Redistribution and use in source and binary forms, with or without modification, | |
| 19 | -are permitted provided that the following conditions are met: | |
| 20 | - | |
| 21 | - * Redistributions of source code must retain the above copyright notice, this | |
| 22 | - list of conditions and the following disclaimer. | |
| 23 | - * Redistributions in binary form must reproduce the above copyright notice, | |
| 24 | - this list of conditions and the following disclaimer in the documentation | |
| 25 | - and/or other materials provided with the distribution. | |
| 26 | - | |
| 27 | -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 28 | -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 29 | -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 30 | -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 31 | -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 32 | -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 33 | -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 34 | -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 35 | -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 36 | -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 37 | -""" | |
| 38 | - | |
| 39 | -__version__ = '0.02' | |
| 40 | - | |
| 41 | -#------------------------------------------------------------------------------ | |
| 42 | -# CHANGELOG: | |
| 43 | -# 2012-09-17 v0.01 PL: - first version | |
| 44 | -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL | |
| 45 | - | |
| 46 | -#------------------------------------------------------------------------------ | |
| 47 | -# TODO: | |
| 48 | -# - menu option to open another file | |
| 49 | -# - menu option to display properties | |
| 50 | -# - menu option to run other oletools, external tools such as OfficeCat? | |
| 51 | -# - for a stream, display info: size, path, etc | |
| 52 | -# - stream info: magic, entropy, ... ? | |
| 53 | - | |
| 54 | -import optparse, sys, os | |
| 55 | -from thirdparty.easygui import easygui | |
| 56 | -import thirdparty.olefile as olefile | |
| 57 | -import ezhexviewer | |
| 58 | - | |
| 59 | -ABOUT = '~ About olebrowse' | |
| 60 | -QUIT = '~ Quit' | |
| 61 | - | |
| 62 | - | |
| 63 | -def about (): | |
| 64 | - """ | |
| 65 | - Display information about this tool | |
| 66 | - """ | |
| 67 | - easygui.textbox(title='About olebrowse', text=__doc__) | |
| 68 | - | |
| 69 | - | |
| 70 | -def browse_stream (ole, stream): | |
| 71 | - """ | |
| 72 | - Browse a stream (hex view or save to file) | |
| 73 | - """ | |
| 74 | - #print 'stream:', stream | |
| 75 | - while True: | |
| 76 | - msg ='Select an action for the stream "%s", or press Esc to exit' % repr(stream) | |
| 77 | - actions = [ | |
| 78 | - 'Hex view', | |
| 79 | -## 'Text view', | |
| 80 | -## 'Repr view', | |
| 81 | - 'Save stream to file', | |
| 82 | - '~ Back to main menu', | |
| 83 | - ] | |
| 84 | - action = easygui.choicebox(msg, title='olebrowse', choices=actions) | |
| 85 | - if action is None or 'Back' in action: | |
| 86 | - break | |
| 87 | - elif action.startswith('Hex'): | |
| 88 | - data = ole.openstream(stream).getvalue() | |
| 89 | - ezhexviewer.hexview_data(data, msg='Stream: %s' % stream, title='olebrowse') | |
| 90 | -## elif action.startswith('Text'): | |
| 91 | -## data = ole.openstream(stream).getvalue() | |
| 92 | -## easygui.codebox(title='Text view - %s' % stream, text=data) | |
| 93 | -## elif action.startswith('Repr'): | |
| 94 | -## data = ole.openstream(stream).getvalue() | |
| 95 | -## easygui.codebox(title='Repr view - %s' % stream, text=repr(data)) | |
| 96 | - elif action.startswith('Save'): | |
| 97 | - data = ole.openstream(stream).getvalue() | |
| 98 | - fname = easygui.filesavebox(default='stream.bin') | |
| 99 | - if fname is not None: | |
| 100 | - f = open(fname, 'wb') | |
| 101 | - f.write(data) | |
| 102 | - f.close() | |
| 103 | - easygui.msgbox('stream saved to file %s' % fname) | |
| 104 | - | |
| 105 | - | |
| 106 | - | |
| 107 | -def main(): | |
| 108 | - """ | |
| 109 | - Main function | |
| 110 | - """ | |
| 111 | - try: | |
| 112 | - filename = sys.argv[1] | |
| 113 | - except: | |
| 114 | - filename = easygui.fileopenbox() | |
| 115 | - try: | |
| 116 | - ole = olefile.OleFileIO(filename) | |
| 117 | - listdir = ole.listdir() | |
| 118 | - streams = [] | |
| 119 | - for direntry in listdir: | |
| 120 | - #print direntry | |
| 121 | - streams.append('/'.join(direntry)) | |
| 122 | - streams.append(ABOUT) | |
| 123 | - streams.append(QUIT) | |
| 124 | - stream = True | |
| 125 | - while stream is not None: | |
| 126 | - msg ="Select a stream, or press Esc to exit" | |
| 127 | - title = "olebrowse" | |
| 128 | - stream = easygui.choicebox(msg, title, streams) | |
| 129 | - if stream is None or stream == QUIT: | |
| 130 | - break | |
| 131 | - if stream == ABOUT: | |
| 132 | - about() | |
| 133 | - else: | |
| 134 | - browse_stream(ole, stream) | |
| 135 | - except: | |
| 136 | - easygui.exceptionbox() | |
| 137 | - | |
| 138 | - | |
| 139 | - | |
| 140 | - | |
| 141 | -if __name__ == '__main__': | |
| 142 | - main() | |
| 1 | +#!/usr/bin/env python | |
| 2 | +""" | |
| 3 | +olebrowse.py | |
| 4 | + | |
| 5 | +A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to | |
| 6 | +view and extract individual data streams. | |
| 7 | + | |
| 8 | +Usage: olebrowse.py [file] | |
| 9 | + | |
| 10 | +olebrowse project website: http://www.decalage.info/python/olebrowse | |
| 11 | + | |
| 12 | +olebrowse is part of the python-oletools package: | |
| 13 | +http://www.decalage.info/python/oletools | |
| 14 | + | |
| 15 | +olebrowse is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info) | |
| 16 | +All rights reserved. | |
| 17 | + | |
| 18 | +Redistribution and use in source and binary forms, with or without modification, | |
| 19 | +are permitted provided that the following conditions are met: | |
| 20 | + | |
| 21 | + * Redistributions of source code must retain the above copyright notice, this | |
| 22 | + list of conditions and the following disclaimer. | |
| 23 | + * Redistributions in binary form must reproduce the above copyright notice, | |
| 24 | + this list of conditions and the following disclaimer in the documentation | |
| 25 | + and/or other materials provided with the distribution. | |
| 26 | + | |
| 27 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 28 | +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 29 | +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 30 | +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 31 | +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 32 | +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 33 | +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 34 | +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 35 | +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 36 | +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 37 | +""" | |
| 38 | + | |
| 39 | +__version__ = '0.02' | |
| 40 | + | |
| 41 | +#------------------------------------------------------------------------------ | |
| 42 | +# CHANGELOG: | |
| 43 | +# 2012-09-17 v0.01 PL: - first version | |
| 44 | +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL | |
| 45 | + | |
| 46 | +#------------------------------------------------------------------------------ | |
| 47 | +# TODO: | |
| 48 | +# - menu option to open another file | |
| 49 | +# - menu option to display properties | |
| 50 | +# - menu option to run other oletools, external tools such as OfficeCat? | |
| 51 | +# - for a stream, display info: size, path, etc | |
| 52 | +# - stream info: magic, entropy, ... ? | |
| 53 | + | |
| 54 | +import optparse, sys, os | |
| 55 | +from thirdparty.easygui import easygui | |
| 56 | +import thirdparty.olefile as olefile | |
| 57 | +import ezhexviewer | |
| 58 | + | |
| 59 | +ABOUT = '~ About olebrowse' | |
| 60 | +QUIT = '~ Quit' | |
| 61 | + | |
| 62 | + | |
| 63 | +def about (): | |
| 64 | + """ | |
| 65 | + Display information about this tool | |
| 66 | + """ | |
| 67 | + easygui.textbox(title='About olebrowse', text=__doc__) | |
| 68 | + | |
| 69 | + | |
| 70 | +def browse_stream (ole, stream): | |
| 71 | + """ | |
| 72 | + Browse a stream (hex view or save to file) | |
| 73 | + """ | |
| 74 | + #print 'stream:', stream | |
| 75 | + while True: | |
| 76 | + msg ='Select an action for the stream "%s", or press Esc to exit' % repr(stream) | |
| 77 | + actions = [ | |
| 78 | + 'Hex view', | |
| 79 | +## 'Text view', | |
| 80 | +## 'Repr view', | |
| 81 | + 'Save stream to file', | |
| 82 | + '~ Back to main menu', | |
| 83 | + ] | |
| 84 | + action = easygui.choicebox(msg, title='olebrowse', choices=actions) | |
| 85 | + if action is None or 'Back' in action: | |
| 86 | + break | |
| 87 | + elif action.startswith('Hex'): | |
| 88 | + data = ole.openstream(stream).getvalue() | |
| 89 | + ezhexviewer.hexview_data(data, msg='Stream: %s' % stream, title='olebrowse') | |
| 90 | +## elif action.startswith('Text'): | |
| 91 | +## data = ole.openstream(stream).getvalue() | |
| 92 | +## easygui.codebox(title='Text view - %s' % stream, text=data) | |
| 93 | +## elif action.startswith('Repr'): | |
| 94 | +## data = ole.openstream(stream).getvalue() | |
| 95 | +## easygui.codebox(title='Repr view - %s' % stream, text=repr(data)) | |
| 96 | + elif action.startswith('Save'): | |
| 97 | + data = ole.openstream(stream).getvalue() | |
| 98 | + fname = easygui.filesavebox(default='stream.bin') | |
| 99 | + if fname is not None: | |
| 100 | + f = open(fname, 'wb') | |
| 101 | + f.write(data) | |
| 102 | + f.close() | |
| 103 | + easygui.msgbox('stream saved to file %s' % fname) | |
| 104 | + | |
| 105 | + | |
| 106 | + | |
| 107 | +def main(): | |
| 108 | + """ | |
| 109 | + Main function | |
| 110 | + """ | |
| 111 | + try: | |
| 112 | + filename = sys.argv[1] | |
| 113 | + except: | |
| 114 | + filename = easygui.fileopenbox() | |
| 115 | + try: | |
| 116 | + ole = olefile.OleFileIO(filename) | |
| 117 | + listdir = ole.listdir() | |
| 118 | + streams = [] | |
| 119 | + for direntry in listdir: | |
| 120 | + #print direntry | |
| 121 | + streams.append('/'.join(direntry)) | |
| 122 | + streams.append(ABOUT) | |
| 123 | + streams.append(QUIT) | |
| 124 | + stream = True | |
| 125 | + while stream is not None: | |
| 126 | + msg ="Select a stream, or press Esc to exit" | |
| 127 | + title = "olebrowse" | |
| 128 | + stream = easygui.choicebox(msg, title, streams) | |
| 129 | + if stream is None or stream == QUIT: | |
| 130 | + break | |
| 131 | + if stream == ABOUT: | |
| 132 | + about() | |
| 133 | + else: | |
| 134 | + browse_stream(ole, stream) | |
| 135 | + except: | |
| 136 | + easygui.exceptionbox() | |
| 137 | + | |
| 138 | + | |
| 139 | + | |
| 140 | + | |
| 141 | +if __name__ == '__main__': | |
| 142 | + main() | ... | ... |
oletools/oleid.py
| 1 | -#!/usr/bin/env python | |
| 2 | -""" | |
| 3 | -oleid.py | |
| 4 | - | |
| 5 | -oleid is a script to analyze OLE files such as MS Office documents (e.g. Word, | |
| 6 | -Excel), to detect specific characteristics that could potentially indicate that | |
| 7 | -the file is suspicious or malicious, in terms of security (e.g. malware). | |
| 8 | -For example it can detect VBA macros, embedded Flash objects, fragmentation. | |
| 9 | -The results can be displayed or returned as XML for further processing. | |
| 10 | - | |
| 11 | -Usage: oleid.py <file> | |
| 12 | - | |
| 13 | -oleid project website: http://www.decalage.info/python/oleid | |
| 14 | - | |
| 15 | -oleid is part of the python-oletools package: | |
| 16 | -http://www.decalage.info/python/oletools | |
| 17 | -""" | |
| 18 | - | |
| 19 | -#=== LICENSE ================================================================= | |
| 20 | - | |
| 21 | -# oleid is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info) | |
| 22 | -# All rights reserved. | |
| 23 | -# | |
| 24 | -# Redistribution and use in source and binary forms, with or without modification, | |
| 25 | -# are permitted provided that the following conditions are met: | |
| 26 | -# | |
| 27 | -# * Redistributions of source code must retain the above copyright notice, this | |
| 28 | -# list of conditions and the following disclaimer. | |
| 29 | -# * Redistributions in binary form must reproduce the above copyright notice, | |
| 30 | -# this list of conditions and the following disclaimer in the documentation | |
| 31 | -# and/or other materials provided with the distribution. | |
| 32 | -# | |
| 33 | -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 34 | -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 35 | -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 36 | -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 37 | -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 38 | -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 39 | -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 40 | -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 41 | -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 42 | -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 43 | - | |
| 44 | - | |
| 45 | -#------------------------------------------------------------------------------ | |
| 46 | -# CHANGELOG: | |
| 47 | -# 2012-10-29 v0.01 PL: - first version | |
| 48 | -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL | |
| 49 | -# - improved usage display with -h | |
| 50 | -# 2014-11-30 v0.03 PL: - improved output with prettytable | |
| 51 | - | |
| 52 | -__version__ = '0.03' | |
| 53 | - | |
| 54 | - | |
| 55 | -#------------------------------------------------------------------------------ | |
| 56 | -# TODO: | |
| 57 | -# + extract relevant metadata: codepage, author, application, timestamps, etc | |
| 58 | -# - detect RTF and OpenXML | |
| 59 | -# - fragmentation | |
| 60 | -# - OLE package | |
| 61 | -# - entropy | |
| 62 | -# - detect PE header? | |
| 63 | -# - detect NOPs? | |
| 64 | -# - list type of each object in object pool? | |
| 65 | -# - criticality for each indicator?: info, low, medium, high | |
| 66 | -# - support wildcards with glob? | |
| 67 | -# - verbose option | |
| 68 | -# - csv, xml output | |
| 69 | - | |
| 70 | - | |
| 71 | -#=== IMPORTS ================================================================= | |
| 72 | - | |
| 73 | -import optparse, sys, os, re, zlib, struct | |
| 74 | -import thirdparty.olefile as olefile | |
| 75 | -from thirdparty.prettytable import prettytable | |
| 76 | - | |
| 77 | - | |
| 78 | -#=== FUNCTIONS =============================================================== | |
| 79 | - | |
| 80 | -def detect_flash (data): | |
| 81 | - """ | |
| 82 | - Detect Flash objects (SWF files) within a binary string of data | |
| 83 | - return a list of (start_index, length, compressed) tuples, or [] if nothing | |
| 84 | - found. | |
| 85 | - | |
| 86 | - Code inspired from xxxswf.py by Alexander Hanel (but significantly reworked) | |
| 87 | - http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html | |
| 88 | - """ | |
| 89 | - #TODO: report | |
| 90 | - found = [] | |
| 91 | - for match in re.finditer('CWS|FWS', data): | |
| 92 | - start = match.start() | |
| 93 | - if start+8 > len(data): | |
| 94 | - # header size larger than remaining data, this is not a SWF | |
| 95 | - continue | |
| 96 | - #TODO: one struct.unpack should be simpler | |
| 97 | - # Read Header | |
| 98 | - header = data[start:start+3] | |
| 99 | - # Read Version | |
| 100 | - ver = struct.unpack('<b', data[start+3])[0] | |
| 101 | - # Error check for version above 20 | |
| 102 | - #TODO: is this accurate? (check SWF specifications) | |
| 103 | - if ver > 20: | |
| 104 | - continue | |
| 105 | - # Read SWF Size | |
| 106 | - size = struct.unpack('<i', data[start+4:start+8])[0] | |
| 107 | - if start+size > len(data) or size < 1024: | |
| 108 | - # declared size larger than remaining data, this is not a SWF | |
| 109 | - # or declared size too small for a usual SWF | |
| 110 | - continue | |
| 111 | - # Read SWF into buffer. If compressed read uncompressed size. | |
| 112 | - swf = data[start:start+size] | |
| 113 | - compressed = False | |
| 114 | - if 'CWS' in header: | |
| 115 | - compressed = True | |
| 116 | - # compressed SWF: data after header (8 bytes) until the end is | |
| 117 | - # compressed with zlib. Attempt to decompress it to check if it is | |
| 118 | - # valid | |
| 119 | - compressed_data = swf[8:] | |
| 120 | - try: | |
| 121 | - zlib.decompress(compressed_data) | |
| 122 | - except: | |
| 123 | - continue | |
| 124 | - # else we don't check anything at this stage, we only assume it is a | |
| 125 | - # valid SWF. So there might be false positives for uncompressed SWF. | |
| 126 | - found.append((start, size, compressed)) | |
| 127 | - #print 'Found SWF start=%x, length=%d' % (start, size) | |
| 128 | - return found | |
| 129 | - | |
| 130 | - | |
| 131 | -#=== CLASSES ================================================================= | |
| 132 | - | |
| 133 | -class Indicator (object): | |
| 134 | - | |
| 135 | - def __init__(self, _id, value=None, _type=bool, name=None, description=None): | |
| 136 | - self.id = _id | |
| 137 | - self.value = value | |
| 138 | - self.type = _type | |
| 139 | - self.name = name | |
| 140 | - if name == None: | |
| 141 | - self.name = _id | |
| 142 | - self.description = description | |
| 143 | - | |
| 144 | - | |
| 145 | -class OleID: | |
| 146 | - | |
| 147 | - def __init__(self, filename): | |
| 148 | - self.filename = filename | |
| 149 | - self.indicators = [] | |
| 150 | - | |
| 151 | - def check(self): | |
| 152 | - # check if it is actually an OLE file: | |
| 153 | - oleformat = Indicator('ole_format', True, name='OLE format') | |
| 154 | - self.indicators.append(oleformat) | |
| 155 | - if not olefile.isOleFile(self.filename): | |
| 156 | - oleformat.value = False | |
| 157 | - return self.indicators | |
| 158 | - # parse file: | |
| 159 | - self.ole = olefile.OleFileIO(self.filename) | |
| 160 | - # checks: | |
| 161 | - self.check_properties() | |
| 162 | - self.check_encrypted() | |
| 163 | - self.check_word() | |
| 164 | - self.check_excel() | |
| 165 | - self.check_powerpoint() | |
| 166 | - self.check_visio() | |
| 167 | - self.check_ObjectPool() | |
| 168 | - self.check_flash() | |
| 169 | - self.ole.close() | |
| 170 | - return self.indicators | |
| 171 | - | |
| 172 | - def check_properties (self): | |
| 173 | - suminfo = Indicator('has_suminfo', False, name='Has SummaryInformation stream') | |
| 174 | - self.indicators.append(suminfo) | |
| 175 | - appname = Indicator('appname', 'unknown', _type=str, name='Application name') | |
| 176 | - self.indicators.append(appname) | |
| 177 | - self.suminfo = {} | |
| 178 | - # check stream SummaryInformation | |
| 179 | - if self.ole.exists("\x05SummaryInformation"): | |
| 180 | - suminfo.value = True | |
| 181 | - self.suminfo = self.ole.getproperties("\x05SummaryInformation") | |
| 182 | - # check application name: | |
| 183 | - appname.value = self.suminfo.get(0x12, 'unknown') | |
| 184 | - | |
| 185 | - def check_encrypted (self): | |
| 186 | - # we keep the pointer to the indicator, can be modified by other checks: | |
| 187 | - self.encrypted = Indicator('encrypted', False, name='Encrypted') | |
| 188 | - self.indicators.append(self.encrypted) | |
| 189 | - # check if bit 1 of security field = 1: | |
| 190 | - # (this field may be missing for Powerpoint2000, for example) | |
| 191 | - if 0x13 in self.suminfo: | |
| 192 | - if self.suminfo[0x13] & 1: | |
| 193 | - self.encrypted.value = True | |
| 194 | - | |
| 195 | - def check_word (self): | |
| 196 | - word = Indicator('word', False, name='Word Document', | |
| 197 | - description='Contains a WordDocument stream, very likely to be a Microsoft Word Document.') | |
| 198 | - self.indicators.append(word) | |
| 199 | - self.macros = Indicator('vba_macros', False, name='VBA Macros') | |
| 200 | - self.indicators.append(self.macros) | |
| 201 | - if self.ole.exists('WordDocument'): | |
| 202 | - word.value = True | |
| 203 | - # check for Word-specific encryption flag: | |
| 204 | - s = self.ole.openstream(["WordDocument"]) | |
| 205 | - # pass header 10 bytes | |
| 206 | - s.read(10) | |
| 207 | - # read flag structure: | |
| 208 | - temp16 = struct.unpack("H", s.read(2))[0] | |
| 209 | - fEncrypted = (temp16 & 0x0100) >> 8 | |
| 210 | - if fEncrypted: | |
| 211 | - self.encrypted.value = True | |
| 212 | - s.close() | |
| 213 | - # check for VBA macros: | |
| 214 | - if self.ole.exists('Macros'): | |
| 215 | - self.macros.value = True | |
| 216 | - | |
| 217 | - def check_excel (self): | |
| 218 | - excel = Indicator('excel', False, name='Excel Workbook', | |
| 219 | - description='Contains a Workbook or Book stream, very likely to be a Microsoft Excel Workbook.') | |
| 220 | - self.indicators.append(excel) | |
| 221 | - #self.macros = Indicator('vba_macros', False, name='VBA Macros') | |
| 222 | - #self.indicators.append(self.macros) | |
| 223 | - if self.ole.exists('Workbook') or self.ole.exists('Book'): | |
| 224 | - excel.value = True | |
| 225 | - # check for VBA macros: | |
| 226 | - if self.ole.exists('_VBA_PROJECT_CUR'): | |
| 227 | - self.macros.value = True | |
| 228 | - | |
| 229 | - def check_powerpoint (self): | |
| 230 | - ppt = Indicator('ppt', False, name='PowerPoint Presentation', | |
| 231 | - description='Contains a PowerPoint Document stream, very likely to be a Microsoft PowerPoint Presentation.') | |
| 232 | - self.indicators.append(ppt) | |
| 233 | - if self.ole.exists('PowerPoint Document'): | |
| 234 | - ppt.value = True | |
| 235 | - | |
| 236 | - def check_visio (self): | |
| 237 | - visio = Indicator('visio', False, name='Visio Drawing', | |
| 238 | - description='Contains a VisioDocument stream, very likely to be a Microsoft Visio Drawing.') | |
| 239 | - self.indicators.append(visio) | |
| 240 | - if self.ole.exists('VisioDocument'): | |
| 241 | - visio.value = True | |
| 242 | - | |
| 243 | - def check_ObjectPool (self): | |
| 244 | - objpool = Indicator('ObjectPool', False, name='ObjectPool', | |
| 245 | - description='Contains an ObjectPool stream, very likely to contain embedded OLE objects or files.') | |
| 246 | - self.indicators.append(objpool) | |
| 247 | - if self.ole.exists('ObjectPool'): | |
| 248 | - objpool.value = True | |
| 249 | - | |
| 250 | - | |
| 251 | - def check_flash (self): | |
| 252 | - flash = Indicator('flash', 0, _type=int, name='Flash objects', | |
| 253 | - description='Number of embedded Flash objects (SWF files) detected in OLE streams. Not 100% accurate, there may be false positives.') | |
| 254 | - self.indicators.append(flash) | |
| 255 | - for stream in self.ole.listdir(): | |
| 256 | - data = self.ole.openstream(stream).read() | |
| 257 | - found = detect_flash(data) | |
| 258 | - # just add to the count of Flash objects: | |
| 259 | - flash.value += len(found) | |
| 260 | - #print stream, found | |
| 261 | - | |
| 262 | - | |
| 263 | -#=== MAIN ================================================================= | |
| 264 | - | |
| 265 | -def main(): | |
| 266 | - usage = 'usage: %prog [options] <file>' | |
| 267 | - parser = optparse.OptionParser(usage=__doc__ + '\n' + usage) | |
| 268 | -## parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream') | |
| 269 | - | |
| 270 | - (options, args) = parser.parse_args() | |
| 271 | - | |
| 272 | - # Print help if no argurments are passed | |
| 273 | - if len(args) == 0: | |
| 274 | - parser.print_help() | |
| 275 | - return | |
| 276 | - | |
| 277 | - for filename in args: | |
| 278 | - print '\nFilename:', filename | |
| 279 | - oleid = OleID(filename) | |
| 280 | - indicators = oleid.check() | |
| 281 | - | |
| 282 | - #TODO: add description | |
| 283 | - #TODO: highlight suspicious indicators | |
| 284 | - t = prettytable.PrettyTable(['Indicator', 'Value']) | |
| 285 | - t.align = 'l' | |
| 286 | - t.max_width = 39 | |
| 287 | - #t.border = False | |
| 288 | - | |
| 289 | - for indicator in indicators: | |
| 290 | - #print '%s: %s' % (indicator.name, indicator.value) | |
| 291 | - t.add_row((indicator.name, indicator.value)) | |
| 292 | - | |
| 293 | - print t | |
| 294 | - | |
| 295 | -if __name__ == '__main__': | |
| 296 | - main() | |
| 1 | +#!/usr/bin/env python | |
| 2 | +""" | |
| 3 | +oleid.py | |
| 4 | + | |
| 5 | +oleid is a script to analyze OLE files such as MS Office documents (e.g. Word, | |
| 6 | +Excel), to detect specific characteristics that could potentially indicate that | |
| 7 | +the file is suspicious or malicious, in terms of security (e.g. malware). | |
| 8 | +For example it can detect VBA macros, embedded Flash objects, fragmentation. | |
| 9 | +The results can be displayed or returned as XML for further processing. | |
| 10 | + | |
| 11 | +Usage: oleid.py <file> | |
| 12 | + | |
| 13 | +oleid project website: http://www.decalage.info/python/oleid | |
| 14 | + | |
| 15 | +oleid is part of the python-oletools package: | |
| 16 | +http://www.decalage.info/python/oletools | |
| 17 | +""" | |
| 18 | + | |
| 19 | +#=== LICENSE ================================================================= | |
| 20 | + | |
| 21 | +# oleid is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info) | |
| 22 | +# All rights reserved. | |
| 23 | +# | |
| 24 | +# Redistribution and use in source and binary forms, with or without modification, | |
| 25 | +# are permitted provided that the following conditions are met: | |
| 26 | +# | |
| 27 | +# * Redistributions of source code must retain the above copyright notice, this | |
| 28 | +# list of conditions and the following disclaimer. | |
| 29 | +# * Redistributions in binary form must reproduce the above copyright notice, | |
| 30 | +# this list of conditions and the following disclaimer in the documentation | |
| 31 | +# and/or other materials provided with the distribution. | |
| 32 | +# | |
| 33 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 34 | +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 35 | +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 36 | +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 37 | +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 38 | +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 39 | +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 40 | +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 41 | +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 42 | +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 43 | + | |
| 44 | + | |
| 45 | +#------------------------------------------------------------------------------ | |
| 46 | +# CHANGELOG: | |
| 47 | +# 2012-10-29 v0.01 PL: - first version | |
| 48 | +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL | |
| 49 | +# - improved usage display with -h | |
| 50 | +# 2014-11-30 v0.03 PL: - improved output with prettytable | |
| 51 | + | |
| 52 | +__version__ = '0.03' | |
| 53 | + | |
| 54 | + | |
| 55 | +#------------------------------------------------------------------------------ | |
| 56 | +# TODO: | |
| 57 | +# + extract relevant metadata: codepage, author, application, timestamps, etc | |
| 58 | +# - detect RTF and OpenXML | |
| 59 | +# - fragmentation | |
| 60 | +# - OLE package | |
| 61 | +# - entropy | |
| 62 | +# - detect PE header? | |
| 63 | +# - detect NOPs? | |
| 64 | +# - list type of each object in object pool? | |
| 65 | +# - criticality for each indicator?: info, low, medium, high | |
| 66 | +# - support wildcards with glob? | |
| 67 | +# - verbose option | |
| 68 | +# - csv, xml output | |
| 69 | + | |
| 70 | + | |
| 71 | +#=== IMPORTS ================================================================= | |
| 72 | + | |
| 73 | +import optparse, sys, os, re, zlib, struct | |
| 74 | +import thirdparty.olefile as olefile | |
| 75 | +from thirdparty.prettytable import prettytable | |
| 76 | + | |
| 77 | + | |
| 78 | +#=== FUNCTIONS =============================================================== | |
| 79 | + | |
| 80 | +def detect_flash (data): | |
| 81 | + """ | |
| 82 | + Detect Flash objects (SWF files) within a binary string of data | |
| 83 | + return a list of (start_index, length, compressed) tuples, or [] if nothing | |
| 84 | + found. | |
| 85 | + | |
| 86 | + Code inspired from xxxswf.py by Alexander Hanel (but significantly reworked) | |
| 87 | + http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html | |
| 88 | + """ | |
| 89 | + #TODO: report | |
| 90 | + found = [] | |
| 91 | + for match in re.finditer('CWS|FWS', data): | |
| 92 | + start = match.start() | |
| 93 | + if start+8 > len(data): | |
| 94 | + # header size larger than remaining data, this is not a SWF | |
| 95 | + continue | |
| 96 | + #TODO: one struct.unpack should be simpler | |
| 97 | + # Read Header | |
| 98 | + header = data[start:start+3] | |
| 99 | + # Read Version | |
| 100 | + ver = struct.unpack('<b', data[start+3])[0] | |
| 101 | + # Error check for version above 20 | |
| 102 | + #TODO: is this accurate? (check SWF specifications) | |
| 103 | + if ver > 20: | |
| 104 | + continue | |
| 105 | + # Read SWF Size | |
| 106 | + size = struct.unpack('<i', data[start+4:start+8])[0] | |
| 107 | + if start+size > len(data) or size < 1024: | |
| 108 | + # declared size larger than remaining data, this is not a SWF | |
| 109 | + # or declared size too small for a usual SWF | |
| 110 | + continue | |
| 111 | + # Read SWF into buffer. If compressed read uncompressed size. | |
| 112 | + swf = data[start:start+size] | |
| 113 | + compressed = False | |
| 114 | + if 'CWS' in header: | |
| 115 | + compressed = True | |
| 116 | + # compressed SWF: data after header (8 bytes) until the end is | |
| 117 | + # compressed with zlib. Attempt to decompress it to check if it is | |
| 118 | + # valid | |
| 119 | + compressed_data = swf[8:] | |
| 120 | + try: | |
| 121 | + zlib.decompress(compressed_data) | |
| 122 | + except: | |
| 123 | + continue | |
| 124 | + # else we don't check anything at this stage, we only assume it is a | |
| 125 | + # valid SWF. So there might be false positives for uncompressed SWF. | |
| 126 | + found.append((start, size, compressed)) | |
| 127 | + #print 'Found SWF start=%x, length=%d' % (start, size) | |
| 128 | + return found | |
| 129 | + | |
| 130 | + | |
| 131 | +#=== CLASSES ================================================================= | |
| 132 | + | |
| 133 | +class Indicator (object): | |
| 134 | + | |
| 135 | + def __init__(self, _id, value=None, _type=bool, name=None, description=None): | |
| 136 | + self.id = _id | |
| 137 | + self.value = value | |
| 138 | + self.type = _type | |
| 139 | + self.name = name | |
| 140 | + if name == None: | |
| 141 | + self.name = _id | |
| 142 | + self.description = description | |
| 143 | + | |
| 144 | + | |
| 145 | +class OleID: | |
| 146 | + | |
| 147 | + def __init__(self, filename): | |
| 148 | + self.filename = filename | |
| 149 | + self.indicators = [] | |
| 150 | + | |
| 151 | + def check(self): | |
| 152 | + # check if it is actually an OLE file: | |
| 153 | + oleformat = Indicator('ole_format', True, name='OLE format') | |
| 154 | + self.indicators.append(oleformat) | |
| 155 | + if not olefile.isOleFile(self.filename): | |
| 156 | + oleformat.value = False | |
| 157 | + return self.indicators | |
| 158 | + # parse file: | |
| 159 | + self.ole = olefile.OleFileIO(self.filename) | |
| 160 | + # checks: | |
| 161 | + self.check_properties() | |
| 162 | + self.check_encrypted() | |
| 163 | + self.check_word() | |
| 164 | + self.check_excel() | |
| 165 | + self.check_powerpoint() | |
| 166 | + self.check_visio() | |
| 167 | + self.check_ObjectPool() | |
| 168 | + self.check_flash() | |
| 169 | + self.ole.close() | |
| 170 | + return self.indicators | |
| 171 | + | |
| 172 | + def check_properties (self): | |
| 173 | + suminfo = Indicator('has_suminfo', False, name='Has SummaryInformation stream') | |
| 174 | + self.indicators.append(suminfo) | |
| 175 | + appname = Indicator('appname', 'unknown', _type=str, name='Application name') | |
| 176 | + self.indicators.append(appname) | |
| 177 | + self.suminfo = {} | |
| 178 | + # check stream SummaryInformation | |
| 179 | + if self.ole.exists("\x05SummaryInformation"): | |
| 180 | + suminfo.value = True | |
| 181 | + self.suminfo = self.ole.getproperties("\x05SummaryInformation") | |
| 182 | + # check application name: | |
| 183 | + appname.value = self.suminfo.get(0x12, 'unknown') | |
| 184 | + | |
| 185 | + def check_encrypted (self): | |
| 186 | + # we keep the pointer to the indicator, can be modified by other checks: | |
| 187 | + self.encrypted = Indicator('encrypted', False, name='Encrypted') | |
| 188 | + self.indicators.append(self.encrypted) | |
| 189 | + # check if bit 1 of security field = 1: | |
| 190 | + # (this field may be missing for Powerpoint2000, for example) | |
| 191 | + if 0x13 in self.suminfo: | |
| 192 | + if self.suminfo[0x13] & 1: | |
| 193 | + self.encrypted.value = True | |
| 194 | + | |
| 195 | + def check_word (self): | |
| 196 | + word = Indicator('word', False, name='Word Document', | |
| 197 | + description='Contains a WordDocument stream, very likely to be a Microsoft Word Document.') | |
| 198 | + self.indicators.append(word) | |
| 199 | + self.macros = Indicator('vba_macros', False, name='VBA Macros') | |
| 200 | + self.indicators.append(self.macros) | |
| 201 | + if self.ole.exists('WordDocument'): | |
| 202 | + word.value = True | |
| 203 | + # check for Word-specific encryption flag: | |
| 204 | + s = self.ole.openstream(["WordDocument"]) | |
| 205 | + # pass header 10 bytes | |
| 206 | + s.read(10) | |
| 207 | + # read flag structure: | |
| 208 | + temp16 = struct.unpack("H", s.read(2))[0] | |
| 209 | + fEncrypted = (temp16 & 0x0100) >> 8 | |
| 210 | + if fEncrypted: | |
| 211 | + self.encrypted.value = True | |
| 212 | + s.close() | |
| 213 | + # check for VBA macros: | |
| 214 | + if self.ole.exists('Macros'): | |
| 215 | + self.macros.value = True | |
| 216 | + | |
| 217 | + def check_excel (self): | |
| 218 | + excel = Indicator('excel', False, name='Excel Workbook', | |
| 219 | + description='Contains a Workbook or Book stream, very likely to be a Microsoft Excel Workbook.') | |
| 220 | + self.indicators.append(excel) | |
| 221 | + #self.macros = Indicator('vba_macros', False, name='VBA Macros') | |
| 222 | + #self.indicators.append(self.macros) | |
| 223 | + if self.ole.exists('Workbook') or self.ole.exists('Book'): | |
| 224 | + excel.value = True | |
| 225 | + # check for VBA macros: | |
| 226 | + if self.ole.exists('_VBA_PROJECT_CUR'): | |
| 227 | + self.macros.value = True | |
| 228 | + | |
| 229 | + def check_powerpoint (self): | |
| 230 | + ppt = Indicator('ppt', False, name='PowerPoint Presentation', | |
| 231 | + description='Contains a PowerPoint Document stream, very likely to be a Microsoft PowerPoint Presentation.') | |
| 232 | + self.indicators.append(ppt) | |
| 233 | + if self.ole.exists('PowerPoint Document'): | |
| 234 | + ppt.value = True | |
| 235 | + | |
| 236 | + def check_visio (self): | |
| 237 | + visio = Indicator('visio', False, name='Visio Drawing', | |
| 238 | + description='Contains a VisioDocument stream, very likely to be a Microsoft Visio Drawing.') | |
| 239 | + self.indicators.append(visio) | |
| 240 | + if self.ole.exists('VisioDocument'): | |
| 241 | + visio.value = True | |
| 242 | + | |
| 243 | + def check_ObjectPool (self): | |
| 244 | + objpool = Indicator('ObjectPool', False, name='ObjectPool', | |
| 245 | + description='Contains an ObjectPool stream, very likely to contain embedded OLE objects or files.') | |
| 246 | + self.indicators.append(objpool) | |
| 247 | + if self.ole.exists('ObjectPool'): | |
| 248 | + objpool.value = True | |
| 249 | + | |
| 250 | + | |
| 251 | + def check_flash (self): | |
| 252 | + flash = Indicator('flash', 0, _type=int, name='Flash objects', | |
| 253 | + description='Number of embedded Flash objects (SWF files) detected in OLE streams. Not 100% accurate, there may be false positives.') | |
| 254 | + self.indicators.append(flash) | |
| 255 | + for stream in self.ole.listdir(): | |
| 256 | + data = self.ole.openstream(stream).read() | |
| 257 | + found = detect_flash(data) | |
| 258 | + # just add to the count of Flash objects: | |
| 259 | + flash.value += len(found) | |
| 260 | + #print stream, found | |
| 261 | + | |
| 262 | + | |
| 263 | +#=== MAIN ================================================================= | |
| 264 | + | |
| 265 | +def main(): | |
| 266 | + usage = 'usage: %prog [options] <file>' | |
| 267 | + parser = optparse.OptionParser(usage=__doc__ + '\n' + usage) | |
| 268 | +## parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream') | |
| 269 | + | |
| 270 | + (options, args) = parser.parse_args() | |
| 271 | + | |
| 272 | + # Print help if no argurments are passed | |
| 273 | + if len(args) == 0: | |
| 274 | + parser.print_help() | |
| 275 | + return | |
| 276 | + | |
| 277 | + for filename in args: | |
| 278 | + print '\nFilename:', filename | |
| 279 | + oleid = OleID(filename) | |
| 280 | + indicators = oleid.check() | |
| 281 | + | |
| 282 | + #TODO: add description | |
| 283 | + #TODO: highlight suspicious indicators | |
| 284 | + t = prettytable.PrettyTable(['Indicator', 'Value']) | |
| 285 | + t.align = 'l' | |
| 286 | + t.max_width = 39 | |
| 287 | + #t.border = False | |
| 288 | + | |
| 289 | + for indicator in indicators: | |
| 290 | + #print '%s: %s' % (indicator.name, indicator.value) | |
| 291 | + t.add_row((indicator.name, indicator.value)) | |
| 292 | + | |
| 293 | + print t | |
| 294 | + | |
| 295 | +if __name__ == '__main__': | |
| 296 | + main() | ... | ... |
oletools/olemeta.py
| 1 | -#!/usr/bin/env python | |
| 2 | -""" | |
| 3 | -olemeta.py | |
| 4 | - | |
| 5 | -olemeta is a script to parse OLE files such as MS Office documents (e.g. Word, | |
| 6 | -Excel), to extract all standard properties present in the OLE file. | |
| 7 | - | |
| 8 | -Usage: olemeta.py <file> | |
| 9 | - | |
| 10 | -olemeta project website: http://www.decalage.info/python/olemeta | |
| 11 | - | |
| 12 | -olemeta is part of the python-oletools package: | |
| 13 | -http://www.decalage.info/python/oletools | |
| 14 | -""" | |
| 15 | - | |
| 16 | -#=== LICENSE ================================================================= | |
| 17 | - | |
| 18 | -# olemeta is copyright (c) 2013-2014, Philippe Lagadec (http://www.decalage.info) | |
| 19 | -# All rights reserved. | |
| 20 | -# | |
| 21 | -# Redistribution and use in source and binary forms, with or without modification, | |
| 22 | -# are permitted provided that the following conditions are met: | |
| 23 | -# | |
| 24 | -# * Redistributions of source code must retain the above copyright notice, this | |
| 25 | -# list of conditions and the following disclaimer. | |
| 26 | -# * Redistributions in binary form must reproduce the above copyright notice, | |
| 27 | -# this list of conditions and the following disclaimer in the documentation | |
| 28 | -# and/or other materials provided with the distribution. | |
| 29 | -# | |
| 30 | -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 31 | -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 32 | -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 33 | -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 34 | -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 35 | -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 36 | -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 37 | -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 38 | -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 39 | -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 40 | - | |
| 41 | -#------------------------------------------------------------------------------ | |
| 42 | -# CHANGELOG: | |
| 43 | -# 2013-07-24 v0.01 PL: - first version | |
| 44 | -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL | |
| 45 | -# - improved usage display | |
| 46 | - | |
| 47 | -__version__ = '0.02' | |
| 48 | - | |
| 49 | -#------------------------------------------------------------------------------ | |
| 50 | -# TODO: | |
| 51 | -# + optparse | |
| 52 | -# + nicer output: table with fixed columns, datetime, etc | |
| 53 | -# + CSV output | |
| 54 | -# + option to only show available properties (by default) | |
| 55 | - | |
| 56 | -#=== IMPORTS ================================================================= | |
| 57 | - | |
| 58 | -import sys | |
| 59 | -import thirdparty.olefile as olefile | |
| 60 | - | |
| 61 | - | |
| 62 | -#=== MAIN ================================================================= | |
| 63 | - | |
| 64 | -try: | |
| 65 | - ole = olefile.OleFileIO(sys.argv[1]) | |
| 66 | -except IndexError: | |
| 67 | - sys.exit(__doc__) | |
| 68 | - | |
| 69 | -# parse and display metadata: | |
| 70 | -meta = ole.get_metadata() | |
| 71 | -meta.dump() | |
| 72 | - | |
| 73 | -ole.close() | |
| 1 | +#!/usr/bin/env python | |
| 2 | +""" | |
| 3 | +olemeta.py | |
| 4 | + | |
| 5 | +olemeta is a script to parse OLE files such as MS Office documents (e.g. Word, | |
| 6 | +Excel), to extract all standard properties present in the OLE file. | |
| 7 | + | |
| 8 | +Usage: olemeta.py <file> | |
| 9 | + | |
| 10 | +olemeta project website: http://www.decalage.info/python/olemeta | |
| 11 | + | |
| 12 | +olemeta is part of the python-oletools package: | |
| 13 | +http://www.decalage.info/python/oletools | |
| 14 | +""" | |
| 15 | + | |
| 16 | +#=== LICENSE ================================================================= | |
| 17 | + | |
| 18 | +# olemeta is copyright (c) 2013-2015, Philippe Lagadec (http://www.decalage.info) | |
| 19 | +# All rights reserved. | |
| 20 | +# | |
| 21 | +# Redistribution and use in source and binary forms, with or without modification, | |
| 22 | +# are permitted provided that the following conditions are met: | |
| 23 | +# | |
| 24 | +# * Redistributions of source code must retain the above copyright notice, this | |
| 25 | +# list of conditions and the following disclaimer. | |
| 26 | +# * Redistributions in binary form must reproduce the above copyright notice, | |
| 27 | +# this list of conditions and the following disclaimer in the documentation | |
| 28 | +# and/or other materials provided with the distribution. | |
| 29 | +# | |
| 30 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 31 | +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 32 | +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 33 | +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 34 | +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 35 | +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 36 | +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 37 | +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 38 | +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 39 | +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 40 | + | |
| 41 | +#------------------------------------------------------------------------------ | |
| 42 | +# CHANGELOG: | |
| 43 | +# 2013-07-24 v0.01 PL: - first version | |
| 44 | +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL | |
| 45 | +# - improved usage display | |
| 46 | + | |
| 47 | +__version__ = '0.02' | |
| 48 | + | |
| 49 | +#------------------------------------------------------------------------------ | |
| 50 | +# TODO: | |
| 51 | +# + optparse | |
| 52 | +# + nicer output: table with fixed columns, datetime, etc | |
| 53 | +# + CSV output | |
| 54 | +# + option to only show available properties (by default) | |
| 55 | + | |
| 56 | +#=== IMPORTS ================================================================= | |
| 57 | + | |
| 58 | +import sys | |
| 59 | +import thirdparty.olefile as olefile | |
| 60 | + | |
| 61 | + | |
| 62 | +#=== MAIN ================================================================= | |
| 63 | + | |
| 64 | +try: | |
| 65 | + ole = olefile.OleFileIO(sys.argv[1]) | |
| 66 | +except IndexError: | |
| 67 | + sys.exit(__doc__) | |
| 68 | + | |
| 69 | +# parse and display metadata: | |
| 70 | +meta = ole.get_metadata() | |
| 71 | +meta.dump() | |
| 72 | + | |
| 73 | +ole.close() | ... | ... |
oletools/oletimes.py
| 1 | -#!/usr/bin/env python | |
| 2 | -""" | |
| 3 | -oletimes.py | |
| 4 | - | |
| 5 | -oletimes is a script to parse OLE files such as MS Office documents (e.g. Word, | |
| 6 | -Excel), to extract creation and modification times of all streams and storages | |
| 7 | -in the OLE file. | |
| 8 | - | |
| 9 | -Usage: oletimes.py <file> | |
| 10 | - | |
| 11 | -oletimes project website: http://www.decalage.info/python/oletimes | |
| 12 | - | |
| 13 | -oletimes is part of the python-oletools package: | |
| 14 | -http://www.decalage.info/python/oletools | |
| 15 | -""" | |
| 16 | - | |
| 17 | -#=== LICENSE ================================================================= | |
| 18 | - | |
| 19 | -# oletimes is copyright (c) 2013-2014, Philippe Lagadec (http://www.decalage.info) | |
| 20 | -# All rights reserved. | |
| 21 | -# | |
| 22 | -# Redistribution and use in source and binary forms, with or without modification, | |
| 23 | -# are permitted provided that the following conditions are met: | |
| 24 | -# | |
| 25 | -# * Redistributions of source code must retain the above copyright notice, this | |
| 26 | -# list of conditions and the following disclaimer. | |
| 27 | -# * Redistributions in binary form must reproduce the above copyright notice, | |
| 28 | -# this list of conditions and the following disclaimer in the documentation | |
| 29 | -# and/or other materials provided with the distribution. | |
| 30 | -# | |
| 31 | -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 32 | -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 33 | -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 34 | -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 35 | -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 36 | -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 37 | -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 38 | -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 39 | -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 40 | -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 41 | - | |
| 42 | - | |
| 43 | -#------------------------------------------------------------------------------ | |
| 44 | -# CHANGELOG: | |
| 45 | -# 2013-07-24 v0.01 PL: - first version | |
| 46 | -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL | |
| 47 | -# - improved usage display | |
| 48 | -# 2014-11-30 v0.03 PL: - improved output with prettytable | |
| 49 | - | |
| 50 | -__version__ = '0.03' | |
| 51 | - | |
| 52 | -#------------------------------------------------------------------------------ | |
| 53 | -# TODO: | |
| 54 | -# + optparse | |
| 55 | -# + nicer output: table with fixed columns, datetime, etc | |
| 56 | -# + CSV output | |
| 57 | -# + option to only show available timestamps (by default?) | |
| 58 | - | |
| 59 | -#=== IMPORTS ================================================================= | |
| 60 | - | |
| 61 | -import sys, datetime | |
| 62 | -import thirdparty.olefile as olefile | |
| 63 | -from thirdparty.prettytable import prettytable | |
| 64 | - | |
| 65 | - | |
| 66 | -#=== MAIN ================================================================= | |
| 67 | - | |
| 68 | -try: | |
| 69 | - ole = olefile.OleFileIO(sys.argv[1]) | |
| 70 | -except IndexError: | |
| 71 | - sys.exit(__doc__) | |
| 72 | - | |
| 73 | -def dt2str (dt): | |
| 74 | - """ | |
| 75 | - Convert a datetime object to a string for display, without microseconds | |
| 76 | - | |
| 77 | - :param dt: datetime.datetime object, or None | |
| 78 | - :return: str, or None | |
| 79 | - """ | |
| 80 | - if dt is None: | |
| 81 | - return None | |
| 82 | - dt = dt.replace(microsecond = 0) | |
| 83 | - return str(dt) | |
| 84 | - | |
| 85 | -t = prettytable.PrettyTable(['Stream/Storage name', 'Modification Time', 'Creation Time']) | |
| 86 | -t.align = 'l' | |
| 87 | -t.max_width = 26 | |
| 88 | -#t.border = False | |
| 89 | - | |
| 90 | -#print'- Root mtime=%s ctime=%s' % (ole.root.getmtime(), ole.root.getctime()) | |
| 91 | -t.add_row(('Root', dt2str(ole.root.getmtime()), dt2str(ole.root.getctime()))) | |
| 92 | - | |
| 93 | -for obj in ole.listdir(streams=True, storages=True): | |
| 94 | - #print '- %s: mtime=%s ctime=%s' % (repr('/'.join(obj)), ole.getmtime(obj), ole.getctime(obj)) | |
| 95 | - t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj)))) | |
| 96 | - | |
| 97 | -print t | |
| 98 | - | |
| 99 | -ole.close() | |
| 1 | +#!/usr/bin/env python | |
| 2 | +""" | |
| 3 | +oletimes.py | |
| 4 | + | |
| 5 | +oletimes is a script to parse OLE files such as MS Office documents (e.g. Word, | |
| 6 | +Excel), to extract creation and modification times of all streams and storages | |
| 7 | +in the OLE file. | |
| 8 | + | |
| 9 | +Usage: oletimes.py <file> | |
| 10 | + | |
| 11 | +oletimes project website: http://www.decalage.info/python/oletimes | |
| 12 | + | |
| 13 | +oletimes is part of the python-oletools package: | |
| 14 | +http://www.decalage.info/python/oletools | |
| 15 | +""" | |
| 16 | + | |
| 17 | +#=== LICENSE ================================================================= | |
| 18 | + | |
| 19 | +# oletimes is copyright (c) 2013-2015, Philippe Lagadec (http://www.decalage.info) | |
| 20 | +# All rights reserved. | |
| 21 | +# | |
| 22 | +# Redistribution and use in source and binary forms, with or without modification, | |
| 23 | +# are permitted provided that the following conditions are met: | |
| 24 | +# | |
| 25 | +# * Redistributions of source code must retain the above copyright notice, this | |
| 26 | +# list of conditions and the following disclaimer. | |
| 27 | +# * Redistributions in binary form must reproduce the above copyright notice, | |
| 28 | +# this list of conditions and the following disclaimer in the documentation | |
| 29 | +# and/or other materials provided with the distribution. | |
| 30 | +# | |
| 31 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 32 | +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 33 | +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 34 | +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 35 | +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 36 | +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 37 | +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 38 | +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 39 | +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 40 | +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 41 | + | |
| 42 | + | |
| 43 | +#------------------------------------------------------------------------------ | |
| 44 | +# CHANGELOG: | |
| 45 | +# 2013-07-24 v0.01 PL: - first version | |
| 46 | +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL | |
| 47 | +# - improved usage display | |
| 48 | +# 2014-11-30 v0.03 PL: - improved output with prettytable | |
| 49 | + | |
| 50 | +__version__ = '0.03' | |
| 51 | + | |
| 52 | +#------------------------------------------------------------------------------ | |
| 53 | +# TODO: | |
| 54 | +# + optparse | |
| 55 | +# + nicer output: table with fixed columns, datetime, etc | |
| 56 | +# + CSV output | |
| 57 | +# + option to only show available timestamps (by default?) | |
| 58 | + | |
| 59 | +#=== IMPORTS ================================================================= | |
| 60 | + | |
| 61 | +import sys, datetime | |
| 62 | +import thirdparty.olefile as olefile | |
| 63 | +from thirdparty.prettytable import prettytable | |
| 64 | + | |
| 65 | + | |
| 66 | +#=== MAIN ================================================================= | |
| 67 | + | |
| 68 | +try: | |
| 69 | + ole = olefile.OleFileIO(sys.argv[1]) | |
| 70 | +except IndexError: | |
| 71 | + sys.exit(__doc__) | |
| 72 | + | |
| 73 | +def dt2str (dt): | |
| 74 | + """ | |
| 75 | + Convert a datetime object to a string for display, without microseconds | |
| 76 | + | |
| 77 | + :param dt: datetime.datetime object, or None | |
| 78 | + :return: str, or None | |
| 79 | + """ | |
| 80 | + if dt is None: | |
| 81 | + return None | |
| 82 | + dt = dt.replace(microsecond = 0) | |
| 83 | + return str(dt) | |
| 84 | + | |
| 85 | +t = prettytable.PrettyTable(['Stream/Storage name', 'Modification Time', 'Creation Time']) | |
| 86 | +t.align = 'l' | |
| 87 | +t.max_width = 26 | |
| 88 | +#t.border = False | |
| 89 | + | |
| 90 | +#print'- Root mtime=%s ctime=%s' % (ole.root.getmtime(), ole.root.getctime()) | |
| 91 | +t.add_row(('Root', dt2str(ole.root.getmtime()), dt2str(ole.root.getctime()))) | |
| 92 | + | |
| 93 | +for obj in ole.listdir(streams=True, storages=True): | |
| 94 | + #print '- %s: mtime=%s ctime=%s' % (repr('/'.join(obj)), ole.getmtime(obj), ole.getctime(obj)) | |
| 95 | + t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj)))) | |
| 96 | + | |
| 97 | +print t | |
| 98 | + | |
| 99 | +ole.close() | ... | ... |
oletools/pyxswf.py
| 1 | -#!/usr/bin/env python | |
| 2 | -""" | |
| 3 | -pyxswf.py | |
| 4 | - | |
| 5 | -pyxswf is a script to detect, extract and analyze Flash objects (SWF) that may | |
| 6 | -be embedded in files such as MS Office documents (e.g. Word, Excel), | |
| 7 | -which is especially useful for malware analysis. | |
| 8 | - | |
| 9 | -pyxswf is an extension to xxxswf.py published by Alexander Hanel on | |
| 10 | -http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html | |
| 11 | -Compared to xxxswf, it can extract streams from MS Office documents by parsing | |
| 12 | -their OLE structure properly (-o option), which is necessary when streams are | |
| 13 | -fragmented. | |
| 14 | -Stream fragmentation is a known obfuscation technique, as explained on | |
| 15 | -http://www.breakingpointsystems.com/resources/blog/evasion-with-ole2-fragmentation/ | |
| 16 | - | |
| 17 | -It can also extract Flash objects from RTF documents, by parsing embedded | |
| 18 | -objects encoded in hexadecimal format (-f option). | |
| 19 | - | |
| 20 | -pyxswf project website: http://www.decalage.info/python/pyxswf | |
| 21 | - | |
| 22 | -pyxswf is part of the python-oletools package: | |
| 23 | -http://www.decalage.info/python/oletools | |
| 24 | -""" | |
| 25 | - | |
| 26 | -#=== LICENSE ================================================================= | |
| 27 | - | |
| 28 | -# pyxswf is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info) | |
| 29 | -# All rights reserved. | |
| 30 | -# | |
| 31 | -# Redistribution and use in source and binary forms, with or without modification, | |
| 32 | -# are permitted provided that the following conditions are met: | |
| 33 | -# | |
| 34 | -# * Redistributions of source code must retain the above copyright notice, this | |
| 35 | -# list of conditions and the following disclaimer. | |
| 36 | -# * Redistributions in binary form must reproduce the above copyright notice, | |
| 37 | -# this list of conditions and the following disclaimer in the documentation | |
| 38 | -# and/or other materials provided with the distribution. | |
| 39 | -# | |
| 40 | -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 41 | -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 42 | -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 43 | -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 44 | -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 45 | -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 46 | -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 47 | -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 48 | -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 49 | -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 50 | - | |
| 51 | -#------------------------------------------------------------------------------ | |
| 52 | -# CHANGELOG: | |
| 53 | -# 2012-09-17 v0.01 PL: - first version | |
| 54 | -# 2012-11-09 v0.02 PL: - added RTF embedded objects extraction | |
| 55 | -# 2014-11-29 v0.03 PL: - use olefile instead of OleFileIO_PL | |
| 56 | -# - improved usage display with -h | |
| 57 | - | |
| 58 | -__version__ = '0.03' | |
| 59 | - | |
| 60 | -#------------------------------------------------------------------------------ | |
| 61 | -# TODO: | |
| 62 | -# + add support for LZMA-compressed flash files (ZWS header) | |
| 63 | -# references: http://blog.malwaretracker.com/2014/01/cve-2013-5331-evaded-av-by-using.html | |
| 64 | -# http://code.metager.de/source/xref/adobe/flash/crossbridge/tools/swf-info.py | |
| 65 | -# http://room32.dyndns.org/forums/showthread.php?766-SWFCompression | |
| 66 | -# sample code: http://room32.dyndns.org/SWFCompression.py | |
| 67 | -# - check if file is OLE | |
| 68 | -# - support -r | |
| 69 | - | |
| 70 | - | |
| 71 | -#=== IMPORTS ================================================================= | |
| 72 | - | |
| 73 | -import optparse, sys, os, rtfobj, StringIO | |
| 74 | -from thirdparty.xxxswf import xxxswf | |
| 75 | -import thirdparty.olefile as olefile | |
| 76 | - | |
| 77 | - | |
| 78 | -#=== MAIN ================================================================= | |
| 79 | - | |
| 80 | -def main(): | |
| 81 | - # Scenarios: | |
| 82 | - # Scan file for SWF(s) | |
| 83 | - # Scan file for SWF(s) and extract them | |
| 84 | - # Scan file for SWF(s) and scan them with Yara | |
| 85 | - # Scan file for SWF(s), extract them and scan with Yara | |
| 86 | - # Scan directory recursively for files that contain SWF(s) | |
| 87 | - # Scan directory recursively for files that contain SWF(s) and extract them | |
| 88 | - | |
| 89 | - usage = 'usage: %prog [options] <file.bad>' | |
| 90 | - parser = optparse.OptionParser(usage=__doc__ + '\n' + usage) | |
| 91 | - parser.add_option('-x', '--extract', action='store_true', dest='extract', help='Extracts the embedded SWF(s), names it MD5HASH.swf & saves it in the working dir. No addition args needed') | |
| 92 | - parser.add_option('-y', '--yara', action='store_true', dest='yara', help='Scans the SWF(s) with yara. If the SWF(s) is compressed it will be deflated. No addition args needed') | |
| 93 | - parser.add_option('-s', '--md5scan', action='store_true', dest='md5scan', help='Scans the SWF(s) for MD5 signatures. Please see func checkMD5 to define hashes. No addition args needed') | |
| 94 | - parser.add_option('-H', '--header', action='store_true', dest='header', help='Displays the SWFs file header. No addition args needed') | |
| 95 | - parser.add_option('-d', '--decompress', action='store_true', dest='decompress', help='Deflates compressed SWFS(s)') | |
| 96 | - parser.add_option('-r', '--recdir', dest='PATH', type='string', help='Will recursively scan a directory for files that contain SWFs. Must provide path in quotes') | |
| 97 | - parser.add_option('-c', '--compress', action='store_true', dest='compress', help='Compresses the SWF using Zlib') | |
| 98 | - | |
| 99 | - parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream') | |
| 100 | - parser.add_option('-f', '--rtf', action='store_true', dest='rtf', help='Parse an RTF file to look for SWF in each embedded object') | |
| 101 | - | |
| 102 | - | |
| 103 | - (options, args) = parser.parse_args() | |
| 104 | - | |
| 105 | - # Print help if no arguments are passed | |
| 106 | - if len(args) == 0: | |
| 107 | - parser.print_help() | |
| 108 | - return | |
| 109 | - | |
| 110 | - # OLE MODE: | |
| 111 | - if options.ole: | |
| 112 | - for filename in args: | |
| 113 | - ole = olefile.OleFileIO(filename) | |
| 114 | - for direntry in ole.direntries: | |
| 115 | - if direntry is not None and direntry.entry_type == olefile.STGTY_STREAM: | |
| 116 | - f = ole._open(direntry.isectStart, direntry.size) | |
| 117 | - # check if data contains the SWF magic: FWS or CWS | |
| 118 | - data = f.getvalue() | |
| 119 | - if 'FWS' in data or 'CWS' in data: | |
| 120 | - print 'OLE stream: %s' % repr(direntry.name) | |
| 121 | - # call xxxswf to scan or extract Flash files: | |
| 122 | - xxxswf.disneyland(f, direntry.name, options) | |
| 123 | - f.close() | |
| 124 | - ole.close() | |
| 125 | - | |
| 126 | - # RTF MODE: | |
| 127 | - elif options.rtf: | |
| 128 | - for filename in args: | |
| 129 | - for index, data in rtfobj.rtf_iter_objects(filename): | |
| 130 | - if 'FWS' in data or 'CWS' in data: | |
| 131 | - print 'RTF embedded object size %d at index %08X' % (len(data), index) | |
| 132 | - f = StringIO.StringIO(data) | |
| 133 | - name = 'RTF_embedded_object_%08X' % index | |
| 134 | - # call xxxswf to scan or extract Flash files: | |
| 135 | - xxxswf.disneyland(f, name, options) | |
| 136 | - | |
| 137 | - else: | |
| 138 | - xxxswf.main() | |
| 139 | - | |
| 140 | -if __name__ == '__main__': | |
| 141 | - main() | |
| 1 | +#!/usr/bin/env python | |
| 2 | +""" | |
| 3 | +pyxswf.py | |
| 4 | + | |
| 5 | +pyxswf is a script to detect, extract and analyze Flash objects (SWF) that may | |
| 6 | +be embedded in files such as MS Office documents (e.g. Word, Excel), | |
| 7 | +which is especially useful for malware analysis. | |
| 8 | + | |
| 9 | +pyxswf is an extension to xxxswf.py published by Alexander Hanel on | |
| 10 | +http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html | |
| 11 | +Compared to xxxswf, it can extract streams from MS Office documents by parsing | |
| 12 | +their OLE structure properly (-o option), which is necessary when streams are | |
| 13 | +fragmented. | |
| 14 | +Stream fragmentation is a known obfuscation technique, as explained on | |
| 15 | +http://www.breakingpointsystems.com/resources/blog/evasion-with-ole2-fragmentation/ | |
| 16 | + | |
| 17 | +It can also extract Flash objects from RTF documents, by parsing embedded | |
| 18 | +objects encoded in hexadecimal format (-f option). | |
| 19 | + | |
| 20 | +pyxswf project website: http://www.decalage.info/python/pyxswf | |
| 21 | + | |
| 22 | +pyxswf is part of the python-oletools package: | |
| 23 | +http://www.decalage.info/python/oletools | |
| 24 | +""" | |
| 25 | + | |
| 26 | +#=== LICENSE ================================================================= | |
| 27 | + | |
| 28 | +# pyxswf is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info) | |
| 29 | +# All rights reserved. | |
| 30 | +# | |
| 31 | +# Redistribution and use in source and binary forms, with or without modification, | |
| 32 | +# are permitted provided that the following conditions are met: | |
| 33 | +# | |
| 34 | +# * Redistributions of source code must retain the above copyright notice, this | |
| 35 | +# list of conditions and the following disclaimer. | |
| 36 | +# * Redistributions in binary form must reproduce the above copyright notice, | |
| 37 | +# this list of conditions and the following disclaimer in the documentation | |
| 38 | +# and/or other materials provided with the distribution. | |
| 39 | +# | |
| 40 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 41 | +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 42 | +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 43 | +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 44 | +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 45 | +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 46 | +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 47 | +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 48 | +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 49 | +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 50 | + | |
| 51 | +#------------------------------------------------------------------------------ | |
| 52 | +# CHANGELOG: | |
| 53 | +# 2012-09-17 v0.01 PL: - first version | |
| 54 | +# 2012-11-09 v0.02 PL: - added RTF embedded objects extraction | |
| 55 | +# 2014-11-29 v0.03 PL: - use olefile instead of OleFileIO_PL | |
| 56 | +# - improved usage display with -h | |
| 57 | + | |
| 58 | +__version__ = '0.03' | |
| 59 | + | |
| 60 | +#------------------------------------------------------------------------------ | |
| 61 | +# TODO: | |
| 62 | +# + add support for LZMA-compressed flash files (ZWS header) | |
| 63 | +# references: http://blog.malwaretracker.com/2014/01/cve-2013-5331-evaded-av-by-using.html | |
| 64 | +# http://code.metager.de/source/xref/adobe/flash/crossbridge/tools/swf-info.py | |
| 65 | +# http://room32.dyndns.org/forums/showthread.php?766-SWFCompression | |
| 66 | +# sample code: http://room32.dyndns.org/SWFCompression.py | |
| 67 | +# - check if file is OLE | |
| 68 | +# - support -r | |
| 69 | + | |
| 70 | + | |
| 71 | +#=== IMPORTS ================================================================= | |
| 72 | + | |
| 73 | +import optparse, sys, os, rtfobj, StringIO | |
| 74 | +from thirdparty.xxxswf import xxxswf | |
| 75 | +import thirdparty.olefile as olefile | |
| 76 | + | |
| 77 | + | |
| 78 | +#=== MAIN ================================================================= | |
| 79 | + | |
| 80 | +def main(): | |
| 81 | + # Scenarios: | |
| 82 | + # Scan file for SWF(s) | |
| 83 | + # Scan file for SWF(s) and extract them | |
| 84 | + # Scan file for SWF(s) and scan them with Yara | |
| 85 | + # Scan file for SWF(s), extract them and scan with Yara | |
| 86 | + # Scan directory recursively for files that contain SWF(s) | |
| 87 | + # Scan directory recursively for files that contain SWF(s) and extract them | |
| 88 | + | |
| 89 | + usage = 'usage: %prog [options] <file.bad>' | |
| 90 | + parser = optparse.OptionParser(usage=__doc__ + '\n' + usage) | |
| 91 | + parser.add_option('-x', '--extract', action='store_true', dest='extract', help='Extracts the embedded SWF(s), names it MD5HASH.swf & saves it in the working dir. No addition args needed') | |
| 92 | + parser.add_option('-y', '--yara', action='store_true', dest='yara', help='Scans the SWF(s) with yara. If the SWF(s) is compressed it will be deflated. No addition args needed') | |
| 93 | + parser.add_option('-s', '--md5scan', action='store_true', dest='md5scan', help='Scans the SWF(s) for MD5 signatures. Please see func checkMD5 to define hashes. No addition args needed') | |
| 94 | + parser.add_option('-H', '--header', action='store_true', dest='header', help='Displays the SWFs file header. No addition args needed') | |
| 95 | + parser.add_option('-d', '--decompress', action='store_true', dest='decompress', help='Deflates compressed SWFS(s)') | |
| 96 | + parser.add_option('-r', '--recdir', dest='PATH', type='string', help='Will recursively scan a directory for files that contain SWFs. Must provide path in quotes') | |
| 97 | + parser.add_option('-c', '--compress', action='store_true', dest='compress', help='Compresses the SWF using Zlib') | |
| 98 | + | |
| 99 | + parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream') | |
| 100 | + parser.add_option('-f', '--rtf', action='store_true', dest='rtf', help='Parse an RTF file to look for SWF in each embedded object') | |
| 101 | + | |
| 102 | + | |
| 103 | + (options, args) = parser.parse_args() | |
| 104 | + | |
| 105 | + # Print help if no arguments are passed | |
| 106 | + if len(args) == 0: | |
| 107 | + parser.print_help() | |
| 108 | + return | |
| 109 | + | |
| 110 | + # OLE MODE: | |
| 111 | + if options.ole: | |
| 112 | + for filename in args: | |
| 113 | + ole = olefile.OleFileIO(filename) | |
| 114 | + for direntry in ole.direntries: | |
| 115 | + if direntry is not None and direntry.entry_type == olefile.STGTY_STREAM: | |
| 116 | + f = ole._open(direntry.isectStart, direntry.size) | |
| 117 | + # check if data contains the SWF magic: FWS or CWS | |
| 118 | + data = f.getvalue() | |
| 119 | + if 'FWS' in data or 'CWS' in data: | |
| 120 | + print 'OLE stream: %s' % repr(direntry.name) | |
| 121 | + # call xxxswf to scan or extract Flash files: | |
| 122 | + xxxswf.disneyland(f, direntry.name, options) | |
| 123 | + f.close() | |
| 124 | + ole.close() | |
| 125 | + | |
| 126 | + # RTF MODE: | |
| 127 | + elif options.rtf: | |
| 128 | + for filename in args: | |
| 129 | + for index, data in rtfobj.rtf_iter_objects(filename): | |
| 130 | + if 'FWS' in data or 'CWS' in data: | |
| 131 | + print 'RTF embedded object size %d at index %08X' % (len(data), index) | |
| 132 | + f = StringIO.StringIO(data) | |
| 133 | + name = 'RTF_embedded_object_%08X' % index | |
| 134 | + # call xxxswf to scan or extract Flash files: | |
| 135 | + xxxswf.disneyland(f, name, options) | |
| 136 | + | |
| 137 | + else: | |
| 138 | + xxxswf.main() | |
| 139 | + | |
| 140 | +if __name__ == '__main__': | |
| 141 | + main() | ... | ... |
oletools/rtfobj.py
| 1 | -#!/usr/bin/env python | |
| 2 | -""" | |
| 3 | -rtfobj.py - Philippe Lagadec 2013-04-02 | |
| 4 | - | |
| 5 | -rtfobj is a Python module to extract embedded objects from RTF files, such as | |
| 6 | -OLE objects. It can be used as a Python library or a command-line tool. | |
| 7 | - | |
| 8 | -Usage: rtfobj.py <file.rtf> | |
| 9 | - | |
| 10 | -rtfobj project website: http://www.decalage.info/python/rtfobj | |
| 11 | - | |
| 12 | -rtfobj is part of the python-oletools package: | |
| 13 | -http://www.decalage.info/python/oletools | |
| 14 | -""" | |
| 15 | - | |
| 16 | -#=== LICENSE ================================================================= | |
| 17 | - | |
| 18 | -# rtfobj is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info) | |
| 19 | -# All rights reserved. | |
| 20 | -# | |
| 21 | -# Redistribution and use in source and binary forms, with or without modification, | |
| 22 | -# are permitted provided that the following conditions are met: | |
| 23 | -# | |
| 24 | -# * Redistributions of source code must retain the above copyright notice, this | |
| 25 | -# list of conditions and the following disclaimer. | |
| 26 | -# * Redistributions in binary form must reproduce the above copyright notice, | |
| 27 | -# this list of conditions and the following disclaimer in the documentation | |
| 28 | -# and/or other materials provided with the distribution. | |
| 29 | -# | |
| 30 | -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 31 | -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 32 | -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 33 | -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 34 | -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 35 | -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 36 | -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 37 | -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 38 | -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 39 | -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 40 | - | |
| 41 | - | |
| 42 | -#------------------------------------------------------------------------------ | |
| 43 | -# CHANGELOG: | |
| 44 | -# 2012-11-09 v0.01 PL: - first version | |
| 45 | -# 2013-04-02 v0.02 PL: - fixed bug in main | |
| 46 | - | |
| 47 | -__version__ = '0.02' | |
| 48 | - | |
| 49 | -#------------------------------------------------------------------------------ | |
| 50 | -# TODO: | |
| 51 | -# - improve regex pattern for better performance? | |
| 52 | -# - allow semicolon within hex, as found in this sample: | |
| 53 | -# http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html | |
| 54 | - | |
| 55 | -#=== IMPORTS ================================================================= | |
| 56 | - | |
| 57 | -import re, sys, string, binascii | |
| 58 | - | |
| 59 | - | |
| 60 | -#=== CONSTANTS================================================================= | |
| 61 | - | |
| 62 | -# REGEX pattern to extract embedded OLE objects in hexadecimal format: | |
| 63 | -# alphanum digit: [0-9A-Fa-f] | |
| 64 | -# hex char = two alphanum digits: [0-9A-Fa-f]{2} | |
| 65 | -# several hex chars, at least 4: (?:[0-9A-Fa-f]{2}){4,} | |
| 66 | -# at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s* | |
| 67 | -PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' | |
| 68 | -# improved pattern, allowing semicolons within hex: | |
| 69 | -#PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' | |
| 70 | - | |
| 71 | -# a dummy translation table for str.translate, which does not change anything: | |
| 72 | -TRANSTABLE_NOCHANGE = string.maketrans('', '') | |
| 73 | - | |
| 74 | - | |
| 75 | -#=== FUNCTIONS ================================================================= | |
| 76 | - | |
| 77 | -def rtf_iter_objects (filename, min_size=32): | |
| 78 | - """ | |
| 79 | - Open a RTF file, extract each embedded object encoded in hexadecimal of | |
| 80 | - size > min_size, yield the index of the object in the RTF file and its data | |
| 81 | - in binary format. | |
| 82 | - This is an iterator. | |
| 83 | - """ | |
| 84 | - data = open(filename, 'rb').read() | |
| 85 | - for m in re.finditer(PATTERN, data): | |
| 86 | - found = m.group(0) | |
| 87 | - # remove all whitespace and line feeds: | |
| 88 | - #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE | |
| 89 | - found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v') | |
| 90 | - found = binascii.unhexlify(found) | |
| 91 | - #print repr(found) | |
| 92 | - if len(found)>min_size: | |
| 93 | - yield m.start(), found | |
| 94 | - | |
| 95 | - | |
| 96 | -#=== MAIN ================================================================= | |
| 97 | - | |
| 98 | -if __name__ == '__main__': | |
| 99 | - if len(sys.argv)<2: | |
| 100 | - sys.exit(__doc__) | |
| 101 | - for index, data in rtf_iter_objects(sys.argv[1]): | |
| 102 | - print 'found object size %d at index %08X' % (len(data), index) | |
| 103 | - fname = 'object_%08X.bin' % index | |
| 104 | - print 'saving to file %s' % fname | |
| 105 | - open(fname, 'wb').write(data) | |
| 1 | +#!/usr/bin/env python | |
| 2 | +""" | |
| 3 | +rtfobj.py - Philippe Lagadec 2013-04-02 | |
| 4 | + | |
| 5 | +rtfobj is a Python module to extract embedded objects from RTF files, such as | |
| 6 | +OLE objects. It can be used as a Python library or a command-line tool. | |
| 7 | + | |
| 8 | +Usage: rtfobj.py <file.rtf> | |
| 9 | + | |
| 10 | +rtfobj project website: http://www.decalage.info/python/rtfobj | |
| 11 | + | |
| 12 | +rtfobj is part of the python-oletools package: | |
| 13 | +http://www.decalage.info/python/oletools | |
| 14 | +""" | |
| 15 | + | |
| 16 | +#=== LICENSE ================================================================= | |
| 17 | + | |
| 18 | +# rtfobj is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info) | |
| 19 | +# All rights reserved. | |
| 20 | +# | |
| 21 | +# Redistribution and use in source and binary forms, with or without modification, | |
| 22 | +# are permitted provided that the following conditions are met: | |
| 23 | +# | |
| 24 | +# * Redistributions of source code must retain the above copyright notice, this | |
| 25 | +# list of conditions and the following disclaimer. | |
| 26 | +# * Redistributions in binary form must reproduce the above copyright notice, | |
| 27 | +# this list of conditions and the following disclaimer in the documentation | |
| 28 | +# and/or other materials provided with the distribution. | |
| 29 | +# | |
| 30 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 31 | +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 32 | +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 33 | +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 34 | +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 35 | +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 36 | +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 37 | +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 38 | +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 39 | +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 40 | + | |
| 41 | + | |
| 42 | +#------------------------------------------------------------------------------ | |
| 43 | +# CHANGELOG: | |
| 44 | +# 2012-11-09 v0.01 PL: - first version | |
| 45 | +# 2013-04-02 v0.02 PL: - fixed bug in main | |
| 46 | + | |
| 47 | +__version__ = '0.02' | |
| 48 | + | |
| 49 | +#------------------------------------------------------------------------------ | |
| 50 | +# TODO: | |
| 51 | +# - improve regex pattern for better performance? | |
| 52 | +# - allow semicolon within hex, as found in this sample: | |
| 53 | +# http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html | |
| 54 | + | |
| 55 | +#=== IMPORTS ================================================================= | |
| 56 | + | |
| 57 | +import re, sys, string, binascii | |
| 58 | + | |
| 59 | + | |
| 60 | +#=== CONSTANTS================================================================= | |
| 61 | + | |
| 62 | +# REGEX pattern to extract embedded OLE objects in hexadecimal format: | |
| 63 | +# alphanum digit: [0-9A-Fa-f] | |
| 64 | +# hex char = two alphanum digits: [0-9A-Fa-f]{2} | |
| 65 | +# several hex chars, at least 4: (?:[0-9A-Fa-f]{2}){4,} | |
| 66 | +# at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s* | |
| 67 | +PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' | |
| 68 | +# improved pattern, allowing semicolons within hex: | |
| 69 | +#PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}' | |
| 70 | + | |
| 71 | +# a dummy translation table for str.translate, which does not change anything: | |
| 72 | +TRANSTABLE_NOCHANGE = string.maketrans('', '') | |
| 73 | + | |
| 74 | + | |
| 75 | +#=== FUNCTIONS ================================================================= | |
| 76 | + | |
| 77 | +def rtf_iter_objects (filename, min_size=32): | |
| 78 | + """ | |
| 79 | + Open a RTF file, extract each embedded object encoded in hexadecimal of | |
| 80 | + size > min_size, yield the index of the object in the RTF file and its data | |
| 81 | + in binary format. | |
| 82 | + This is an iterator. | |
| 83 | + """ | |
| 84 | + data = open(filename, 'rb').read() | |
| 85 | + for m in re.finditer(PATTERN, data): | |
| 86 | + found = m.group(0) | |
| 87 | + # remove all whitespace and line feeds: | |
| 88 | + #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE | |
| 89 | + found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v') | |
| 90 | + found = binascii.unhexlify(found) | |
| 91 | + #print repr(found) | |
| 92 | + if len(found)>min_size: | |
| 93 | + yield m.start(), found | |
| 94 | + | |
| 95 | + | |
| 96 | +#=== MAIN ================================================================= | |
| 97 | + | |
| 98 | +if __name__ == '__main__': | |
| 99 | + if len(sys.argv)<2: | |
| 100 | + sys.exit(__doc__) | |
| 101 | + for index, data in rtf_iter_objects(sys.argv[1]): | |
| 102 | + print 'found object size %d at index %08X' % (len(data), index) | |
| 103 | + fname = 'object_%08X.bin' % index | |
| 104 | + print 'saving to file %s' % fname | |
| 105 | + open(fname, 'wb').write(data) | ... | ... |