Commit cda797574d2076115cc8547c9ccc74aa5664a991

Authored by Philippe Lagadec
1 parent a4ffb743

changed line endings from CRLF to LF in all scripts to improve Linux/Unix compatibility

oletools/ezhexviewer.py
1 -#!/usr/bin/env python  
2 -"""  
3 -ezhexviewer.py  
4 -  
5 -A simple hexadecimal viewer based on easygui. It should work on any platform  
6 -with Python 2.x.  
7 -  
8 -Usage: ezhexviewer.py [file]  
9 -  
10 -Usage in a python application:  
11 -  
12 - import ezhexviewer  
13 - ezhexviewer.hexview_file(filename)  
14 - ezhexviewer.hexview_data(data)  
15 -  
16 -  
17 -ezhexviewer project website: http://www.decalage.info/python/ezhexviewer  
18 -  
19 -ezhexviewer is copyright (c) 2012, Philippe Lagadec (http://www.decalage.info)  
20 -All rights reserved.  
21 -  
22 -Redistribution and use in source and binary forms, with or without modification,  
23 -are permitted provided that the following conditions are met:  
24 -  
25 - * Redistributions of source code must retain the above copyright notice, this  
26 - list of conditions and the following disclaimer.  
27 - * Redistributions in binary form must reproduce the above copyright notice,  
28 - this list of conditions and the following disclaimer in the documentation  
29 - and/or other materials provided with the distribution.  
30 -  
31 -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND  
32 -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED  
33 -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE  
34 -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE  
35 -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  
36 -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR  
37 -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER  
38 -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,  
39 -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  
40 -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
41 -"""  
42 -  
43 -__version__ = '0.01'  
44 -  
45 -#------------------------------------------------------------------------------  
46 -# CHANGELOG:  
47 -# 2012-09-17 v0.01 PL: - first version  
48 -# 2012-10-04 v0.02 PL: - added license  
49 -  
50 -#------------------------------------------------------------------------------  
51 -# TODO:  
52 -# + options to set title and msg  
53 -  
54 -  
55 -from thirdparty.easygui import easygui  
56 -import sys  
57 -  
58 -#------------------------------------------------------------------------------  
59 -# The following code (hexdump3 only) is a modified version of the hex dumper  
60 -# recipe published on ASPN by Sebastien Keim and Raymond Hattinger under the  
61 -# PSF license. I added the startindex parameter.  
62 -# see http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/142812  
63 -# PSF license: http://docs.python.org/license.html  
64 -# Copyright (c) 2001-2012 Python Software Foundation; All Rights Reserved  
65 -  
66 -FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)])  
67 -  
68 -def hexdump3(src, length=8, startindex=0):  
69 - """  
70 - Returns a hexadecimal dump of a binary string.  
71 - length: number of bytes per row.  
72 - startindex: index of 1st byte.  
73 - """  
74 - result=[]  
75 - for i in xrange(0, len(src), length):  
76 - s = src[i:i+length]  
77 - hexa = ' '.join(["%02X"%ord(x) for x in s])  
78 - printable = s.translate(FILTER)  
79 - result.append("%08X %-*s %s\n" % (i+startindex, length*3, hexa, printable))  
80 - return ''.join(result)  
81 -  
82 -# end of PSF-licensed code.  
83 -#------------------------------------------------------------------------------  
84 -  
85 -  
86 -def hexview_data (data, msg='', title='ezhexviewer', length=16, startindex=0):  
87 - hex = hexdump3(data, length=length, startindex=startindex)  
88 - easygui.codebox(msg=msg, title=title, text=hex)  
89 -  
90 -  
91 -def hexview_file (filename, msg='', title='ezhexviewer', length=16, startindex=0):  
92 - data = open(filename, 'rb').read()  
93 - hexview_data(data, msg=msg, title=title, length=length, startindex=startindex)  
94 -  
95 -  
96 -if __name__ == '__main__':  
97 - try:  
98 - filename = sys.argv[1]  
99 - except:  
100 - filename = easygui.fileopenbox()  
101 - if filename:  
102 - try:  
103 - hexview_file(filename, msg='File: %s' % filename)  
104 - except:  
105 - easygui.exceptionbox(msg='Error:', title='ezhexviewer')
  1 +#!/usr/bin/env python
  2 +"""
  3 +ezhexviewer.py
  4 +
  5 +A simple hexadecimal viewer based on easygui. It should work on any platform
  6 +with Python 2.x.
  7 +
  8 +Usage: ezhexviewer.py [file]
  9 +
  10 +Usage in a python application:
  11 +
  12 + import ezhexviewer
  13 + ezhexviewer.hexview_file(filename)
  14 + ezhexviewer.hexview_data(data)
  15 +
  16 +
  17 +ezhexviewer project website: http://www.decalage.info/python/ezhexviewer
  18 +
  19 +ezhexviewer is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info)
  20 +All rights reserved.
  21 +
  22 +Redistribution and use in source and binary forms, with or without modification,
  23 +are permitted provided that the following conditions are met:
  24 +
  25 + * Redistributions of source code must retain the above copyright notice, this
  26 + list of conditions and the following disclaimer.
  27 + * Redistributions in binary form must reproduce the above copyright notice,
  28 + this list of conditions and the following disclaimer in the documentation
  29 + and/or other materials provided with the distribution.
  30 +
  31 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  32 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  33 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  34 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  35 +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  36 +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  37 +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38 +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  39 +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  40 +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  41 +"""
  42 +
  43 +__version__ = '0.02'
  44 +
  45 +#------------------------------------------------------------------------------
  46 +# CHANGELOG:
  47 +# 2012-09-17 v0.01 PL: - first version
  48 +# 2012-10-04 v0.02 PL: - added license
  49 +
  50 +#------------------------------------------------------------------------------
  51 +# TODO:
  52 +# + options to set title and msg
  53 +
  54 +
  55 +from thirdparty.easygui import easygui
  56 +import sys
  57 +
  58 +#------------------------------------------------------------------------------
  59 +# The following code (hexdump3 only) is a modified version of the hex dumper
  60 +# recipe published on ASPN by Sebastien Keim and Raymond Hettinger under the
  61 +# PSF license. I added the startindex parameter.
  62 +# see http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/142812
  63 +# PSF license: http://docs.python.org/license.html
  64 +# Copyright (c) 2001-2012 Python Software Foundation; All Rights Reserved
  65 +
  66 +FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)])
  67 +
  68 +def hexdump3(src, length=8, startindex=0):
  69 + """
  70 + Returns a hexadecimal dump of a binary string.
  71 + length: number of bytes per row.
  72 + startindex: index of 1st byte.
  73 + """
  74 + result=[]
  75 + for i in xrange(0, len(src), length):
  76 + s = src[i:i+length]
  77 + hexa = ' '.join(["%02X"%ord(x) for x in s])
  78 + printable = s.translate(FILTER)
  79 + result.append("%08X %-*s %s\n" % (i+startindex, length*3, hexa, printable))
  80 + return ''.join(result)
  81 +
  82 +# end of PSF-licensed code.
  83 +#------------------------------------------------------------------------------
  84 +
  85 +
  86 +def hexview_data (data, msg='', title='ezhexviewer', length=16, startindex=0):
  87 + hex = hexdump3(data, length=length, startindex=startindex)
  88 + easygui.codebox(msg=msg, title=title, text=hex)
  89 +
  90 +
  91 +def hexview_file (filename, msg='', title='ezhexviewer', length=16, startindex=0):
  92 + data = open(filename, 'rb').read()
  93 + hexview_data(data, msg=msg, title=title, length=length, startindex=startindex)
  94 +
  95 +
  96 +if __name__ == '__main__':
  97 + try:
  98 + filename = sys.argv[1]
  99 + except:
  100 + filename = easygui.fileopenbox()
  101 + if filename:
  102 + try:
  103 + hexview_file(filename, msg='File: %s' % filename)
  104 + except:
  105 + easygui.exceptionbox(msg='Error:', title='ezhexviewer')
oletools/olebrowse.py
1 -#!/usr/bin/env python  
2 -"""  
3 -olebrowse.py  
4 -  
5 -A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to  
6 -view and extract individual data streams.  
7 -  
8 -Usage: olebrowse.py [file]  
9 -  
10 -olebrowse project website: http://www.decalage.info/python/olebrowse  
11 -  
12 -olebrowse is part of the python-oletools package:  
13 -http://www.decalage.info/python/oletools  
14 -  
15 -olebrowse is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info)  
16 -All rights reserved.  
17 -  
18 -Redistribution and use in source and binary forms, with or without modification,  
19 -are permitted provided that the following conditions are met:  
20 -  
21 - * Redistributions of source code must retain the above copyright notice, this  
22 - list of conditions and the following disclaimer.  
23 - * Redistributions in binary form must reproduce the above copyright notice,  
24 - this list of conditions and the following disclaimer in the documentation  
25 - and/or other materials provided with the distribution.  
26 -  
27 -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND  
28 -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED  
29 -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE  
30 -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE  
31 -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  
32 -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR  
33 -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER  
34 -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,  
35 -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  
36 -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
37 -"""  
38 -  
39 -__version__ = '0.02'  
40 -  
41 -#------------------------------------------------------------------------------  
42 -# CHANGELOG:  
43 -# 2012-09-17 v0.01 PL: - first version  
44 -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL  
45 -  
46 -#------------------------------------------------------------------------------  
47 -# TODO:  
48 -# - menu option to open another file  
49 -# - menu option to display properties  
50 -# - menu option to run other oletools, external tools such as OfficeCat?  
51 -# - for a stream, display info: size, path, etc  
52 -# - stream info: magic, entropy, ... ?  
53 -  
54 -import optparse, sys, os  
55 -from thirdparty.easygui import easygui  
56 -import thirdparty.olefile as olefile  
57 -import ezhexviewer  
58 -  
59 -ABOUT = '~ About olebrowse'  
60 -QUIT = '~ Quit'  
61 -  
62 -  
63 -def about ():  
64 - """  
65 - Display information about this tool  
66 - """  
67 - easygui.textbox(title='About olebrowse', text=__doc__)  
68 -  
69 -  
70 -def browse_stream (ole, stream):  
71 - """  
72 - Browse a stream (hex view or save to file)  
73 - """  
74 - #print 'stream:', stream  
75 - while True:  
76 - msg ='Select an action for the stream "%s", or press Esc to exit' % repr(stream)  
77 - actions = [  
78 - 'Hex view',  
79 -## 'Text view',  
80 -## 'Repr view',  
81 - 'Save stream to file',  
82 - '~ Back to main menu',  
83 - ]  
84 - action = easygui.choicebox(msg, title='olebrowse', choices=actions)  
85 - if action is None or 'Back' in action:  
86 - break  
87 - elif action.startswith('Hex'):  
88 - data = ole.openstream(stream).getvalue()  
89 - ezhexviewer.hexview_data(data, msg='Stream: %s' % stream, title='olebrowse')  
90 -## elif action.startswith('Text'):  
91 -## data = ole.openstream(stream).getvalue()  
92 -## easygui.codebox(title='Text view - %s' % stream, text=data)  
93 -## elif action.startswith('Repr'):  
94 -## data = ole.openstream(stream).getvalue()  
95 -## easygui.codebox(title='Repr view - %s' % stream, text=repr(data))  
96 - elif action.startswith('Save'):  
97 - data = ole.openstream(stream).getvalue()  
98 - fname = easygui.filesavebox(default='stream.bin')  
99 - if fname is not None:  
100 - f = open(fname, 'wb')  
101 - f.write(data)  
102 - f.close()  
103 - easygui.msgbox('stream saved to file %s' % fname)  
104 -  
105 -  
106 -  
107 -def main():  
108 - """  
109 - Main function  
110 - """  
111 - try:  
112 - filename = sys.argv[1]  
113 - except:  
114 - filename = easygui.fileopenbox()  
115 - try:  
116 - ole = olefile.OleFileIO(filename)  
117 - listdir = ole.listdir()  
118 - streams = []  
119 - for direntry in listdir:  
120 - #print direntry  
121 - streams.append('/'.join(direntry))  
122 - streams.append(ABOUT)  
123 - streams.append(QUIT)  
124 - stream = True  
125 - while stream is not None:  
126 - msg ="Select a stream, or press Esc to exit"  
127 - title = "olebrowse"  
128 - stream = easygui.choicebox(msg, title, streams)  
129 - if stream is None or stream == QUIT:  
130 - break  
131 - if stream == ABOUT:  
132 - about()  
133 - else:  
134 - browse_stream(ole, stream)  
135 - except:  
136 - easygui.exceptionbox()  
137 -  
138 -  
139 -  
140 -  
141 -if __name__ == '__main__':  
142 - main()
  1 +#!/usr/bin/env python
  2 +"""
  3 +olebrowse.py
  4 +
  5 +A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to
  6 +view and extract individual data streams.
  7 +
  8 +Usage: olebrowse.py [file]
  9 +
  10 +olebrowse project website: http://www.decalage.info/python/olebrowse
  11 +
  12 +olebrowse is part of the python-oletools package:
  13 +http://www.decalage.info/python/oletools
  14 +
  15 +olebrowse is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info)
  16 +All rights reserved.
  17 +
  18 +Redistribution and use in source and binary forms, with or without modification,
  19 +are permitted provided that the following conditions are met:
  20 +
  21 + * Redistributions of source code must retain the above copyright notice, this
  22 + list of conditions and the following disclaimer.
  23 + * Redistributions in binary form must reproduce the above copyright notice,
  24 + this list of conditions and the following disclaimer in the documentation
  25 + and/or other materials provided with the distribution.
  26 +
  27 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  28 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  29 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  30 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  31 +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  32 +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  33 +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  34 +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  35 +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  36 +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  37 +"""
  38 +
  39 +__version__ = '0.02'
  40 +
  41 +#------------------------------------------------------------------------------
  42 +# CHANGELOG:
  43 +# 2012-09-17 v0.01 PL: - first version
  44 +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
  45 +
  46 +#------------------------------------------------------------------------------
  47 +# TODO:
  48 +# - menu option to open another file
  49 +# - menu option to display properties
  50 +# - menu option to run other oletools, external tools such as OfficeCat?
  51 +# - for a stream, display info: size, path, etc
  52 +# - stream info: magic, entropy, ... ?
  53 +
  54 +import optparse, sys, os
  55 +from thirdparty.easygui import easygui
  56 +import thirdparty.olefile as olefile
  57 +import ezhexviewer
  58 +
  59 +ABOUT = '~ About olebrowse'
  60 +QUIT = '~ Quit'
  61 +
  62 +
  63 +def about ():
  64 + """
  65 + Display information about this tool
  66 + """
  67 + easygui.textbox(title='About olebrowse', text=__doc__)
  68 +
  69 +
  70 +def browse_stream (ole, stream):
  71 + """
  72 + Browse a stream (hex view or save to file)
  73 + """
  74 + #print 'stream:', stream
  75 + while True:
  76 + msg ='Select an action for the stream "%s", or press Esc to exit' % repr(stream)
  77 + actions = [
  78 + 'Hex view',
  79 +## 'Text view',
  80 +## 'Repr view',
  81 + 'Save stream to file',
  82 + '~ Back to main menu',
  83 + ]
  84 + action = easygui.choicebox(msg, title='olebrowse', choices=actions)
  85 + if action is None or 'Back' in action:
  86 + break
  87 + elif action.startswith('Hex'):
  88 + data = ole.openstream(stream).getvalue()
  89 + ezhexviewer.hexview_data(data, msg='Stream: %s' % stream, title='olebrowse')
  90 +## elif action.startswith('Text'):
  91 +## data = ole.openstream(stream).getvalue()
  92 +## easygui.codebox(title='Text view - %s' % stream, text=data)
  93 +## elif action.startswith('Repr'):
  94 +## data = ole.openstream(stream).getvalue()
  95 +## easygui.codebox(title='Repr view - %s' % stream, text=repr(data))
  96 + elif action.startswith('Save'):
  97 + data = ole.openstream(stream).getvalue()
  98 + fname = easygui.filesavebox(default='stream.bin')
  99 + if fname is not None:
  100 + f = open(fname, 'wb')
  101 + f.write(data)
  102 + f.close()
  103 + easygui.msgbox('stream saved to file %s' % fname)
  104 +
  105 +
  106 +
  107 +def main():
  108 + """
  109 + Main function
  110 + """
  111 + try:
  112 + filename = sys.argv[1]
  113 + except:
  114 + filename = easygui.fileopenbox()
  115 + try:
  116 + ole = olefile.OleFileIO(filename)
  117 + listdir = ole.listdir()
  118 + streams = []
  119 + for direntry in listdir:
  120 + #print direntry
  121 + streams.append('/'.join(direntry))
  122 + streams.append(ABOUT)
  123 + streams.append(QUIT)
  124 + stream = True
  125 + while stream is not None:
  126 + msg ="Select a stream, or press Esc to exit"
  127 + title = "olebrowse"
  128 + stream = easygui.choicebox(msg, title, streams)
  129 + if stream is None or stream == QUIT:
  130 + break
  131 + if stream == ABOUT:
  132 + about()
  133 + else:
  134 + browse_stream(ole, stream)
  135 + except:
  136 + easygui.exceptionbox()
  137 +
  138 +
  139 +
  140 +
  141 +if __name__ == '__main__':
  142 + main()
oletools/oleid.py
1 -#!/usr/bin/env python  
2 -"""  
3 -oleid.py  
4 -  
5 -oleid is a script to analyze OLE files such as MS Office documents (e.g. Word,  
6 -Excel), to detect specific characteristics that could potentially indicate that  
7 -the file is suspicious or malicious, in terms of security (e.g. malware).  
8 -For example it can detect VBA macros, embedded Flash objects, fragmentation.  
9 -The results can be displayed or returned as XML for further processing.  
10 -  
11 -Usage: oleid.py <file>  
12 -  
13 -oleid project website: http://www.decalage.info/python/oleid  
14 -  
15 -oleid is part of the python-oletools package:  
16 -http://www.decalage.info/python/oletools  
17 -"""  
18 -  
19 -#=== LICENSE =================================================================  
20 -  
21 -# oleid is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info)  
22 -# All rights reserved.  
23 -#  
24 -# Redistribution and use in source and binary forms, with or without modification,  
25 -# are permitted provided that the following conditions are met:  
26 -#  
27 -# * Redistributions of source code must retain the above copyright notice, this  
28 -# list of conditions and the following disclaimer.  
29 -# * Redistributions in binary form must reproduce the above copyright notice,  
30 -# this list of conditions and the following disclaimer in the documentation  
31 -# and/or other materials provided with the distribution.  
32 -#  
33 -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND  
34 -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED  
35 -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE  
36 -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE  
37 -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  
38 -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR  
39 -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER  
40 -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,  
41 -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  
42 -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
43 -  
44 -  
45 -#------------------------------------------------------------------------------  
46 -# CHANGELOG:  
47 -# 2012-10-29 v0.01 PL: - first version  
48 -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL  
49 -# - improved usage display with -h  
50 -# 2014-11-30 v0.03 PL: - improved output with prettytable  
51 -  
52 -__version__ = '0.03'  
53 -  
54 -  
55 -#------------------------------------------------------------------------------  
56 -# TODO:  
57 -# + extract relevant metadata: codepage, author, application, timestamps, etc  
58 -# - detect RTF and OpenXML  
59 -# - fragmentation  
60 -# - OLE package  
61 -# - entropy  
62 -# - detect PE header?  
63 -# - detect NOPs?  
64 -# - list type of each object in object pool?  
65 -# - criticality for each indicator?: info, low, medium, high  
66 -# - support wildcards with glob?  
67 -# - verbose option  
68 -# - csv, xml output  
69 -  
70 -  
71 -#=== IMPORTS =================================================================  
72 -  
73 -import optparse, sys, os, re, zlib, struct  
74 -import thirdparty.olefile as olefile  
75 -from thirdparty.prettytable import prettytable  
76 -  
77 -  
78 -#=== FUNCTIONS ===============================================================  
79 -  
80 -def detect_flash (data):  
81 - """  
82 - Detect Flash objects (SWF files) within a binary string of data  
83 - return a list of (start_index, length, compressed) tuples, or [] if nothing  
84 - found.  
85 -  
86 - Code inspired from xxxswf.py by Alexander Hanel (but significantly reworked)  
87 - http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html  
88 - """  
89 - #TODO: report  
90 - found = []  
91 - for match in re.finditer('CWS|FWS', data):  
92 - start = match.start()  
93 - if start+8 > len(data):  
94 - # header size larger than remaining data, this is not a SWF  
95 - continue  
96 - #TODO: one struct.unpack should be simpler  
97 - # Read Header  
98 - header = data[start:start+3]  
99 - # Read Version  
100 - ver = struct.unpack('<b', data[start+3])[0]  
101 - # Error check for version above 20  
102 - #TODO: is this accurate? (check SWF specifications)  
103 - if ver > 20:  
104 - continue  
105 - # Read SWF Size  
106 - size = struct.unpack('<i', data[start+4:start+8])[0]  
107 - if start+size > len(data) or size < 1024:  
108 - # declared size larger than remaining data, this is not a SWF  
109 - # or declared size too small for a usual SWF  
110 - continue  
111 - # Read SWF into buffer. If compressed read uncompressed size.  
112 - swf = data[start:start+size]  
113 - compressed = False  
114 - if 'CWS' in header:  
115 - compressed = True  
116 - # compressed SWF: data after header (8 bytes) until the end is  
117 - # compressed with zlib. Attempt to decompress it to check if it is  
118 - # valid  
119 - compressed_data = swf[8:]  
120 - try:  
121 - zlib.decompress(compressed_data)  
122 - except:  
123 - continue  
124 - # else we don't check anything at this stage, we only assume it is a  
125 - # valid SWF. So there might be false positives for uncompressed SWF.  
126 - found.append((start, size, compressed))  
127 - #print 'Found SWF start=%x, length=%d' % (start, size)  
128 - return found  
129 -  
130 -  
131 -#=== CLASSES =================================================================  
132 -  
133 -class Indicator (object):  
134 -  
135 - def __init__(self, _id, value=None, _type=bool, name=None, description=None):  
136 - self.id = _id  
137 - self.value = value  
138 - self.type = _type  
139 - self.name = name  
140 - if name == None:  
141 - self.name = _id  
142 - self.description = description  
143 -  
144 -  
145 -class OleID:  
146 -  
147 - def __init__(self, filename):  
148 - self.filename = filename  
149 - self.indicators = []  
150 -  
151 - def check(self):  
152 - # check if it is actually an OLE file:  
153 - oleformat = Indicator('ole_format', True, name='OLE format')  
154 - self.indicators.append(oleformat)  
155 - if not olefile.isOleFile(self.filename):  
156 - oleformat.value = False  
157 - return self.indicators  
158 - # parse file:  
159 - self.ole = olefile.OleFileIO(self.filename)  
160 - # checks:  
161 - self.check_properties()  
162 - self.check_encrypted()  
163 - self.check_word()  
164 - self.check_excel()  
165 - self.check_powerpoint()  
166 - self.check_visio()  
167 - self.check_ObjectPool()  
168 - self.check_flash()  
169 - self.ole.close()  
170 - return self.indicators  
171 -  
172 - def check_properties (self):  
173 - suminfo = Indicator('has_suminfo', False, name='Has SummaryInformation stream')  
174 - self.indicators.append(suminfo)  
175 - appname = Indicator('appname', 'unknown', _type=str, name='Application name')  
176 - self.indicators.append(appname)  
177 - self.suminfo = {}  
178 - # check stream SummaryInformation  
179 - if self.ole.exists("\x05SummaryInformation"):  
180 - suminfo.value = True  
181 - self.suminfo = self.ole.getproperties("\x05SummaryInformation")  
182 - # check application name:  
183 - appname.value = self.suminfo.get(0x12, 'unknown')  
184 -  
185 - def check_encrypted (self):  
186 - # we keep the pointer to the indicator, can be modified by other checks:  
187 - self.encrypted = Indicator('encrypted', False, name='Encrypted')  
188 - self.indicators.append(self.encrypted)  
189 - # check if bit 1 of security field = 1:  
190 - # (this field may be missing for Powerpoint2000, for example)  
191 - if 0x13 in self.suminfo:  
192 - if self.suminfo[0x13] & 1:  
193 - self.encrypted.value = True  
194 -  
195 - def check_word (self):  
196 - word = Indicator('word', False, name='Word Document',  
197 - description='Contains a WordDocument stream, very likely to be a Microsoft Word Document.')  
198 - self.indicators.append(word)  
199 - self.macros = Indicator('vba_macros', False, name='VBA Macros')  
200 - self.indicators.append(self.macros)  
201 - if self.ole.exists('WordDocument'):  
202 - word.value = True  
203 - # check for Word-specific encryption flag:  
204 - s = self.ole.openstream(["WordDocument"])  
205 - # pass header 10 bytes  
206 - s.read(10)  
207 - # read flag structure:  
208 - temp16 = struct.unpack("H", s.read(2))[0]  
209 - fEncrypted = (temp16 & 0x0100) >> 8  
210 - if fEncrypted:  
211 - self.encrypted.value = True  
212 - s.close()  
213 - # check for VBA macros:  
214 - if self.ole.exists('Macros'):  
215 - self.macros.value = True  
216 -  
217 - def check_excel (self):  
218 - excel = Indicator('excel', False, name='Excel Workbook',  
219 - description='Contains a Workbook or Book stream, very likely to be a Microsoft Excel Workbook.')  
220 - self.indicators.append(excel)  
221 - #self.macros = Indicator('vba_macros', False, name='VBA Macros')  
222 - #self.indicators.append(self.macros)  
223 - if self.ole.exists('Workbook') or self.ole.exists('Book'):  
224 - excel.value = True  
225 - # check for VBA macros:  
226 - if self.ole.exists('_VBA_PROJECT_CUR'):  
227 - self.macros.value = True  
228 -  
229 - def check_powerpoint (self):  
230 - ppt = Indicator('ppt', False, name='PowerPoint Presentation',  
231 - description='Contains a PowerPoint Document stream, very likely to be a Microsoft PowerPoint Presentation.')  
232 - self.indicators.append(ppt)  
233 - if self.ole.exists('PowerPoint Document'):  
234 - ppt.value = True  
235 -  
236 - def check_visio (self):  
237 - visio = Indicator('visio', False, name='Visio Drawing',  
238 - description='Contains a VisioDocument stream, very likely to be a Microsoft Visio Drawing.')  
239 - self.indicators.append(visio)  
240 - if self.ole.exists('VisioDocument'):  
241 - visio.value = True  
242 -  
243 - def check_ObjectPool (self):  
244 - objpool = Indicator('ObjectPool', False, name='ObjectPool',  
245 - description='Contains an ObjectPool stream, very likely to contain embedded OLE objects or files.')  
246 - self.indicators.append(objpool)  
247 - if self.ole.exists('ObjectPool'):  
248 - objpool.value = True  
249 -  
250 -  
251 - def check_flash (self):  
252 - flash = Indicator('flash', 0, _type=int, name='Flash objects',  
253 - description='Number of embedded Flash objects (SWF files) detected in OLE streams. Not 100% accurate, there may be false positives.')  
254 - self.indicators.append(flash)  
255 - for stream in self.ole.listdir():  
256 - data = self.ole.openstream(stream).read()  
257 - found = detect_flash(data)  
258 - # just add to the count of Flash objects:  
259 - flash.value += len(found)  
260 - #print stream, found  
261 -  
262 -  
263 -#=== MAIN =================================================================  
264 -  
265 -def main():  
266 - usage = 'usage: %prog [options] <file>'  
267 - parser = optparse.OptionParser(usage=__doc__ + '\n' + usage)  
268 -## parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream')  
269 -  
270 - (options, args) = parser.parse_args()  
271 -  
272 - # Print help if no argurments are passed  
273 - if len(args) == 0:  
274 - parser.print_help()  
275 - return  
276 -  
277 - for filename in args:  
278 - print '\nFilename:', filename  
279 - oleid = OleID(filename)  
280 - indicators = oleid.check()  
281 -  
282 - #TODO: add description  
283 - #TODO: highlight suspicious indicators  
284 - t = prettytable.PrettyTable(['Indicator', 'Value'])  
285 - t.align = 'l'  
286 - t.max_width = 39  
287 - #t.border = False  
288 -  
289 - for indicator in indicators:  
290 - #print '%s: %s' % (indicator.name, indicator.value)  
291 - t.add_row((indicator.name, indicator.value))  
292 -  
293 - print t  
294 -  
295 -if __name__ == '__main__':  
296 - main()
  1 +#!/usr/bin/env python
  2 +"""
  3 +oleid.py
  4 +
  5 +oleid is a script to analyze OLE files such as MS Office documents (e.g. Word,
  6 +Excel), to detect specific characteristics that could potentially indicate that
  7 +the file is suspicious or malicious, in terms of security (e.g. malware).
  8 +For example it can detect VBA macros, embedded Flash objects, fragmentation.
  9 +The results can be displayed or returned as XML for further processing.
  10 +
  11 +Usage: oleid.py <file>
  12 +
  13 +oleid project website: http://www.decalage.info/python/oleid
  14 +
  15 +oleid is part of the python-oletools package:
  16 +http://www.decalage.info/python/oletools
  17 +"""
  18 +
  19 +#=== LICENSE =================================================================
  20 +
  21 +# oleid is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info)
  22 +# All rights reserved.
  23 +#
  24 +# Redistribution and use in source and binary forms, with or without modification,
  25 +# are permitted provided that the following conditions are met:
  26 +#
  27 +# * Redistributions of source code must retain the above copyright notice, this
  28 +# list of conditions and the following disclaimer.
  29 +# * Redistributions in binary form must reproduce the above copyright notice,
  30 +# this list of conditions and the following disclaimer in the documentation
  31 +# and/or other materials provided with the distribution.
  32 +#
  33 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  34 +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  35 +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  36 +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  37 +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  38 +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  39 +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  40 +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  41 +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  42 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43 +
  44 +
  45 +#------------------------------------------------------------------------------
  46 +# CHANGELOG:
  47 +# 2012-10-29 v0.01 PL: - first version
  48 +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
  49 +# - improved usage display with -h
  50 +# 2014-11-30 v0.03 PL: - improved output with prettytable
  51 +
  52 +__version__ = '0.03'
  53 +
  54 +
  55 +#------------------------------------------------------------------------------
  56 +# TODO:
  57 +# + extract relevant metadata: codepage, author, application, timestamps, etc
  58 +# - detect RTF and OpenXML
  59 +# - fragmentation
  60 +# - OLE package
  61 +# - entropy
  62 +# - detect PE header?
  63 +# - detect NOPs?
  64 +# - list type of each object in object pool?
  65 +# - criticality for each indicator?: info, low, medium, high
  66 +# - support wildcards with glob?
  67 +# - verbose option
  68 +# - csv, xml output
  69 +
  70 +
  71 +#=== IMPORTS =================================================================
  72 +
  73 +import optparse, sys, os, re, zlib, struct
  74 +import thirdparty.olefile as olefile
  75 +from thirdparty.prettytable import prettytable
  76 +
  77 +
  78 +#=== FUNCTIONS ===============================================================
  79 +
def detect_flash (data):
    """
    Detect Flash objects (SWF files) within a binary string of data.

    Every occurrence of the magic 'FWS' (uncompressed) or 'CWS'
    (zlib-compressed) is a candidate. A candidate is kept only if its 8-byte
    header is plausible (version <= 20, declared size >= 1024 and not larger
    than the remaining data) and, for 'CWS', if the payload can be
    decompressed with zlib.

    Code inspired from xxxswf.py by Alexander Hanel (but significantly reworked)
    http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html

    :param data: binary string to scan (str on Python 2, bytes on Python 3)
    :return: list of (start_index, length, compressed) tuples, or [] if
        nothing found; length is the size declared in the SWF header
    """
    #TODO: report
    found = []
    for match in re.finditer(b'CWS|FWS', data):
        start = match.start()
        if start+8 > len(data):
            # header size larger than remaining data, this is not a SWF
            continue
        # Read the whole header at once: 3-byte signature, version as an
        # UNSIGNED byte (a signed read would turn 0x80-0xFF into negative
        # numbers that slip through the version check below), and the
        # declared size as a 32-bit little-endian unsigned integer.
        header, ver, size = struct.unpack('<3sBI', data[start:start+8])
        # Error check for version above 20
        #TODO: is this accurate? (check SWF specifications)
        if ver > 20:
            continue
        if start+size > len(data) or size < 1024:
            # declared size larger than remaining data, this is not a SWF
            # or declared size too small for a usual SWF
            continue
        compressed = (header == b'CWS')
        if compressed:
            # compressed SWF: data after header (8 bytes) until the end is
            # compressed with zlib. Attempt to decompress it to check if it is
            # valid
            # NOTE(review): the SWF specification describes the size field of
            # a CWS file as the length after decompression; the size check
            # above may therefore reject genuine compressed SWFs - confirm.
            try:
                zlib.decompress(data[start+8:start+size])
            except zlib.error:
                continue
        # else we don't check anything at this stage, we only assume it is a
        # valid SWF. So there might be false positives for uncompressed SWF.
        found.append((start, size, compressed))
    return found
  129 +
  130 +
  131 +#=== CLASSES =================================================================
  132 +
class Indicator (object):
    """
    Result of one check performed on a file.

    Attributes:
        id: identifier of the indicator
        value: value of the indicator (None by default)
        type: expected type of the value (bool by default)
        name: display name; falls back to the identifier when not given
        description: optional longer description
    """

    def __init__(self, _id, value=None, _type=bool, name=None, description=None):
        self.id = _id
        self.value = value
        self.type = _type
        # identity test: an object with a custom __eq__ must not be mistaken
        # for a missing name
        self.name = _id if name is None else name
        self.description = description
  143 +
  144 +
class OleID(object):
    """
    Run a series of checks on a file and collect the results as a list of
    Indicator objects (self.indicators).

    Typical use: OleID(filename).check()
    """

    def __init__(self, filename):
        """
        :param filename: path of the file to analyze (passed to olefile)
        """
        self.filename = filename
        self.indicators = []

    def check(self):
        """
        Open the file and run all the checks.

        If the file is not an OLE file, only the 'ole_format' indicator is
        produced (with value False).

        :return: list of Indicator objects (self.indicators)
        """
        # check if it is actually an OLE file:
        oleformat = Indicator('ole_format', True, name='OLE format')
        self.indicators.append(oleformat)
        if not olefile.isOleFile(self.filename):
            oleformat.value = False
            return self.indicators
        # parse file:
        self.ole = olefile.OleFileIO(self.filename)
        # checks (check_word must run before check_excel, which updates the
        # self.macros indicator created by check_word; check_encrypted must
        # run after check_properties, which sets self.suminfo):
        try:
            self.check_properties()
            self.check_encrypted()
            self.check_word()
            self.check_excel()
            self.check_powerpoint()
            self.check_visio()
            self.check_ObjectPool()
            self.check_flash()
        finally:
            # release the file even when one of the checks raises
            self.ole.close()
        return self.indicators

    def check_properties (self):
        """
        Add the 'has_suminfo' and 'appname' indicators, and store the
        properties of the SummaryInformation stream in self.suminfo
        (empty dict when the stream is absent).
        """
        suminfo = Indicator('has_suminfo', False, name='Has SummaryInformation stream')
        self.indicators.append(suminfo)
        appname = Indicator('appname', 'unknown', _type=str, name='Application name')
        self.indicators.append(appname)
        self.suminfo = {}
        # check stream SummaryInformation
        if self.ole.exists("\x05SummaryInformation"):
            suminfo.value = True
            self.suminfo = self.ole.getproperties("\x05SummaryInformation")
            # check application name (property 0x12):
            appname.value = self.suminfo.get(0x12, 'unknown')

    def check_encrypted (self):
        """
        Add the 'encrypted' indicator, set when the lowest bit of property
        0x13 of self.suminfo is set.
        """
        # we keep the pointer to the indicator, can be modified by other checks:
        self.encrypted = Indicator('encrypted', False, name='Encrypted')
        self.indicators.append(self.encrypted)
        # check the lowest bit of the security field (property 0x13):
        # (this field may be missing for Powerpoint2000, for example)
        if 0x13 in self.suminfo and self.suminfo[0x13] & 1:
            self.encrypted.value = True

    def check_word (self):
        """
        Add the 'word' and 'vba_macros' indicators. For a file with a
        WordDocument stream, also update the 'encrypted' indicator from the
        flags found in that stream, and look for a 'Macros' entry.
        """
        word = Indicator('word', False, name='Word Document',
            description='Contains a WordDocument stream, very likely to be a Microsoft Word Document.')
        self.indicators.append(word)
        self.macros = Indicator('vba_macros', False, name='VBA Macros')
        self.indicators.append(self.macros)
        if self.ole.exists('WordDocument'):
            word.value = True
            # check for Word-specific encryption flag:
            s = self.ole.openstream(["WordDocument"])
            try:
                # pass header 10 bytes
                s.read(10)
                # read flag structure:
                flags = s.read(2)
            finally:
                s.close()
            # a truncated stream yields fewer than 2 bytes: skip the flag
            # test instead of failing in struct.unpack
            if len(flags) == 2:
                # explicit little-endian so the result does not depend on
                # the byte order of the host
                temp16 = struct.unpack("<H", flags)[0]
                if temp16 & 0x0100:
                    self.encrypted.value = True
            # check for VBA macros:
            if self.ole.exists('Macros'):
                self.macros.value = True

    def check_excel (self):
        """
        Add the 'excel' indicator, and set the 'vba_macros' indicator
        (created by check_word) when a '_VBA_PROJECT_CUR' entry exists.
        """
        excel = Indicator('excel', False, name='Excel Workbook',
            description='Contains a Workbook or Book stream, very likely to be a Microsoft Excel Workbook.')
        self.indicators.append(excel)
        if self.ole.exists('Workbook') or self.ole.exists('Book'):
            excel.value = True
            # check for VBA macros:
            if self.ole.exists('_VBA_PROJECT_CUR'):
                self.macros.value = True

    def check_powerpoint (self):
        """
        Add the 'ppt' indicator, set when a 'PowerPoint Document' stream
        exists.
        """
        ppt = Indicator('ppt', False, name='PowerPoint Presentation',
            description='Contains a PowerPoint Document stream, very likely to be a Microsoft PowerPoint Presentation.')
        self.indicators.append(ppt)
        if self.ole.exists('PowerPoint Document'):
            ppt.value = True

    def check_visio (self):
        """
        Add the 'visio' indicator, set when a 'VisioDocument' stream exists.
        """
        visio = Indicator('visio', False, name='Visio Drawing',
            description='Contains a VisioDocument stream, very likely to be a Microsoft Visio Drawing.')
        self.indicators.append(visio)
        if self.ole.exists('VisioDocument'):
            visio.value = True

    def check_ObjectPool (self):
        """
        Add the 'ObjectPool' indicator, set when an 'ObjectPool' entry
        exists.
        """
        objpool = Indicator('ObjectPool', False, name='ObjectPool',
            description='Contains an ObjectPool stream, very likely to contain embedded OLE objects or files.')
        self.indicators.append(objpool)
        if self.ole.exists('ObjectPool'):
            objpool.value = True


    def check_flash (self):
        """
        Add the 'flash' indicator: total number of SWF candidates reported
        by detect_flash over all the streams of the file.
        """
        flash = Indicator('flash', 0, _type=int, name='Flash objects',
            description='Number of embedded Flash objects (SWF files) detected in OLE streams. Not 100% accurate, there may be false positives.')
        self.indicators.append(flash)
        for stream in self.ole.listdir():
            data = self.ole.openstream(stream).read()
            # just add to the count of Flash objects:
            flash.value += len(detect_flash(data))
  261 +
  262 +
  263 +#=== MAIN =================================================================
  264 +
def main():
    """
    Command-line entry point: run the OleID checks on every file given as
    argument and print the resulting indicators as a two-column table.

    The usage/help text is printed when no file is given.
    """
    usage = 'usage: %prog [options] <file>'
    parser = optparse.OptionParser(usage=__doc__ + '\n' + usage)
##    parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream')

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if not args:
        parser.print_help()
        return

    for filename in args:
        # print with a single parenthesized argument gives the same output
        # on Python 2 and keeps the module parseable by Python 3
        print('\nFilename: %s' % filename)
        oleid = OleID(filename)
        indicators = oleid.check()

        #TODO: add description
        #TODO: highlight suspicious indicators
        t = prettytable.PrettyTable(['Indicator', 'Value'])
        t.align = 'l'
        t.max_width = 39
        #t.border = False

        for indicator in indicators:
            t.add_row((indicator.name, indicator.value))

        print(t)

if __name__ == '__main__':
    main()
oletools/olemeta.py
1 -#!/usr/bin/env python  
2 -"""  
3 -olemeta.py  
4 -  
5 -olemeta is a script to parse OLE files such as MS Office documents (e.g. Word,  
6 -Excel), to extract all standard properties present in the OLE file.  
7 -  
8 -Usage: olemeta.py <file>  
9 -  
10 -olemeta project website: http://www.decalage.info/python/olemeta  
11 -  
12 -olemeta is part of the python-oletools package:  
13 -http://www.decalage.info/python/oletools  
14 -"""  
15 -  
16 -#=== LICENSE =================================================================  
17 -  
18 -# olemeta is copyright (c) 2013-2014, Philippe Lagadec (http://www.decalage.info)  
19 -# All rights reserved.  
20 -#  
21 -# Redistribution and use in source and binary forms, with or without modification,  
22 -# are permitted provided that the following conditions are met:  
23 -#  
24 -# * Redistributions of source code must retain the above copyright notice, this  
25 -# list of conditions and the following disclaimer.  
26 -# * Redistributions in binary form must reproduce the above copyright notice,  
27 -# this list of conditions and the following disclaimer in the documentation  
28 -# and/or other materials provided with the distribution.  
29 -#  
30 -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND  
31 -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED  
32 -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE  
33 -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE  
34 -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  
35 -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR  
36 -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER  
37 -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,  
38 -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  
39 -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
40 -  
41 -#------------------------------------------------------------------------------  
42 -# CHANGELOG:  
43 -# 2013-07-24 v0.01 PL: - first version  
44 -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL  
45 -# - improved usage display  
46 -  
47 -__version__ = '0.02'  
48 -  
49 -#------------------------------------------------------------------------------  
50 -# TODO:  
51 -# + optparse  
52 -# + nicer output: table with fixed columns, datetime, etc  
53 -# + CSV output  
54 -# + option to only show available properties (by default)  
55 -  
56 -#=== IMPORTS =================================================================  
57 -  
58 -import sys  
59 -import thirdparty.olefile as olefile  
60 -  
61 -  
62 -#=== MAIN =================================================================  
63 -  
# Show the usage text when no file is given. The argument count is tested
# explicitly so that an IndexError raised while parsing the file is not
# mistaken for a missing argument.
if len(sys.argv) < 2:
    sys.exit(__doc__)
ole = olefile.OleFileIO(sys.argv[1])
68 -  
69 -# parse and display metadata:  
70 -meta = ole.get_metadata()  
71 -meta.dump()  
72 -  
73 -ole.close() 1 +#!/usr/bin/env python
  2 +"""
  3 +olemeta.py
  4 +
  5 +olemeta is a script to parse OLE files such as MS Office documents (e.g. Word,
  6 +Excel), to extract all standard properties present in the OLE file.
  7 +
  8 +Usage: olemeta.py <file>
  9 +
  10 +olemeta project website: http://www.decalage.info/python/olemeta
  11 +
  12 +olemeta is part of the python-oletools package:
  13 +http://www.decalage.info/python/oletools
  14 +"""
  15 +
  16 +#=== LICENSE =================================================================
  17 +
  18 +# olemeta is copyright (c) 2013-2015, Philippe Lagadec (http://www.decalage.info)
  19 +# All rights reserved.
  20 +#
  21 +# Redistribution and use in source and binary forms, with or without modification,
  22 +# are permitted provided that the following conditions are met:
  23 +#
  24 +# * Redistributions of source code must retain the above copyright notice, this
  25 +# list of conditions and the following disclaimer.
  26 +# * Redistributions in binary form must reproduce the above copyright notice,
  27 +# this list of conditions and the following disclaimer in the documentation
  28 +# and/or other materials provided with the distribution.
  29 +#
  30 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  31 +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  32 +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  33 +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  34 +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  35 +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  36 +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  37 +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  38 +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  39 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  40 +
  41 +#------------------------------------------------------------------------------
  42 +# CHANGELOG:
  43 +# 2013-07-24 v0.01 PL: - first version
  44 +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
  45 +# - improved usage display
  46 +
  47 +__version__ = '0.02'
  48 +
  49 +#------------------------------------------------------------------------------
  50 +# TODO:
  51 +# + optparse
  52 +# + nicer output: table with fixed columns, datetime, etc
  53 +# + CSV output
  54 +# + option to only show available properties (by default)
  55 +
  56 +#=== IMPORTS =================================================================
  57 +
  58 +import sys
  59 +import thirdparty.olefile as olefile
  60 +
  61 +
  62 +#=== MAIN =================================================================
  63 +
# Show the usage text when no file is given. The argument count is tested
# explicitly so that an IndexError raised while parsing the file is not
# mistaken for a missing argument.
if len(sys.argv) < 2:
    sys.exit(__doc__)

ole = olefile.OleFileIO(sys.argv[1])
try:
    # parse and display metadata:
    meta = ole.get_metadata()
    meta.dump()
finally:
    # release the file even when parsing or display fails
    ole.close()
oletools/oletimes.py
1 -#!/usr/bin/env python  
2 -"""  
3 -oletimes.py  
4 -  
5 -oletimes is a script to parse OLE files such as MS Office documents (e.g. Word,  
6 -Excel), to extract creation and modification times of all streams and storages  
7 -in the OLE file.  
8 -  
9 -Usage: oletimes.py <file>  
10 -  
11 -oletimes project website: http://www.decalage.info/python/oletimes  
12 -  
13 -oletimes is part of the python-oletools package:  
14 -http://www.decalage.info/python/oletools  
15 -"""  
16 -  
17 -#=== LICENSE =================================================================  
18 -  
19 -# oletimes is copyright (c) 2013-2014, Philippe Lagadec (http://www.decalage.info)  
20 -# All rights reserved.  
21 -#  
22 -# Redistribution and use in source and binary forms, with or without modification,  
23 -# are permitted provided that the following conditions are met:  
24 -#  
25 -# * Redistributions of source code must retain the above copyright notice, this  
26 -# list of conditions and the following disclaimer.  
27 -# * Redistributions in binary form must reproduce the above copyright notice,  
28 -# this list of conditions and the following disclaimer in the documentation  
29 -# and/or other materials provided with the distribution.  
30 -#  
31 -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND  
32 -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED  
33 -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE  
34 -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE  
35 -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  
36 -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR  
37 -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER  
38 -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,  
39 -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  
40 -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
41 -  
42 -  
43 -#------------------------------------------------------------------------------  
44 -# CHANGELOG:  
45 -# 2013-07-24 v0.01 PL: - first version  
46 -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL  
47 -# - improved usage display  
48 -# 2014-11-30 v0.03 PL: - improved output with prettytable  
49 -  
50 -__version__ = '0.03'  
51 -  
52 -#------------------------------------------------------------------------------  
53 -# TODO:  
54 -# + optparse  
55 -# + nicer output: table with fixed columns, datetime, etc  
56 -# + CSV output  
57 -# + option to only show available timestamps (by default?)  
58 -  
59 -#=== IMPORTS =================================================================  
60 -  
61 -import sys, datetime  
62 -import thirdparty.olefile as olefile  
63 -from thirdparty.prettytable import prettytable  
64 -  
65 -  
66 -#=== MAIN =================================================================  
67 -  
# Show the usage text when no file is given. The argument count is tested
# explicitly so that an IndexError raised while parsing the file is not
# mistaken for a missing argument.
if len(sys.argv) < 2:
    sys.exit(__doc__)
ole = olefile.OleFileIO(sys.argv[1])
72 -  
def dt2str (dt):
    """
    Format a datetime for display, dropping the microseconds part.

    :param dt: datetime.datetime object, or None
    :return: str, or None when dt is None
    """
    if dt is not None:
        truncated = dt.replace(microsecond=0)
        return str(truncated)
    return None
84 -  
# Build a left-aligned, three-column table: one row for the root entry, then
# one row per stream/storage listed by olefile.
t = prettytable.PrettyTable(['Stream/Storage name', 'Modification Time', 'Creation Time'])
t.align = 'l'
t.max_width = 26
#t.border = False

root_mtime = dt2str(ole.root.getmtime())
root_ctime = dt2str(ole.root.getctime())
t.add_row(('Root', root_mtime, root_ctime))

for obj in ole.listdir(streams=True, storages=True):
    # entry names are displayed with repr()
    label = repr('/'.join(obj))
    mtime = dt2str(ole.getmtime(obj))
    ctime = dt2str(ole.getctime(obj))
    t.add_row((label, mtime, ctime))

print(t)
98 -  
99 -ole.close() 1 +#!/usr/bin/env python
  2 +"""
  3 +oletimes.py
  4 +
  5 +oletimes is a script to parse OLE files such as MS Office documents (e.g. Word,
  6 +Excel), to extract creation and modification times of all streams and storages
  7 +in the OLE file.
  8 +
  9 +Usage: oletimes.py <file>
  10 +
  11 +oletimes project website: http://www.decalage.info/python/oletimes
  12 +
  13 +oletimes is part of the python-oletools package:
  14 +http://www.decalage.info/python/oletools
  15 +"""
  16 +
  17 +#=== LICENSE =================================================================
  18 +
  19 +# oletimes is copyright (c) 2013-2015, Philippe Lagadec (http://www.decalage.info)
  20 +# All rights reserved.
  21 +#
  22 +# Redistribution and use in source and binary forms, with or without modification,
  23 +# are permitted provided that the following conditions are met:
  24 +#
  25 +# * Redistributions of source code must retain the above copyright notice, this
  26 +# list of conditions and the following disclaimer.
  27 +# * Redistributions in binary form must reproduce the above copyright notice,
  28 +# this list of conditions and the following disclaimer in the documentation
  29 +# and/or other materials provided with the distribution.
  30 +#
  31 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  32 +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  33 +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  34 +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  35 +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  36 +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  37 +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38 +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  39 +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  40 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  41 +
  42 +
  43 +#------------------------------------------------------------------------------
  44 +# CHANGELOG:
  45 +# 2013-07-24 v0.01 PL: - first version
  46 +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
  47 +# - improved usage display
  48 +# 2014-11-30 v0.03 PL: - improved output with prettytable
  49 +
  50 +__version__ = '0.03'
  51 +
  52 +#------------------------------------------------------------------------------
  53 +# TODO:
  54 +# + optparse
  55 +# + nicer output: table with fixed columns, datetime, etc
  56 +# + CSV output
  57 +# + option to only show available timestamps (by default?)
  58 +
  59 +#=== IMPORTS =================================================================
  60 +
  61 +import sys, datetime
  62 +import thirdparty.olefile as olefile
  63 +from thirdparty.prettytable import prettytable
  64 +
  65 +
  66 +#=== MAIN =================================================================
  67 +
# Show the usage text when no file is given. The argument count is tested
# explicitly so that an IndexError raised while parsing the file is not
# mistaken for a missing argument.
if len(sys.argv) < 2:
    sys.exit(__doc__)
ole = olefile.OleFileIO(sys.argv[1])
  72 +
def dt2str (dt):
    """
    Format a datetime for display, dropping the microseconds part.

    :param dt: datetime.datetime object, or None
    :return: str, or None when dt is None
    """
    if dt is not None:
        truncated = dt.replace(microsecond=0)
        return str(truncated)
    return None
  84 +
# Build and print a left-aligned, three-column table: one row for the root
# entry, then one row per stream/storage listed by olefile.
try:
    t = prettytable.PrettyTable(['Stream/Storage name', 'Modification Time', 'Creation Time'])
    t.align = 'l'
    t.max_width = 26
    #t.border = False

    t.add_row(('Root', dt2str(ole.root.getmtime()), dt2str(ole.root.getctime())))

    for obj in ole.listdir(streams=True, storages=True):
        # entry names are displayed with repr()
        t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj))))

    # print with a single parenthesized argument gives the same output on
    # Python 2 and keeps the script parseable by Python 3
    print(t)
finally:
    # release the file even when reading a timestamp fails
    ole.close()
oletools/pyxswf.py
1 -#!/usr/bin/env python  
2 -"""  
3 -pyxswf.py  
4 -  
5 -pyxswf is a script to detect, extract and analyze Flash objects (SWF) that may  
6 -be embedded in files such as MS Office documents (e.g. Word, Excel),  
7 -which is especially useful for malware analysis.  
8 -  
9 -pyxswf is an extension to xxxswf.py published by Alexander Hanel on  
10 -http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html  
11 -Compared to xxxswf, it can extract streams from MS Office documents by parsing  
12 -their OLE structure properly (-o option), which is necessary when streams are  
13 -fragmented.  
14 -Stream fragmentation is a known obfuscation technique, as explained on  
15 -http://www.breakingpointsystems.com/resources/blog/evasion-with-ole2-fragmentation/  
16 -  
17 -It can also extract Flash objects from RTF documents, by parsing embedded  
18 -objects encoded in hexadecimal format (-f option).  
19 -  
20 -pyxswf project website: http://www.decalage.info/python/pyxswf  
21 -  
22 -pyxswf is part of the python-oletools package:  
23 -http://www.decalage.info/python/oletools  
24 -"""  
25 -  
26 -#=== LICENSE =================================================================  
27 -  
28 -# pyxswf is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info)  
29 -# All rights reserved.  
30 -#  
31 -# Redistribution and use in source and binary forms, with or without modification,  
32 -# are permitted provided that the following conditions are met:  
33 -#  
34 -# * Redistributions of source code must retain the above copyright notice, this  
35 -# list of conditions and the following disclaimer.  
36 -# * Redistributions in binary form must reproduce the above copyright notice,  
37 -# this list of conditions and the following disclaimer in the documentation  
38 -# and/or other materials provided with the distribution.  
39 -#  
40 -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND  
41 -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED  
42 -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE  
43 -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE  
44 -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  
45 -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR  
46 -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER  
47 -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,  
48 -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  
49 -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
50 -  
51 -#------------------------------------------------------------------------------  
52 -# CHANGELOG:  
53 -# 2012-09-17 v0.01 PL: - first version  
54 -# 2012-11-09 v0.02 PL: - added RTF embedded objects extraction  
55 -# 2014-11-29 v0.03 PL: - use olefile instead of OleFileIO_PL  
56 -# - improved usage display with -h  
57 -  
58 -__version__ = '0.03'  
59 -  
60 -#------------------------------------------------------------------------------  
61 -# TODO:  
62 -# + add support for LZMA-compressed flash files (ZWS header)  
63 -# references: http://blog.malwaretracker.com/2014/01/cve-2013-5331-evaded-av-by-using.html  
64 -# http://code.metager.de/source/xref/adobe/flash/crossbridge/tools/swf-info.py  
65 -# http://room32.dyndns.org/forums/showthread.php?766-SWFCompression  
66 -# sample code: http://room32.dyndns.org/SWFCompression.py  
67 -# - check if file is OLE  
68 -# - support -r  
69 -  
70 -  
71 -#=== IMPORTS =================================================================  
72 -  
73 -import optparse, sys, os, rtfobj, StringIO  
74 -from thirdparty.xxxswf import xxxswf  
75 -import thirdparty.olefile as olefile  
76 -  
77 -  
78 -#=== MAIN =================================================================  
79 -  
def main():
    """
    Command-line entry point.

    With -o, every stream of each OLE file given as argument is scanned;
    with -f, every embedded object of each RTF file is scanned. In both
    cases, data containing the 'FWS' or 'CWS' magic is handed over to
    xxxswf.disneyland together with the parsed options. Without -o or -f,
    control is passed to xxxswf.main().

    The usage/help text is printed when no file is given.
    """
    # Scenarios:
    # Scan file for SWF(s)
    # Scan file for SWF(s) and extract them
    # Scan file for SWF(s) and scan them with Yara
    # Scan file for SWF(s), extract them and scan with Yara
    # Scan directory recursively for files that contain SWF(s)
    # Scan directory recursively for files that contain SWF(s) and extract them

    usage = 'usage: %prog [options] <file.bad>'
    parser = optparse.OptionParser(usage=__doc__ + '\n' + usage)
    parser.add_option('-x', '--extract', action='store_true', dest='extract', help='Extracts the embedded SWF(s), names it MD5HASH.swf & saves it in the working dir. No addition args needed')
    parser.add_option('-y', '--yara', action='store_true', dest='yara', help='Scans the SWF(s) with yara. If the SWF(s) is compressed it will be deflated. No addition args needed')
    parser.add_option('-s', '--md5scan', action='store_true', dest='md5scan', help='Scans the SWF(s) for MD5 signatures. Please see func checkMD5 to define hashes. No addition args needed')
    parser.add_option('-H', '--header', action='store_true', dest='header', help='Displays the SWFs file header. No addition args needed')
    parser.add_option('-d', '--decompress', action='store_true', dest='decompress', help='Deflates compressed SWFS(s)')
    parser.add_option('-r', '--recdir', dest='PATH', type='string', help='Will recursively scan a directory for files that contain SWFs. Must provide path in quotes')
    parser.add_option('-c', '--compress', action='store_true', dest='compress', help='Compresses the SWF using Zlib')

    parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream')
    parser.add_option('-f', '--rtf', action='store_true', dest='rtf', help='Parse an RTF file to look for SWF in each embedded object')


    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if not args:
        parser.print_help()
        return

    # OLE MODE:
    if options.ole:
        for filename in args:
            ole = olefile.OleFileIO(filename)
            try:
                for direntry in ole.direntries:
                    # only stream entries are scanned
                    if direntry is None or direntry.entry_type != olefile.STGTY_STREAM:
                        continue
                    f = ole._open(direntry.isectStart, direntry.size)
                    try:
                        # check if data contains the SWF magic: FWS or CWS
                        data = f.getvalue()
                        if 'FWS' in data or 'CWS' in data:
                            print('OLE stream: %s' % repr(direntry.name))
                            # call xxxswf to scan or extract Flash files:
                            xxxswf.disneyland(f, direntry.name, options)
                    finally:
                        # release the stream even when the scan raises
                        f.close()
            finally:
                # release the file even when the scan raises
                ole.close()

    # RTF MODE:
    elif options.rtf:
        for filename in args:
            for index, data in rtfobj.rtf_iter_objects(filename):
                if 'FWS' in data or 'CWS' in data:
                    print('RTF embedded object size %d at index %08X' % (len(data), index))
                    f = StringIO.StringIO(data)
                    name = 'RTF_embedded_object_%08X' % index
                    # call xxxswf to scan or extract Flash files:
                    xxxswf.disneyland(f, name, options)

    else:
        xxxswf.main()
139 -  
140 -if __name__ == '__main__':  
141 - main() 1 +#!/usr/bin/env python
  2 +"""
  3 +pyxswf.py
  4 +
  5 +pyxswf is a script to detect, extract and analyze Flash objects (SWF) that may
  6 +be embedded in files such as MS Office documents (e.g. Word, Excel),
  7 +which is especially useful for malware analysis.
  8 +
  9 +pyxswf is an extension to xxxswf.py published by Alexander Hanel on
  10 +http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html
  11 +Compared to xxxswf, it can extract streams from MS Office documents by parsing
  12 +their OLE structure properly (-o option), which is necessary when streams are
  13 +fragmented.
  14 +Stream fragmentation is a known obfuscation technique, as explained on
  15 +http://www.breakingpointsystems.com/resources/blog/evasion-with-ole2-fragmentation/
  16 +
  17 +It can also extract Flash objects from RTF documents, by parsing embedded
  18 +objects encoded in hexadecimal format (-f option).
  19 +
  20 +pyxswf project website: http://www.decalage.info/python/pyxswf
  21 +
  22 +pyxswf is part of the python-oletools package:
  23 +http://www.decalage.info/python/oletools
  24 +"""
  25 +
  26 +#=== LICENSE =================================================================
  27 +
  28 +# pyxswf is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info)
  29 +# All rights reserved.
  30 +#
  31 +# Redistribution and use in source and binary forms, with or without modification,
  32 +# are permitted provided that the following conditions are met:
  33 +#
  34 +# * Redistributions of source code must retain the above copyright notice, this
  35 +# list of conditions and the following disclaimer.
  36 +# * Redistributions in binary form must reproduce the above copyright notice,
  37 +# this list of conditions and the following disclaimer in the documentation
  38 +# and/or other materials provided with the distribution.
  39 +#
  40 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  41 +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  42 +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  43 +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  44 +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  45 +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  46 +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  47 +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  48 +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  49 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  50 +
  51 +#------------------------------------------------------------------------------
  52 +# CHANGELOG:
  53 +# 2012-09-17 v0.01 PL: - first version
  54 +# 2012-11-09 v0.02 PL: - added RTF embedded objects extraction
  55 +# 2014-11-29 v0.03 PL: - use olefile instead of OleFileIO_PL
  56 +# - improved usage display with -h
  57 +
  58 +__version__ = '0.03'
  59 +
  60 +#------------------------------------------------------------------------------
  61 +# TODO:
  62 +# + add support for LZMA-compressed flash files (ZWS header)
  63 +# references: http://blog.malwaretracker.com/2014/01/cve-2013-5331-evaded-av-by-using.html
  64 +# http://code.metager.de/source/xref/adobe/flash/crossbridge/tools/swf-info.py
  65 +# http://room32.dyndns.org/forums/showthread.php?766-SWFCompression
  66 +# sample code: http://room32.dyndns.org/SWFCompression.py
  67 +# - check if file is OLE
  68 +# - support -r
  69 +
  70 +
  71 +#=== IMPORTS =================================================================
  72 +
  73 +import optparse, sys, os, rtfobj, StringIO
  74 +from thirdparty.xxxswf import xxxswf
  75 +import thirdparty.olefile as olefile
  76 +
  77 +
  78 +#=== MAIN =================================================================
  79 +
def main():
    """
    Command-line entry point of pyxswf.

    Parses the command-line options, then depending on the selected mode:

    - with -o/--ole, opens each file given as argument with olefile, reads
      every stream entry and hands each stream containing the SWF magic
      ('FWS' or 'CWS') to xxxswf.disneyland;
    - with -f/--rtf, iterates over the hex-encoded objects returned by
      rtfobj.rtf_iter_objects for each file and hands each object containing
      the SWF magic to xxxswf.disneyland;
    - otherwise, delegates everything to xxxswf.main().

    When no file is given on the command line, the help is printed and the
    function returns without doing anything else.
    """
    # Scenarios:
    # Scan file for SWF(s)
    # Scan file for SWF(s) and extract them
    # Scan file for SWF(s) and scan them with Yara
    # Scan file for SWF(s), extract them and scan with Yara
    # Scan directory recursively for files that contain SWF(s)
    # Scan directory recursively for files that contain SWF(s) and extract them

    usage = 'usage: %prog [options] <file.bad>'
    parser = optparse.OptionParser(usage=__doc__ + '\n' + usage)
    parser.add_option('-x', '--extract', action='store_true', dest='extract', help='Extracts the embedded SWF(s), names it MD5HASH.swf & saves it in the working dir. No addition args needed')
    parser.add_option('-y', '--yara', action='store_true', dest='yara', help='Scans the SWF(s) with yara. If the SWF(s) is compressed it will be deflated. No addition args needed')
    parser.add_option('-s', '--md5scan', action='store_true', dest='md5scan', help='Scans the SWF(s) for MD5 signatures. Please see func checkMD5 to define hashes. No addition args needed')
    parser.add_option('-H', '--header', action='store_true', dest='header', help='Displays the SWFs file header. No addition args needed')
    parser.add_option('-d', '--decompress', action='store_true', dest='decompress', help='Deflates compressed SWFS(s)')
    parser.add_option('-r', '--recdir', dest='PATH', type='string', help='Will recursively scan a directory for files that contain SWFs. Must provide path in quotes')
    parser.add_option('-c', '--compress', action='store_true', dest='compress', help='Compresses the SWF using Zlib')

    parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream')
    parser.add_option('-f', '--rtf', action='store_true', dest='rtf', help='Parse an RTF file to look for SWF in each embedded object')

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if not args:
        parser.print_help()
        return

    # OLE MODE:
    if options.ole:
        for filename in args:
            ole = olefile.OleFileIO(filename)
            # try/finally: the OLE file must be closed even if scanning one
            # of its streams raises an exception
            try:
                for direntry in ole.direntries:
                    # skip unused directory slots and anything that is not a stream
                    if direntry is None or direntry.entry_type != olefile.STGTY_STREAM:
                        continue
                    # NOTE: _open is a private olefile method; it is given the
                    # first sector and the size of the stream
                    f = ole._open(direntry.isectStart, direntry.size)
                    try:
                        # check if data contains the SWF magic: FWS or CWS
                        data = f.getvalue()
                        if 'FWS' in data or 'CWS' in data:
                            print('OLE stream: %s' % repr(direntry.name))
                            # call xxxswf to scan or extract Flash files:
                            xxxswf.disneyland(f, direntry.name, options)
                    finally:
                        f.close()
            finally:
                ole.close()

    # RTF MODE:
    elif options.rtf:
        for filename in args:
            for index, data in rtfobj.rtf_iter_objects(filename):
                if 'FWS' in data or 'CWS' in data:
                    print('RTF embedded object size %d at index %08X' % (len(data), index))
                    # wrap the decoded object in a file-like object for xxxswf
                    f = StringIO.StringIO(data)
                    name = 'RTF_embedded_object_%08X' % index
                    # call xxxswf to scan or extract Flash files:
                    xxxswf.disneyland(f, name, options)

    # DEFAULT MODE: let xxxswf handle the command line by itself
    else:
        xxxswf.main()
  139 +
  140 +if __name__ == '__main__':
  141 + main()
oletools/rtfobj.py
1 -#!/usr/bin/env python  
2 -"""  
3 -rtfobj.py - Philippe Lagadec 2013-04-02  
4 -  
5 -rtfobj is a Python module to extract embedded objects from RTF files, such as  
6 -OLE ojects. It can be used as a Python library or a command-line tool.  
7 -  
8 -Usage: rtfobj.py <file.rtf>  
9 -  
10 -rtfobj project website: http://www.decalage.info/python/rtfobj  
11 -  
12 -rtfobj is part of the python-oletools package:  
13 -http://www.decalage.info/python/oletools  
14 -"""  
15 -  
16 -#=== LICENSE =================================================================  
17 -  
18 -# rtfobj is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info)  
19 -# All rights reserved.  
20 -#  
21 -# Redistribution and use in source and binary forms, with or without modification,  
22 -# are permitted provided that the following conditions are met:  
23 -#  
24 -# * Redistributions of source code must retain the above copyright notice, this  
25 -# list of conditions and the following disclaimer.  
26 -# * Redistributions in binary form must reproduce the above copyright notice,  
27 -# this list of conditions and the following disclaimer in the documentation  
28 -# and/or other materials provided with the distribution.  
29 -#  
30 -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND  
31 -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED  
32 -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE  
33 -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE  
34 -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  
35 -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR  
36 -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER  
37 -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,  
38 -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  
39 -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
40 -  
41 -  
42 -#------------------------------------------------------------------------------  
43 -# CHANGELOG:  
44 -# 2012-11-09 v0.01 PL: - first version  
45 -# 2013-04-02 v0.02 PL: - fixed bug in main  
46 -  
47 -__version__ = '0.02'  
48 -  
49 -#------------------------------------------------------------------------------  
50 -# TODO:  
51 -# - improve regex pattern for better performance?  
52 -# - allow semicolon within hex, as found in this sample:  
53 -# http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html  
54 -  
55 -#=== IMPORTS =================================================================  
56 -  
57 -import re, sys, string, binascii  
58 -  
59 -  
60 -#=== CONSTANTS=================================================================  
61 -  
62 -# REGEX pattern to extract embedded OLE objects in hexadecimal format:  
63 -# alphanum digit: [0-9A-Fa-f]  
64 -# hex char = two alphanum digits: [0-9A-Fa-f]{2}  
65 -# several hex chars, at least 4: (?:[0-9A-Fa-f]{2}){4,}  
66 -# at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s*  
67 -PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'  
68 -# improved pattern, allowing semicolons within hex:  
69 -#PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'  
70 -  
71 -# a dummy translation table for str.translate, which does not change anythying:  
72 -TRANSTABLE_NOCHANGE = string.maketrans('', '')  
73 -  
74 -  
75 -#=== FUNCTIONS =================================================================  
76 -  
77 -def rtf_iter_objects (filename, min_size=32):  
78 - """  
79 - Open a RTF file, extract each embedded object encoded in hexadecimal of  
80 - size > min_size, yield the index of the object in the RTF file and its data  
81 - in binary format.  
82 - This is an iterator.  
83 - """  
84 - data = open(filename, 'rb').read()  
85 - for m in re.finditer(PATTERN, data):  
86 - found = m.group(0)  
87 - # remove all whitespace and line feeds:  
88 - #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE  
89 - found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v')  
90 - found = binascii.unhexlify(found)  
91 - #print repr(found)  
92 - if len(found)>min_size:  
93 - yield m.start(), found  
94 -  
95 -  
96 -#=== MAIN =================================================================  
97 -  
98 -if __name__ == '__main__':  
99 - if len(sys.argv)<2:  
100 - sys.exit(__doc__)  
101 - for index, data in rtf_iter_objects(sys.argv[1]):  
102 - print 'found object size %d at index %08X' % (len(data), index)  
103 - fname = 'object_%08X.bin' % index  
104 - print 'saving to file %s' % fname  
105 - open(fname, 'wb').write(data) 1 +#!/usr/bin/env python
  2 +"""
  3 +rtfobj.py - Philippe Lagadec 2013-04-02
  4 +
  5 +rtfobj is a Python module to extract embedded objects from RTF files, such as
  6 +OLE objects. It can be used as a Python library or a command-line tool.
  7 +
  8 +Usage: rtfobj.py <file.rtf>
  9 +
  10 +rtfobj project website: http://www.decalage.info/python/rtfobj
  11 +
  12 +rtfobj is part of the python-oletools package:
  13 +http://www.decalage.info/python/oletools
  14 +"""
  15 +
  16 +#=== LICENSE =================================================================
  17 +
  18 +# rtfobj is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info)
  19 +# All rights reserved.
  20 +#
  21 +# Redistribution and use in source and binary forms, with or without modification,
  22 +# are permitted provided that the following conditions are met:
  23 +#
  24 +# * Redistributions of source code must retain the above copyright notice, this
  25 +# list of conditions and the following disclaimer.
  26 +# * Redistributions in binary form must reproduce the above copyright notice,
  27 +# this list of conditions and the following disclaimer in the documentation
  28 +# and/or other materials provided with the distribution.
  29 +#
  30 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  31 +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  32 +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  33 +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  34 +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  35 +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  36 +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  37 +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  38 +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  39 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  40 +
  41 +
  42 +#------------------------------------------------------------------------------
  43 +# CHANGELOG:
  44 +# 2012-11-09 v0.01 PL: - first version
  45 +# 2013-04-02 v0.02 PL: - fixed bug in main
  46 +
  47 +__version__ = '0.02'
  48 +
  49 +#------------------------------------------------------------------------------
  50 +# TODO:
  51 +# - improve regex pattern for better performance?
  52 +# - allow semicolon within hex, as found in this sample:
  53 +# http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html
  54 +
  55 +#=== IMPORTS =================================================================
  56 +
  57 +import re, sys, string, binascii
  58 +
  59 +
  60 +#=== CONSTANTS=================================================================
  61 +
# REGEX pattern to extract embedded OLE objects in hexadecimal format:
# hex digit: [0-9A-Fa-f]
# one encoded byte = two hex digits: [0-9A-Fa-f]{2}
# a run of at least 4 encoded bytes: (?:[0-9A-Fa-f]{2}){4,}
# optional leading runs of encoded bytes, each followed by optional
# whitespace or CR/LF: (?:(?:[0-9A-Fa-f]{2})+\s*)*
# The pattern is a bytes literal because the RTF file is read in binary mode.
# With Python 2.6/2.7 a bytes literal is a plain str, so the value is the same.
PATTERN = br'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'
# TODO: write an improved pattern allowing semicolons within hex (the candidate
# that used to be commented out here was identical to PATTERN).

# a dummy translation table for translate(), which does not change anything:
# the 256 byte values in order (same value as string.maketrans('', '') with
# Python 2, but built in a way that does not depend on the string module)
TRANSTABLE_NOCHANGE = bytes(bytearray(range(256)))


#=== FUNCTIONS =================================================================

def rtf_iter_objects (filename, min_size=32):
    """
    Open a RTF file, extract each embedded object encoded in hexadecimal of
    size > min_size, yield the index of the object in the RTF file and its data
    in binary format.
    This is an iterator.

    filename: path of the RTF file to read (opened in binary mode).
    min_size: decoded objects of min_size bytes or less are ignored.

    Each yielded item is a tuple (index, data): index is the offset in the
    file where the hexadecimal run starts, data is the decoded binary string.
    """
    # read the whole file at once, and close it before scanning:
    with open(filename, 'rb') as rtf_file:
        data = rtf_file.read()
    for m in re.finditer(PATTERN, data):
        found = m.group(0)
        # remove all whitespace and line feeds:
        #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE
        found = found.translate(TRANSTABLE_NOCHANGE, b' \t\r\n\f\v')
        # PATTERN only matches whole pairs of hex digits, so what is left
        # always has an even length, as required by unhexlify
        found = binascii.unhexlify(found)
        if len(found) > min_size:
            yield m.start(), found
  94 +
  95 +
  96 +#=== MAIN =================================================================
  97 +
if __name__ == '__main__':
    # Command-line mode: save every embedded object found in the RTF file
    # given as first argument to its own file "object_<index>.bin", using a
    # relative path (i.e. in the current working directory).
    if len(sys.argv) < 2:
        # no file specified: exit with the module docstring as message
        sys.exit(__doc__)
    for index, data in rtf_iter_objects(sys.argv[1]):
        print('found object size %d at index %08X' % (len(data), index))
        fname = 'object_%08X.bin' % index
        print('saving to file %s' % fname)
        # close the output file explicitly, so that the data is flushed to
        # disk without relying on garbage collection of the file object
        outfile = open(fname, 'wb')
        try:
            outfile.write(data)
        finally:
            outfile.close()