Commit cda797574d2076115cc8547c9ccc74aa5664a991

Authored by Philippe Lagadec
1 parent a4ffb743

changed line endings from CRLF to LF in all scripts to improve Linux/Unix compatibility

oletools/ezhexviewer.py
1   -#!/usr/bin/env python
2   -"""
3   -ezhexviewer.py
4   -
5   -A simple hexadecimal viewer based on easygui. It should work on any platform
6   -with Python 2.x.
7   -
8   -Usage: ezhexviewer.py [file]
9   -
10   -Usage in a python application:
11   -
12   - import ezhexviewer
13   - ezhexviewer.hexview_file(filename)
14   - ezhexviewer.hexview_data(data)
15   -
16   -
17   -ezhexviewer project website: http://www.decalage.info/python/ezhexviewer
18   -
19   -ezhexviewer is copyright (c) 2012, Philippe Lagadec (http://www.decalage.info)
20   -All rights reserved.
21   -
22   -Redistribution and use in source and binary forms, with or without modification,
23   -are permitted provided that the following conditions are met:
24   -
25   - * Redistributions of source code must retain the above copyright notice, this
26   - list of conditions and the following disclaimer.
27   - * Redistributions in binary form must reproduce the above copyright notice,
28   - this list of conditions and the following disclaimer in the documentation
29   - and/or other materials provided with the distribution.
30   -
31   -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
32   -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
33   -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
34   -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
35   -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36   -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
37   -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
39   -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40   -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41   -"""
42   -
43   -__version__ = '0.01'
44   -
45   -#------------------------------------------------------------------------------
46   -# CHANGELOG:
47   -# 2012-09-17 v0.01 PL: - first version
48   -# 2012-10-04 v0.02 PL: - added license
49   -
50   -#------------------------------------------------------------------------------
51   -# TODO:
52   -# + options to set title and msg
53   -
54   -
55   -from thirdparty.easygui import easygui
56   -import sys
57   -
58   -#------------------------------------------------------------------------------
59   -# The following code (hexdump3 only) is a modified version of the hex dumper
60   -# recipe published on ASPN by Sebastien Keim and Raymond Hettinger under the
61   -# PSF license. I added the startindex parameter.
62   -# see http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/142812
63   -# PSF license: http://docs.python.org/license.html
64   -# Copyright (c) 2001-2012 Python Software Foundation; All Rights Reserved
65   -
66   -FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)])
67   -
68   -def hexdump3(src, length=8, startindex=0):
69   - """
70   - Returns a hexadecimal dump of a binary string.
71   - length: number of bytes per row.
72   - startindex: index of 1st byte.
73   - """
74   - result=[]
75   - for i in xrange(0, len(src), length):
76   - s = src[i:i+length]
77   - hexa = ' '.join(["%02X"%ord(x) for x in s])
78   - printable = s.translate(FILTER)
79   - result.append("%08X %-*s %s\n" % (i+startindex, length*3, hexa, printable))
80   - return ''.join(result)
81   -
82   -# end of PSF-licensed code.
83   -#------------------------------------------------------------------------------
84   -
85   -
86   -def hexview_data (data, msg='', title='ezhexviewer', length=16, startindex=0):
87   - hex = hexdump3(data, length=length, startindex=startindex)
88   - easygui.codebox(msg=msg, title=title, text=hex)
89   -
90   -
91   -def hexview_file (filename, msg='', title='ezhexviewer', length=16, startindex=0):
92   - data = open(filename, 'rb').read()
93   - hexview_data(data, msg=msg, title=title, length=length, startindex=startindex)
94   -
95   -
96   -if __name__ == '__main__':
97   - try:
98   - filename = sys.argv[1]
99   - except:
100   - filename = easygui.fileopenbox()
101   - if filename:
102   - try:
103   - hexview_file(filename, msg='File: %s' % filename)
104   - except:
105   - easygui.exceptionbox(msg='Error:', title='ezhexviewer')
  1 +#!/usr/bin/env python
  2 +"""
  3 +ezhexviewer.py
  4 +
  5 +A simple hexadecimal viewer based on easygui. It should work on any platform
  6 +with Python 2.x.
  7 +
  8 +Usage: ezhexviewer.py [file]
  9 +
  10 +Usage in a python application:
  11 +
  12 + import ezhexviewer
  13 + ezhexviewer.hexview_file(filename)
  14 + ezhexviewer.hexview_data(data)
  15 +
  16 +
  17 +ezhexviewer project website: http://www.decalage.info/python/ezhexviewer
  18 +
  19 +ezhexviewer is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info)
  20 +All rights reserved.
  21 +
  22 +Redistribution and use in source and binary forms, with or without modification,
  23 +are permitted provided that the following conditions are met:
  24 +
  25 + * Redistributions of source code must retain the above copyright notice, this
  26 + list of conditions and the following disclaimer.
  27 + * Redistributions in binary form must reproduce the above copyright notice,
  28 + this list of conditions and the following disclaimer in the documentation
  29 + and/or other materials provided with the distribution.
  30 +
  31 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  32 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  33 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  34 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  35 +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  36 +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  37 +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38 +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  39 +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  40 +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  41 +"""
  42 +
  43 +__version__ = '0.02'
  44 +
  45 +#------------------------------------------------------------------------------
  46 +# CHANGELOG:
  47 +# 2012-09-17 v0.01 PL: - first version
  48 +# 2012-10-04 v0.02 PL: - added license
  49 +
  50 +#------------------------------------------------------------------------------
  51 +# TODO:
  52 +# + options to set title and msg
  53 +
  54 +
  55 +from thirdparty.easygui import easygui
  56 +import sys
  57 +
  58 +#------------------------------------------------------------------------------
  59 +# The following code (hexdump3 only) is a modified version of the hex dumper
  60 +# recipe published on ASPN by Sebastien Keim and Raymond Hettinger under the
  61 +# PSF license. I added the startindex parameter.
  62 +# see http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/142812
  63 +# PSF license: http://docs.python.org/license.html
  64 +# Copyright (c) 2001-2012 Python Software Foundation; All Rights Reserved
  65 +
  66 +FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)])
  67 +
  68 +def hexdump3(src, length=8, startindex=0):
  69 + """
  70 + Returns a hexadecimal dump of a binary string.
  71 + length: number of bytes per row.
  72 + startindex: index of 1st byte.
  73 + """
  74 + result=[]
  75 + for i in xrange(0, len(src), length):
  76 + s = src[i:i+length]
  77 + hexa = ' '.join(["%02X"%ord(x) for x in s])
  78 + printable = s.translate(FILTER)
  79 + result.append("%08X %-*s %s\n" % (i+startindex, length*3, hexa, printable))
  80 + return ''.join(result)
  81 +
  82 +# end of PSF-licensed code.
  83 +#------------------------------------------------------------------------------
  84 +
  85 +
  86 +def hexview_data (data, msg='', title='ezhexviewer', length=16, startindex=0):
  87 + hex = hexdump3(data, length=length, startindex=startindex)
  88 + easygui.codebox(msg=msg, title=title, text=hex)
  89 +
  90 +
  91 +def hexview_file (filename, msg='', title='ezhexviewer', length=16, startindex=0):
  92 + data = open(filename, 'rb').read()
  93 + hexview_data(data, msg=msg, title=title, length=length, startindex=startindex)
  94 +
  95 +
  96 +if __name__ == '__main__':
  97 + try:
  98 + filename = sys.argv[1]
  99 + except:
  100 + filename = easygui.fileopenbox()
  101 + if filename:
  102 + try:
  103 + hexview_file(filename, msg='File: %s' % filename)
  104 + except:
  105 + easygui.exceptionbox(msg='Error:', title='ezhexviewer')
... ...
oletools/olebrowse.py
1   -#!/usr/bin/env python
2   -"""
3   -olebrowse.py
4   -
5   -A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to
6   -view and extract individual data streams.
7   -
8   -Usage: olebrowse.py [file]
9   -
10   -olebrowse project website: http://www.decalage.info/python/olebrowse
11   -
12   -olebrowse is part of the python-oletools package:
13   -http://www.decalage.info/python/oletools
14   -
15   -olebrowse is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info)
16   -All rights reserved.
17   -
18   -Redistribution and use in source and binary forms, with or without modification,
19   -are permitted provided that the following conditions are met:
20   -
21   - * Redistributions of source code must retain the above copyright notice, this
22   - list of conditions and the following disclaimer.
23   - * Redistributions in binary form must reproduce the above copyright notice,
24   - this list of conditions and the following disclaimer in the documentation
25   - and/or other materials provided with the distribution.
26   -
27   -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
28   -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
29   -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30   -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
31   -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32   -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
33   -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
34   -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
35   -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36   -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37   -"""
38   -
39   -__version__ = '0.02'
40   -
41   -#------------------------------------------------------------------------------
42   -# CHANGELOG:
43   -# 2012-09-17 v0.01 PL: - first version
44   -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
45   -
46   -#------------------------------------------------------------------------------
47   -# TODO:
48   -# - menu option to open another file
49   -# - menu option to display properties
50   -# - menu option to run other oletools, external tools such as OfficeCat?
51   -# - for a stream, display info: size, path, etc
52   -# - stream info: magic, entropy, ... ?
53   -
54   -import optparse, sys, os
55   -from thirdparty.easygui import easygui
56   -import thirdparty.olefile as olefile
57   -import ezhexviewer
58   -
59   -ABOUT = '~ About olebrowse'
60   -QUIT = '~ Quit'
61   -
62   -
63   -def about ():
64   - """
65   - Display information about this tool
66   - """
67   - easygui.textbox(title='About olebrowse', text=__doc__)
68   -
69   -
70   -def browse_stream (ole, stream):
71   - """
72   - Browse a stream (hex view or save to file)
73   - """
74   - #print 'stream:', stream
75   - while True:
76   - msg ='Select an action for the stream "%s", or press Esc to exit' % repr(stream)
77   - actions = [
78   - 'Hex view',
79   -## 'Text view',
80   -## 'Repr view',
81   - 'Save stream to file',
82   - '~ Back to main menu',
83   - ]
84   - action = easygui.choicebox(msg, title='olebrowse', choices=actions)
85   - if action is None or 'Back' in action:
86   - break
87   - elif action.startswith('Hex'):
88   - data = ole.openstream(stream).getvalue()
89   - ezhexviewer.hexview_data(data, msg='Stream: %s' % stream, title='olebrowse')
90   -## elif action.startswith('Text'):
91   -## data = ole.openstream(stream).getvalue()
92   -## easygui.codebox(title='Text view - %s' % stream, text=data)
93   -## elif action.startswith('Repr'):
94   -## data = ole.openstream(stream).getvalue()
95   -## easygui.codebox(title='Repr view - %s' % stream, text=repr(data))
96   - elif action.startswith('Save'):
97   - data = ole.openstream(stream).getvalue()
98   - fname = easygui.filesavebox(default='stream.bin')
99   - if fname is not None:
100   - f = open(fname, 'wb')
101   - f.write(data)
102   - f.close()
103   - easygui.msgbox('stream saved to file %s' % fname)
104   -
105   -
106   -
107   -def main():
108   - """
109   - Main function
110   - """
111   - try:
112   - filename = sys.argv[1]
113   - except:
114   - filename = easygui.fileopenbox()
115   - try:
116   - ole = olefile.OleFileIO(filename)
117   - listdir = ole.listdir()
118   - streams = []
119   - for direntry in listdir:
120   - #print direntry
121   - streams.append('/'.join(direntry))
122   - streams.append(ABOUT)
123   - streams.append(QUIT)
124   - stream = True
125   - while stream is not None:
126   - msg ="Select a stream, or press Esc to exit"
127   - title = "olebrowse"
128   - stream = easygui.choicebox(msg, title, streams)
129   - if stream is None or stream == QUIT:
130   - break
131   - if stream == ABOUT:
132   - about()
133   - else:
134   - browse_stream(ole, stream)
135   - except:
136   - easygui.exceptionbox()
137   -
138   -
139   -
140   -
141   -if __name__ == '__main__':
142   - main()
  1 +#!/usr/bin/env python
  2 +"""
  3 +olebrowse.py
  4 +
  5 +A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to
  6 +view and extract individual data streams.
  7 +
  8 +Usage: olebrowse.py [file]
  9 +
  10 +olebrowse project website: http://www.decalage.info/python/olebrowse
  11 +
  12 +olebrowse is part of the python-oletools package:
  13 +http://www.decalage.info/python/oletools
  14 +
  15 +olebrowse is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info)
  16 +All rights reserved.
  17 +
  18 +Redistribution and use in source and binary forms, with or without modification,
  19 +are permitted provided that the following conditions are met:
  20 +
  21 + * Redistributions of source code must retain the above copyright notice, this
  22 + list of conditions and the following disclaimer.
  23 + * Redistributions in binary form must reproduce the above copyright notice,
  24 + this list of conditions and the following disclaimer in the documentation
  25 + and/or other materials provided with the distribution.
  26 +
  27 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  28 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  29 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  30 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  31 +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  32 +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  33 +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  34 +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  35 +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  36 +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  37 +"""
  38 +
  39 +__version__ = '0.02'
  40 +
  41 +#------------------------------------------------------------------------------
  42 +# CHANGELOG:
  43 +# 2012-09-17 v0.01 PL: - first version
  44 +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
  45 +
  46 +#------------------------------------------------------------------------------
  47 +# TODO:
  48 +# - menu option to open another file
  49 +# - menu option to display properties
  50 +# - menu option to run other oletools, external tools such as OfficeCat?
  51 +# - for a stream, display info: size, path, etc
  52 +# - stream info: magic, entropy, ... ?
  53 +
  54 +import optparse, sys, os
  55 +from thirdparty.easygui import easygui
  56 +import thirdparty.olefile as olefile
  57 +import ezhexviewer
  58 +
  59 +ABOUT = '~ About olebrowse'
  60 +QUIT = '~ Quit'
  61 +
  62 +
  63 +def about ():
  64 + """
  65 + Display information about this tool
  66 + """
  67 + easygui.textbox(title='About olebrowse', text=__doc__)
  68 +
  69 +
  70 +def browse_stream (ole, stream):
  71 + """
  72 + Browse a stream (hex view or save to file)
  73 + """
  74 + #print 'stream:', stream
  75 + while True:
  76 + msg ='Select an action for the stream "%s", or press Esc to exit' % repr(stream)
  77 + actions = [
  78 + 'Hex view',
  79 +## 'Text view',
  80 +## 'Repr view',
  81 + 'Save stream to file',
  82 + '~ Back to main menu',
  83 + ]
  84 + action = easygui.choicebox(msg, title='olebrowse', choices=actions)
  85 + if action is None or 'Back' in action:
  86 + break
  87 + elif action.startswith('Hex'):
  88 + data = ole.openstream(stream).getvalue()
  89 + ezhexviewer.hexview_data(data, msg='Stream: %s' % stream, title='olebrowse')
  90 +## elif action.startswith('Text'):
  91 +## data = ole.openstream(stream).getvalue()
  92 +## easygui.codebox(title='Text view - %s' % stream, text=data)
  93 +## elif action.startswith('Repr'):
  94 +## data = ole.openstream(stream).getvalue()
  95 +## easygui.codebox(title='Repr view - %s' % stream, text=repr(data))
  96 + elif action.startswith('Save'):
  97 + data = ole.openstream(stream).getvalue()
  98 + fname = easygui.filesavebox(default='stream.bin')
  99 + if fname is not None:
  100 + f = open(fname, 'wb')
  101 + f.write(data)
  102 + f.close()
  103 + easygui.msgbox('stream saved to file %s' % fname)
  104 +
  105 +
  106 +
  107 +def main():
  108 + """
  109 + Main function
  110 + """
  111 + try:
  112 + filename = sys.argv[1]
  113 + except:
  114 + filename = easygui.fileopenbox()
  115 + try:
  116 + ole = olefile.OleFileIO(filename)
  117 + listdir = ole.listdir()
  118 + streams = []
  119 + for direntry in listdir:
  120 + #print direntry
  121 + streams.append('/'.join(direntry))
  122 + streams.append(ABOUT)
  123 + streams.append(QUIT)
  124 + stream = True
  125 + while stream is not None:
  126 + msg ="Select a stream, or press Esc to exit"
  127 + title = "olebrowse"
  128 + stream = easygui.choicebox(msg, title, streams)
  129 + if stream is None or stream == QUIT:
  130 + break
  131 + if stream == ABOUT:
  132 + about()
  133 + else:
  134 + browse_stream(ole, stream)
  135 + except:
  136 + easygui.exceptionbox()
  137 +
  138 +
  139 +
  140 +
  141 +if __name__ == '__main__':
  142 + main()
... ...
oletools/oleid.py
1   -#!/usr/bin/env python
2   -"""
3   -oleid.py
4   -
5   -oleid is a script to analyze OLE files such as MS Office documents (e.g. Word,
6   -Excel), to detect specific characteristics that could potentially indicate that
7   -the file is suspicious or malicious, in terms of security (e.g. malware).
8   -For example it can detect VBA macros, embedded Flash objects, fragmentation.
9   -The results can be displayed or returned as XML for further processing.
10   -
11   -Usage: oleid.py <file>
12   -
13   -oleid project website: http://www.decalage.info/python/oleid
14   -
15   -oleid is part of the python-oletools package:
16   -http://www.decalage.info/python/oletools
17   -"""
18   -
19   -#=== LICENSE =================================================================
20   -
21   -# oleid is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info)
22   -# All rights reserved.
23   -#
24   -# Redistribution and use in source and binary forms, with or without modification,
25   -# are permitted provided that the following conditions are met:
26   -#
27   -# * Redistributions of source code must retain the above copyright notice, this
28   -# list of conditions and the following disclaimer.
29   -# * Redistributions in binary form must reproduce the above copyright notice,
30   -# this list of conditions and the following disclaimer in the documentation
31   -# and/or other materials provided with the distribution.
32   -#
33   -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
34   -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35   -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
36   -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
37   -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38   -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39   -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
40   -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
41   -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
42   -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   -
44   -
45   -#------------------------------------------------------------------------------
46   -# CHANGELOG:
47   -# 2012-10-29 v0.01 PL: - first version
48   -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
49   -# - improved usage display with -h
50   -# 2014-11-30 v0.03 PL: - improved output with prettytable
51   -
52   -__version__ = '0.03'
53   -
54   -
55   -#------------------------------------------------------------------------------
56   -# TODO:
57   -# + extract relevant metadata: codepage, author, application, timestamps, etc
58   -# - detect RTF and OpenXML
59   -# - fragmentation
60   -# - OLE package
61   -# - entropy
62   -# - detect PE header?
63   -# - detect NOPs?
64   -# - list type of each object in object pool?
65   -# - criticality for each indicator?: info, low, medium, high
66   -# - support wildcards with glob?
67   -# - verbose option
68   -# - csv, xml output
69   -
70   -
71   -#=== IMPORTS =================================================================
72   -
73   -import optparse, sys, os, re, zlib, struct
74   -import thirdparty.olefile as olefile
75   -from thirdparty.prettytable import prettytable
76   -
77   -
78   -#=== FUNCTIONS ===============================================================
79   -
80   -def detect_flash (data):
81   - """
82   - Detect Flash objects (SWF files) within a binary string of data
83   - return a list of (start_index, length, compressed) tuples, or [] if nothing
84   - found.
85   -
86   - Code inspired from xxxswf.py by Alexander Hanel (but significantly reworked)
87   - http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html
88   - """
89   - #TODO: report
90   - found = []
91   - for match in re.finditer('CWS|FWS', data):
92   - start = match.start()
93   - if start+8 > len(data):
94   - # header size larger than remaining data, this is not a SWF
95   - continue
96   - #TODO: one struct.unpack should be simpler
97   - # Read Header
98   - header = data[start:start+3]
99   - # Read Version
100   - ver = struct.unpack('<b', data[start+3])[0]
101   - # Error check for version above 20
102   - #TODO: is this accurate? (check SWF specifications)
103   - if ver > 20:
104   - continue
105   - # Read SWF Size
106   - size = struct.unpack('<i', data[start+4:start+8])[0]
107   - if start+size > len(data) or size < 1024:
108   - # declared size larger than remaining data, this is not a SWF
109   - # or declared size too small for a usual SWF
110   - continue
111   - # Read SWF into buffer. If compressed read uncompressed size.
112   - swf = data[start:start+size]
113   - compressed = False
114   - if 'CWS' in header:
115   - compressed = True
116   - # compressed SWF: data after header (8 bytes) until the end is
117   - # compressed with zlib. Attempt to decompress it to check if it is
118   - # valid
119   - compressed_data = swf[8:]
120   - try:
121   - zlib.decompress(compressed_data)
122   - except:
123   - continue
124   - # else we don't check anything at this stage, we only assume it is a
125   - # valid SWF. So there might be false positives for uncompressed SWF.
126   - found.append((start, size, compressed))
127   - #print 'Found SWF start=%x, length=%d' % (start, size)
128   - return found
129   -
130   -
131   -#=== CLASSES =================================================================
132   -
133   -class Indicator (object):
134   -
135   - def __init__(self, _id, value=None, _type=bool, name=None, description=None):
136   - self.id = _id
137   - self.value = value
138   - self.type = _type
139   - self.name = name
140   - if name == None:
141   - self.name = _id
142   - self.description = description
143   -
144   -
145   -class OleID:
146   -
147   - def __init__(self, filename):
148   - self.filename = filename
149   - self.indicators = []
150   -
151   - def check(self):
152   - # check if it is actually an OLE file:
153   - oleformat = Indicator('ole_format', True, name='OLE format')
154   - self.indicators.append(oleformat)
155   - if not olefile.isOleFile(self.filename):
156   - oleformat.value = False
157   - return self.indicators
158   - # parse file:
159   - self.ole = olefile.OleFileIO(self.filename)
160   - # checks:
161   - self.check_properties()
162   - self.check_encrypted()
163   - self.check_word()
164   - self.check_excel()
165   - self.check_powerpoint()
166   - self.check_visio()
167   - self.check_ObjectPool()
168   - self.check_flash()
169   - self.ole.close()
170   - return self.indicators
171   -
172   - def check_properties (self):
173   - suminfo = Indicator('has_suminfo', False, name='Has SummaryInformation stream')
174   - self.indicators.append(suminfo)
175   - appname = Indicator('appname', 'unknown', _type=str, name='Application name')
176   - self.indicators.append(appname)
177   - self.suminfo = {}
178   - # check stream SummaryInformation
179   - if self.ole.exists("\x05SummaryInformation"):
180   - suminfo.value = True
181   - self.suminfo = self.ole.getproperties("\x05SummaryInformation")
182   - # check application name:
183   - appname.value = self.suminfo.get(0x12, 'unknown')
184   -
185   - def check_encrypted (self):
186   - # we keep the pointer to the indicator, can be modified by other checks:
187   - self.encrypted = Indicator('encrypted', False, name='Encrypted')
188   - self.indicators.append(self.encrypted)
189   - # check if bit 1 of security field = 1:
190   - # (this field may be missing for Powerpoint2000, for example)
191   - if 0x13 in self.suminfo:
192   - if self.suminfo[0x13] & 1:
193   - self.encrypted.value = True
194   -
195   - def check_word (self):
196   - word = Indicator('word', False, name='Word Document',
197   - description='Contains a WordDocument stream, very likely to be a Microsoft Word Document.')
198   - self.indicators.append(word)
199   - self.macros = Indicator('vba_macros', False, name='VBA Macros')
200   - self.indicators.append(self.macros)
201   - if self.ole.exists('WordDocument'):
202   - word.value = True
203   - # check for Word-specific encryption flag:
204   - s = self.ole.openstream(["WordDocument"])
205   - # pass header 10 bytes
206   - s.read(10)
207   - # read flag structure:
208   - temp16 = struct.unpack("H", s.read(2))[0]
209   - fEncrypted = (temp16 & 0x0100) >> 8
210   - if fEncrypted:
211   - self.encrypted.value = True
212   - s.close()
213   - # check for VBA macros:
214   - if self.ole.exists('Macros'):
215   - self.macros.value = True
216   -
217   - def check_excel (self):
218   - excel = Indicator('excel', False, name='Excel Workbook',
219   - description='Contains a Workbook or Book stream, very likely to be a Microsoft Excel Workbook.')
220   - self.indicators.append(excel)
221   - #self.macros = Indicator('vba_macros', False, name='VBA Macros')
222   - #self.indicators.append(self.macros)
223   - if self.ole.exists('Workbook') or self.ole.exists('Book'):
224   - excel.value = True
225   - # check for VBA macros:
226   - if self.ole.exists('_VBA_PROJECT_CUR'):
227   - self.macros.value = True
228   -
229   - def check_powerpoint (self):
230   - ppt = Indicator('ppt', False, name='PowerPoint Presentation',
231   - description='Contains a PowerPoint Document stream, very likely to be a Microsoft PowerPoint Presentation.')
232   - self.indicators.append(ppt)
233   - if self.ole.exists('PowerPoint Document'):
234   - ppt.value = True
235   -
236   - def check_visio (self):
237   - visio = Indicator('visio', False, name='Visio Drawing',
238   - description='Contains a VisioDocument stream, very likely to be a Microsoft Visio Drawing.')
239   - self.indicators.append(visio)
240   - if self.ole.exists('VisioDocument'):
241   - visio.value = True
242   -
243   - def check_ObjectPool (self):
244   - objpool = Indicator('ObjectPool', False, name='ObjectPool',
245   - description='Contains an ObjectPool stream, very likely to contain embedded OLE objects or files.')
246   - self.indicators.append(objpool)
247   - if self.ole.exists('ObjectPool'):
248   - objpool.value = True
249   -
250   -
251   - def check_flash (self):
252   - flash = Indicator('flash', 0, _type=int, name='Flash objects',
253   - description='Number of embedded Flash objects (SWF files) detected in OLE streams. Not 100% accurate, there may be false positives.')
254   - self.indicators.append(flash)
255   - for stream in self.ole.listdir():
256   - data = self.ole.openstream(stream).read()
257   - found = detect_flash(data)
258   - # just add to the count of Flash objects:
259   - flash.value += len(found)
260   - #print stream, found
261   -
262   -
263   -#=== MAIN =================================================================
264   -
265   -def main():
266   - usage = 'usage: %prog [options] <file>'
267   - parser = optparse.OptionParser(usage=__doc__ + '\n' + usage)
268   -## parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream')
269   -
270   - (options, args) = parser.parse_args()
271   -
272   -# Print help if no arguments are passed
273   - if len(args) == 0:
274   - parser.print_help()
275   - return
276   -
277   - for filename in args:
278   - print '\nFilename:', filename
279   - oleid = OleID(filename)
280   - indicators = oleid.check()
281   -
282   - #TODO: add description
283   - #TODO: highlight suspicious indicators
284   - t = prettytable.PrettyTable(['Indicator', 'Value'])
285   - t.align = 'l'
286   - t.max_width = 39
287   - #t.border = False
288   -
289   - for indicator in indicators:
290   - #print '%s: %s' % (indicator.name, indicator.value)
291   - t.add_row((indicator.name, indicator.value))
292   -
293   - print t
294   -
295   -if __name__ == '__main__':
296   - main()
  1 +#!/usr/bin/env python
  2 +"""
  3 +oleid.py
  4 +
  5 +oleid is a script to analyze OLE files such as MS Office documents (e.g. Word,
  6 +Excel), to detect specific characteristics that could potentially indicate that
  7 +the file is suspicious or malicious, in terms of security (e.g. malware).
  8 +For example it can detect VBA macros, embedded Flash objects, fragmentation.
  9 +The results can be displayed or returned as XML for further processing.
  10 +
  11 +Usage: oleid.py <file>
  12 +
  13 +oleid project website: http://www.decalage.info/python/oleid
  14 +
  15 +oleid is part of the python-oletools package:
  16 +http://www.decalage.info/python/oletools
  17 +"""
  18 +
  19 +#=== LICENSE =================================================================
  20 +
  21 +# oleid is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info)
  22 +# All rights reserved.
  23 +#
  24 +# Redistribution and use in source and binary forms, with or without modification,
  25 +# are permitted provided that the following conditions are met:
  26 +#
  27 +# * Redistributions of source code must retain the above copyright notice, this
  28 +# list of conditions and the following disclaimer.
  29 +# * Redistributions in binary form must reproduce the above copyright notice,
  30 +# this list of conditions and the following disclaimer in the documentation
  31 +# and/or other materials provided with the distribution.
  32 +#
  33 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  34 +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  35 +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  36 +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  37 +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  38 +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  39 +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  40 +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  41 +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  42 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43 +
  44 +
  45 +#------------------------------------------------------------------------------
  46 +# CHANGELOG:
  47 +# 2012-10-29 v0.01 PL: - first version
  48 +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
  49 +# - improved usage display with -h
  50 +# 2014-11-30 v0.03 PL: - improved output with prettytable
  51 +
  52 +__version__ = '0.03'
  53 +
  54 +
  55 +#------------------------------------------------------------------------------
  56 +# TODO:
  57 +# + extract relevant metadata: codepage, author, application, timestamps, etc
  58 +# - detect RTF and OpenXML
  59 +# - fragmentation
  60 +# - OLE package
  61 +# - entropy
  62 +# - detect PE header?
  63 +# - detect NOPs?
  64 +# - list type of each object in object pool?
  65 +# - criticality for each indicator?: info, low, medium, high
  66 +# - support wildcards with glob?
  67 +# - verbose option
  68 +# - csv, xml output
  69 +
  70 +
  71 +#=== IMPORTS =================================================================
  72 +
  73 +import optparse, sys, os, re, zlib, struct
  74 +import thirdparty.olefile as olefile
  75 +from thirdparty.prettytable import prettytable
  76 +
  77 +
  78 +#=== FUNCTIONS ===============================================================
  79 +
  80 +def detect_flash (data):
  81 + """
  82 + Detect Flash objects (SWF files) within a binary string of data
  83 + return a list of (start_index, length, compressed) tuples, or [] if nothing
  84 + found.
  85 +
  86 + Code inspired from xxxswf.py by Alexander Hanel (but significantly reworked)
  87 + http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html
  88 + """
  89 + #TODO: report
  90 + found = []
  91 + for match in re.finditer('CWS|FWS', data):
  92 + start = match.start()
  93 + if start+8 > len(data):
  94 + # header size larger than remaining data, this is not a SWF
  95 + continue
  96 + #TODO: one struct.unpack should be simpler
  97 + # Read Header
  98 + header = data[start:start+3]
  99 + # Read Version
  100 + ver = struct.unpack('<b', data[start+3])[0]
  101 + # Error check for version above 20
  102 + #TODO: is this accurate? (check SWF specifications)
  103 + if ver > 20:
  104 + continue
  105 + # Read SWF Size
  106 + size = struct.unpack('<i', data[start+4:start+8])[0]
  107 + if start+size > len(data) or size < 1024:
  108 + # declared size larger than remaining data, this is not a SWF
  109 + # or declared size too small for a usual SWF
  110 + continue
  111 + # Read SWF into buffer. If compressed read uncompressed size.
  112 + swf = data[start:start+size]
  113 + compressed = False
  114 + if 'CWS' in header:
  115 + compressed = True
  116 + # compressed SWF: data after header (8 bytes) until the end is
  117 + # compressed with zlib. Attempt to decompress it to check if it is
  118 + # valid
  119 + compressed_data = swf[8:]
  120 + try:
  121 + zlib.decompress(compressed_data)
  122 + except:
  123 + continue
  124 + # else we don't check anything at this stage, we only assume it is a
  125 + # valid SWF. So there might be false positives for uncompressed SWF.
  126 + found.append((start, size, compressed))
  127 + #print 'Found SWF start=%x, length=%d' % (start, size)
  128 + return found
  129 +
  130 +
  131 +#=== CLASSES =================================================================
  132 +
  133 +class Indicator (object):
  134 +
  135 + def __init__(self, _id, value=None, _type=bool, name=None, description=None):
  136 + self.id = _id
  137 + self.value = value
  138 + self.type = _type
  139 + self.name = name
  140 + if name == None:
  141 + self.name = _id
  142 + self.description = description
  143 +
  144 +
  145 +class OleID:
  146 +
  147 + def __init__(self, filename):
  148 + self.filename = filename
  149 + self.indicators = []
  150 +
  151 + def check(self):
  152 + # check if it is actually an OLE file:
  153 + oleformat = Indicator('ole_format', True, name='OLE format')
  154 + self.indicators.append(oleformat)
  155 + if not olefile.isOleFile(self.filename):
  156 + oleformat.value = False
  157 + return self.indicators
  158 + # parse file:
  159 + self.ole = olefile.OleFileIO(self.filename)
  160 + # checks:
  161 + self.check_properties()
  162 + self.check_encrypted()
  163 + self.check_word()
  164 + self.check_excel()
  165 + self.check_powerpoint()
  166 + self.check_visio()
  167 + self.check_ObjectPool()
  168 + self.check_flash()
  169 + self.ole.close()
  170 + return self.indicators
  171 +
  172 + def check_properties (self):
  173 + suminfo = Indicator('has_suminfo', False, name='Has SummaryInformation stream')
  174 + self.indicators.append(suminfo)
  175 + appname = Indicator('appname', 'unknown', _type=str, name='Application name')
  176 + self.indicators.append(appname)
  177 + self.suminfo = {}
  178 + # check stream SummaryInformation
  179 + if self.ole.exists("\x05SummaryInformation"):
  180 + suminfo.value = True
  181 + self.suminfo = self.ole.getproperties("\x05SummaryInformation")
  182 + # check application name:
  183 + appname.value = self.suminfo.get(0x12, 'unknown')
  184 +
  185 + def check_encrypted (self):
  186 + # we keep the pointer to the indicator, can be modified by other checks:
  187 + self.encrypted = Indicator('encrypted', False, name='Encrypted')
  188 + self.indicators.append(self.encrypted)
  189 + # check if bit 1 of security field = 1:
  190 + # (this field may be missing for Powerpoint2000, for example)
  191 + if 0x13 in self.suminfo:
  192 + if self.suminfo[0x13] & 1:
  193 + self.encrypted.value = True
  194 +
  195 + def check_word (self):
  196 + word = Indicator('word', False, name='Word Document',
  197 + description='Contains a WordDocument stream, very likely to be a Microsoft Word Document.')
  198 + self.indicators.append(word)
  199 + self.macros = Indicator('vba_macros', False, name='VBA Macros')
  200 + self.indicators.append(self.macros)
  201 + if self.ole.exists('WordDocument'):
  202 + word.value = True
  203 + # check for Word-specific encryption flag:
  204 + s = self.ole.openstream(["WordDocument"])
  205 + # pass header 10 bytes
  206 + s.read(10)
  207 + # read flag structure:
  208 + temp16 = struct.unpack("H", s.read(2))[0]
  209 + fEncrypted = (temp16 & 0x0100) >> 8
  210 + if fEncrypted:
  211 + self.encrypted.value = True
  212 + s.close()
  213 + # check for VBA macros:
  214 + if self.ole.exists('Macros'):
  215 + self.macros.value = True
  216 +
  217 + def check_excel (self):
  218 + excel = Indicator('excel', False, name='Excel Workbook',
  219 + description='Contains a Workbook or Book stream, very likely to be a Microsoft Excel Workbook.')
  220 + self.indicators.append(excel)
  221 + #self.macros = Indicator('vba_macros', False, name='VBA Macros')
  222 + #self.indicators.append(self.macros)
  223 + if self.ole.exists('Workbook') or self.ole.exists('Book'):
  224 + excel.value = True
  225 + # check for VBA macros:
  226 + if self.ole.exists('_VBA_PROJECT_CUR'):
  227 + self.macros.value = True
  228 +
  229 + def check_powerpoint (self):
  230 + ppt = Indicator('ppt', False, name='PowerPoint Presentation',
  231 + description='Contains a PowerPoint Document stream, very likely to be a Microsoft PowerPoint Presentation.')
  232 + self.indicators.append(ppt)
  233 + if self.ole.exists('PowerPoint Document'):
  234 + ppt.value = True
  235 +
  236 + def check_visio (self):
  237 + visio = Indicator('visio', False, name='Visio Drawing',
  238 + description='Contains a VisioDocument stream, very likely to be a Microsoft Visio Drawing.')
  239 + self.indicators.append(visio)
  240 + if self.ole.exists('VisioDocument'):
  241 + visio.value = True
  242 +
  243 + def check_ObjectPool (self):
  244 + objpool = Indicator('ObjectPool', False, name='ObjectPool',
  245 + description='Contains an ObjectPool stream, very likely to contain embedded OLE objects or files.')
  246 + self.indicators.append(objpool)
  247 + if self.ole.exists('ObjectPool'):
  248 + objpool.value = True
  249 +
  250 +
  251 + def check_flash (self):
  252 + flash = Indicator('flash', 0, _type=int, name='Flash objects',
  253 + description='Number of embedded Flash objects (SWF files) detected in OLE streams. Not 100% accurate, there may be false positives.')
  254 + self.indicators.append(flash)
  255 + for stream in self.ole.listdir():
  256 + data = self.ole.openstream(stream).read()
  257 + found = detect_flash(data)
  258 + # just add to the count of Flash objects:
  259 + flash.value += len(found)
  260 + #print stream, found
  261 +
  262 +
  263 +#=== MAIN =================================================================
  264 +
  265 +def main():
  266 + usage = 'usage: %prog [options] <file>'
  267 + parser = optparse.OptionParser(usage=__doc__ + '\n' + usage)
  268 +## parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream')
  269 +
  270 + (options, args) = parser.parse_args()
  271 +
  272 + # Print help if no arguments are passed
  273 + if len(args) == 0:
  274 + parser.print_help()
  275 + return
  276 +
  277 + for filename in args:
  278 + print '\nFilename:', filename
  279 + oleid = OleID(filename)
  280 + indicators = oleid.check()
  281 +
  282 + #TODO: add description
  283 + #TODO: highlight suspicious indicators
  284 + t = prettytable.PrettyTable(['Indicator', 'Value'])
  285 + t.align = 'l'
  286 + t.max_width = 39
  287 + #t.border = False
  288 +
  289 + for indicator in indicators:
  290 + #print '%s: %s' % (indicator.name, indicator.value)
  291 + t.add_row((indicator.name, indicator.value))
  292 +
  293 + print t
  294 +
  295 +if __name__ == '__main__':
  296 + main()
... ...
oletools/olemeta.py
1   -#!/usr/bin/env python
2   -"""
3   -olemeta.py
4   -
5   -olemeta is a script to parse OLE files such as MS Office documents (e.g. Word,
6   -Excel), to extract all standard properties present in the OLE file.
7   -
8   -Usage: olemeta.py <file>
9   -
10   -olemeta project website: http://www.decalage.info/python/olemeta
11   -
12   -olemeta is part of the python-oletools package:
13   -http://www.decalage.info/python/oletools
14   -"""
15   -
16   -#=== LICENSE =================================================================
17   -
18   -# olemeta is copyright (c) 2013-2014, Philippe Lagadec (http://www.decalage.info)
19   -# All rights reserved.
20   -#
21   -# Redistribution and use in source and binary forms, with or without modification,
22   -# are permitted provided that the following conditions are met:
23   -#
24   -# * Redistributions of source code must retain the above copyright notice, this
25   -# list of conditions and the following disclaimer.
26   -# * Redistributions in binary form must reproduce the above copyright notice,
27   -# this list of conditions and the following disclaimer in the documentation
28   -# and/or other materials provided with the distribution.
29   -#
30   -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31   -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32   -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
33   -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
34   -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35   -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
36   -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
37   -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
38   -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39   -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40   -
41   -#------------------------------------------------------------------------------
42   -# CHANGELOG:
43   -# 2013-07-24 v0.01 PL: - first version
44   -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
45   -# - improved usage display
46   -
47   -__version__ = '0.02'
48   -
49   -#------------------------------------------------------------------------------
50   -# TODO:
51   -# + optparse
52   -# + nicer output: table with fixed columns, datetime, etc
53   -# + CSV output
54   -# + option to only show available properties (by default)
55   -
56   -#=== IMPORTS =================================================================
57   -
58   -import sys
59   -import thirdparty.olefile as olefile
60   -
61   -
62   -#=== MAIN =================================================================
63   -
64   -try:
65   - ole = olefile.OleFileIO(sys.argv[1])
66   -except IndexError:
67   - sys.exit(__doc__)
68   -
69   -# parse and display metadata:
70   -meta = ole.get_metadata()
71   -meta.dump()
72   -
73   -ole.close()
  1 +#!/usr/bin/env python
  2 +"""
  3 +olemeta.py
  4 +
  5 +olemeta is a script to parse OLE files such as MS Office documents (e.g. Word,
  6 +Excel), to extract all standard properties present in the OLE file.
  7 +
  8 +Usage: olemeta.py <file>
  9 +
  10 +olemeta project website: http://www.decalage.info/python/olemeta
  11 +
  12 +olemeta is part of the python-oletools package:
  13 +http://www.decalage.info/python/oletools
  14 +"""
  15 +
  16 +#=== LICENSE =================================================================
  17 +
  18 +# olemeta is copyright (c) 2013-2015, Philippe Lagadec (http://www.decalage.info)
  19 +# All rights reserved.
  20 +#
  21 +# Redistribution and use in source and binary forms, with or without modification,
  22 +# are permitted provided that the following conditions are met:
  23 +#
  24 +# * Redistributions of source code must retain the above copyright notice, this
  25 +# list of conditions and the following disclaimer.
  26 +# * Redistributions in binary form must reproduce the above copyright notice,
  27 +# this list of conditions and the following disclaimer in the documentation
  28 +# and/or other materials provided with the distribution.
  29 +#
  30 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  31 +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  32 +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  33 +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  34 +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  35 +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  36 +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  37 +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  38 +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  39 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  40 +
  41 +#------------------------------------------------------------------------------
  42 +# CHANGELOG:
  43 +# 2013-07-24 v0.01 PL: - first version
  44 +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
  45 +# - improved usage display
  46 +
  47 +__version__ = '0.02'
  48 +
  49 +#------------------------------------------------------------------------------
  50 +# TODO:
  51 +# + optparse
  52 +# + nicer output: table with fixed columns, datetime, etc
  53 +# + CSV output
  54 +# + option to only show available properties (by default)
  55 +
  56 +#=== IMPORTS =================================================================
  57 +
  58 +import sys
  59 +import thirdparty.olefile as olefile
  60 +
  61 +
  62 +#=== MAIN =================================================================
  63 +
  64 +try:
  65 + ole = olefile.OleFileIO(sys.argv[1])
  66 +except IndexError:
  67 + sys.exit(__doc__)
  68 +
  69 +# parse and display metadata:
  70 +meta = ole.get_metadata()
  71 +meta.dump()
  72 +
  73 +ole.close()
... ...
oletools/oletimes.py
1   -#!/usr/bin/env python
2   -"""
3   -oletimes.py
4   -
5   -oletimes is a script to parse OLE files such as MS Office documents (e.g. Word,
6   -Excel), to extract creation and modification times of all streams and storages
7   -in the OLE file.
8   -
9   -Usage: oletimes.py <file>
10   -
11   -oletimes project website: http://www.decalage.info/python/oletimes
12   -
13   -oletimes is part of the python-oletools package:
14   -http://www.decalage.info/python/oletools
15   -"""
16   -
17   -#=== LICENSE =================================================================
18   -
19   -# oletimes is copyright (c) 2013-2014, Philippe Lagadec (http://www.decalage.info)
20   -# All rights reserved.
21   -#
22   -# Redistribution and use in source and binary forms, with or without modification,
23   -# are permitted provided that the following conditions are met:
24   -#
25   -# * Redistributions of source code must retain the above copyright notice, this
26   -# list of conditions and the following disclaimer.
27   -# * Redistributions in binary form must reproduce the above copyright notice,
28   -# this list of conditions and the following disclaimer in the documentation
29   -# and/or other materials provided with the distribution.
30   -#
31   -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
32   -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
33   -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
34   -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
35   -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36   -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
37   -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
39   -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40   -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41   -
42   -
43   -#------------------------------------------------------------------------------
44   -# CHANGELOG:
45   -# 2013-07-24 v0.01 PL: - first version
46   -# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
47   -# - improved usage display
48   -# 2014-11-30 v0.03 PL: - improved output with prettytable
49   -
50   -__version__ = '0.03'
51   -
52   -#------------------------------------------------------------------------------
53   -# TODO:
54   -# + optparse
55   -# + nicer output: table with fixed columns, datetime, etc
56   -# + CSV output
57   -# + option to only show available timestamps (by default?)
58   -
59   -#=== IMPORTS =================================================================
60   -
61   -import sys, datetime
62   -import thirdparty.olefile as olefile
63   -from thirdparty.prettytable import prettytable
64   -
65   -
66   -#=== MAIN =================================================================
67   -
68   -try:
69   - ole = olefile.OleFileIO(sys.argv[1])
70   -except IndexError:
71   - sys.exit(__doc__)
72   -
73   -def dt2str (dt):
74   - """
75   - Convert a datetime object to a string for display, without microseconds
76   -
77   - :param dt: datetime.datetime object, or None
78   - :return: str, or None
79   - """
80   - if dt is None:
81   - return None
82   - dt = dt.replace(microsecond = 0)
83   - return str(dt)
84   -
85   -t = prettytable.PrettyTable(['Stream/Storage name', 'Modification Time', 'Creation Time'])
86   -t.align = 'l'
87   -t.max_width = 26
88   -#t.border = False
89   -
90   -#print'- Root mtime=%s ctime=%s' % (ole.root.getmtime(), ole.root.getctime())
91   -t.add_row(('Root', dt2str(ole.root.getmtime()), dt2str(ole.root.getctime())))
92   -
93   -for obj in ole.listdir(streams=True, storages=True):
94   - #print '- %s: mtime=%s ctime=%s' % (repr('/'.join(obj)), ole.getmtime(obj), ole.getctime(obj))
95   - t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj))))
96   -
97   -print t
98   -
99   -ole.close()
  1 +#!/usr/bin/env python
  2 +"""
  3 +oletimes.py
  4 +
  5 +oletimes is a script to parse OLE files such as MS Office documents (e.g. Word,
  6 +Excel), to extract creation and modification times of all streams and storages
  7 +in the OLE file.
  8 +
  9 +Usage: oletimes.py <file>
  10 +
  11 +oletimes project website: http://www.decalage.info/python/oletimes
  12 +
  13 +oletimes is part of the python-oletools package:
  14 +http://www.decalage.info/python/oletools
  15 +"""
  16 +
  17 +#=== LICENSE =================================================================
  18 +
  19 +# oletimes is copyright (c) 2013-2015, Philippe Lagadec (http://www.decalage.info)
  20 +# All rights reserved.
  21 +#
  22 +# Redistribution and use in source and binary forms, with or without modification,
  23 +# are permitted provided that the following conditions are met:
  24 +#
  25 +# * Redistributions of source code must retain the above copyright notice, this
  26 +# list of conditions and the following disclaimer.
  27 +# * Redistributions in binary form must reproduce the above copyright notice,
  28 +# this list of conditions and the following disclaimer in the documentation
  29 +# and/or other materials provided with the distribution.
  30 +#
  31 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  32 +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  33 +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  34 +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  35 +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  36 +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  37 +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38 +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  39 +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  40 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  41 +
  42 +
  43 +#------------------------------------------------------------------------------
  44 +# CHANGELOG:
  45 +# 2013-07-24 v0.01 PL: - first version
  46 +# 2014-11-29 v0.02 PL: - use olefile instead of OleFileIO_PL
  47 +# - improved usage display
  48 +# 2014-11-30 v0.03 PL: - improved output with prettytable
  49 +
  50 +__version__ = '0.03'
  51 +
  52 +#------------------------------------------------------------------------------
  53 +# TODO:
  54 +# + optparse
  55 +# + nicer output: table with fixed columns, datetime, etc
  56 +# + CSV output
  57 +# + option to only show available timestamps (by default?)
  58 +
  59 +#=== IMPORTS =================================================================
  60 +
  61 +import sys, datetime
  62 +import thirdparty.olefile as olefile
  63 +from thirdparty.prettytable import prettytable
  64 +
  65 +
  66 +#=== MAIN =================================================================
  67 +
  68 +try:
  69 + ole = olefile.OleFileIO(sys.argv[1])
  70 +except IndexError:
  71 + sys.exit(__doc__)
  72 +
  73 +def dt2str (dt):
  74 + """
  75 + Convert a datetime object to a string for display, without microseconds
  76 +
  77 + :param dt: datetime.datetime object, or None
  78 + :return: str, or None
  79 + """
  80 + if dt is None:
  81 + return None
  82 + dt = dt.replace(microsecond = 0)
  83 + return str(dt)
  84 +
  85 +t = prettytable.PrettyTable(['Stream/Storage name', 'Modification Time', 'Creation Time'])
  86 +t.align = 'l'
  87 +t.max_width = 26
  88 +#t.border = False
  89 +
  90 +#print'- Root mtime=%s ctime=%s' % (ole.root.getmtime(), ole.root.getctime())
  91 +t.add_row(('Root', dt2str(ole.root.getmtime()), dt2str(ole.root.getctime())))
  92 +
  93 +for obj in ole.listdir(streams=True, storages=True):
  94 + #print '- %s: mtime=%s ctime=%s' % (repr('/'.join(obj)), ole.getmtime(obj), ole.getctime(obj))
  95 + t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj))))
  96 +
  97 +print t
  98 +
  99 +ole.close()
... ...
oletools/pyxswf.py
1   -#!/usr/bin/env python
2   -"""
3   -pyxswf.py
4   -
5   -pyxswf is a script to detect, extract and analyze Flash objects (SWF) that may
6   -be embedded in files such as MS Office documents (e.g. Word, Excel),
7   -which is especially useful for malware analysis.
8   -
9   -pyxswf is an extension to xxxswf.py published by Alexander Hanel on
10   -http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html
11   -Compared to xxxswf, it can extract streams from MS Office documents by parsing
12   -their OLE structure properly (-o option), which is necessary when streams are
13   -fragmented.
14   -Stream fragmentation is a known obfuscation technique, as explained on
15   -http://www.breakingpointsystems.com/resources/blog/evasion-with-ole2-fragmentation/
16   -
17   -It can also extract Flash objects from RTF documents, by parsing embedded
18   -objects encoded in hexadecimal format (-f option).
19   -
20   -pyxswf project website: http://www.decalage.info/python/pyxswf
21   -
22   -pyxswf is part of the python-oletools package:
23   -http://www.decalage.info/python/oletools
24   -"""
25   -
26   -#=== LICENSE =================================================================
27   -
28   -# pyxswf is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info)
29   -# All rights reserved.
30   -#
31   -# Redistribution and use in source and binary forms, with or without modification,
32   -# are permitted provided that the following conditions are met:
33   -#
34   -# * Redistributions of source code must retain the above copyright notice, this
35   -# list of conditions and the following disclaimer.
36   -# * Redistributions in binary form must reproduce the above copyright notice,
37   -# this list of conditions and the following disclaimer in the documentation
38   -# and/or other materials provided with the distribution.
39   -#
40   -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
41   -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
42   -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43   -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
44   -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45   -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
46   -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
47   -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
48   -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
49   -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50   -
51   -#------------------------------------------------------------------------------
52   -# CHANGELOG:
53   -# 2012-09-17 v0.01 PL: - first version
54   -# 2012-11-09 v0.02 PL: - added RTF embedded objects extraction
55   -# 2014-11-29 v0.03 PL: - use olefile instead of OleFileIO_PL
56   -# - improved usage display with -h
57   -
58   -__version__ = '0.03'
59   -
60   -#------------------------------------------------------------------------------
61   -# TODO:
62   -# + add support for LZMA-compressed flash files (ZWS header)
63   -# references: http://blog.malwaretracker.com/2014/01/cve-2013-5331-evaded-av-by-using.html
64   -# http://code.metager.de/source/xref/adobe/flash/crossbridge/tools/swf-info.py
65   -# http://room32.dyndns.org/forums/showthread.php?766-SWFCompression
66   -# sample code: http://room32.dyndns.org/SWFCompression.py
67   -# - check if file is OLE
68   -# - support -r
69   -
70   -
71   -#=== IMPORTS =================================================================
72   -
73   -import optparse, sys, os, rtfobj, StringIO
74   -from thirdparty.xxxswf import xxxswf
75   -import thirdparty.olefile as olefile
76   -
77   -
78   -#=== MAIN =================================================================
79   -
80   -def main():
81   - # Scenarios:
82   - # Scan file for SWF(s)
83   - # Scan file for SWF(s) and extract them
84   - # Scan file for SWF(s) and scan them with Yara
85   - # Scan file for SWF(s), extract them and scan with Yara
86   - # Scan directory recursively for files that contain SWF(s)
87   - # Scan directory recursively for files that contain SWF(s) and extract them
88   -
89   - usage = 'usage: %prog [options] <file.bad>'
90   - parser = optparse.OptionParser(usage=__doc__ + '\n' + usage)
91   - parser.add_option('-x', '--extract', action='store_true', dest='extract', help='Extracts the embedded SWF(s), names it MD5HASH.swf & saves it in the working dir. No addition args needed')
92   - parser.add_option('-y', '--yara', action='store_true', dest='yara', help='Scans the SWF(s) with yara. If the SWF(s) is compressed it will be deflated. No addition args needed')
93   - parser.add_option('-s', '--md5scan', action='store_true', dest='md5scan', help='Scans the SWF(s) for MD5 signatures. Please see func checkMD5 to define hashes. No addition args needed')
94   - parser.add_option('-H', '--header', action='store_true', dest='header', help='Displays the SWFs file header. No addition args needed')
95   - parser.add_option('-d', '--decompress', action='store_true', dest='decompress', help='Deflates compressed SWFS(s)')
96   - parser.add_option('-r', '--recdir', dest='PATH', type='string', help='Will recursively scan a directory for files that contain SWFs. Must provide path in quotes')
97   - parser.add_option('-c', '--compress', action='store_true', dest='compress', help='Compresses the SWF using Zlib')
98   -
99   - parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream')
100   - parser.add_option('-f', '--rtf', action='store_true', dest='rtf', help='Parse an RTF file to look for SWF in each embedded object')
101   -
102   -
103   - (options, args) = parser.parse_args()
104   -
105   - # Print help if no arguments are passed
106   - if len(args) == 0:
107   - parser.print_help()
108   - return
109   -
110   - # OLE MODE:
111   - if options.ole:
112   - for filename in args:
113   - ole = olefile.OleFileIO(filename)
114   - for direntry in ole.direntries:
115   - if direntry is not None and direntry.entry_type == olefile.STGTY_STREAM:
116   - f = ole._open(direntry.isectStart, direntry.size)
117   - # check if data contains the SWF magic: FWS or CWS
118   - data = f.getvalue()
119   - if 'FWS' in data or 'CWS' in data:
120   - print 'OLE stream: %s' % repr(direntry.name)
121   - # call xxxswf to scan or extract Flash files:
122   - xxxswf.disneyland(f, direntry.name, options)
123   - f.close()
124   - ole.close()
125   -
126   - # RTF MODE:
127   - elif options.rtf:
128   - for filename in args:
129   - for index, data in rtfobj.rtf_iter_objects(filename):
130   - if 'FWS' in data or 'CWS' in data:
131   - print 'RTF embedded object size %d at index %08X' % (len(data), index)
132   - f = StringIO.StringIO(data)
133   - name = 'RTF_embedded_object_%08X' % index
134   - # call xxxswf to scan or extract Flash files:
135   - xxxswf.disneyland(f, name, options)
136   -
137   - else:
138   - xxxswf.main()
139   -
140   -if __name__ == '__main__':
141   - main()
  1 +#!/usr/bin/env python
  2 +"""
  3 +pyxswf.py
  4 +
  5 +pyxswf is a script to detect, extract and analyze Flash objects (SWF) that may
  6 +be embedded in files such as MS Office documents (e.g. Word, Excel),
  7 +which is especially useful for malware analysis.
  8 +
  9 +pyxswf is an extension to xxxswf.py published by Alexander Hanel on
  10 +http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html
  11 +Compared to xxxswf, it can extract streams from MS Office documents by parsing
  12 +their OLE structure properly (-o option), which is necessary when streams are
  13 +fragmented.
  14 +Stream fragmentation is a known obfuscation technique, as explained on
  15 +http://www.breakingpointsystems.com/resources/blog/evasion-with-ole2-fragmentation/
  16 +
  17 +It can also extract Flash objects from RTF documents, by parsing embedded
  18 +objects encoded in hexadecimal format (-f option).
  19 +
  20 +pyxswf project website: http://www.decalage.info/python/pyxswf
  21 +
  22 +pyxswf is part of the python-oletools package:
  23 +http://www.decalage.info/python/oletools
  24 +"""
  25 +
  26 +#=== LICENSE =================================================================
  27 +
  28 +# pyxswf is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info)
  29 +# All rights reserved.
  30 +#
  31 +# Redistribution and use in source and binary forms, with or without modification,
  32 +# are permitted provided that the following conditions are met:
  33 +#
  34 +# * Redistributions of source code must retain the above copyright notice, this
  35 +# list of conditions and the following disclaimer.
  36 +# * Redistributions in binary form must reproduce the above copyright notice,
  37 +# this list of conditions and the following disclaimer in the documentation
  38 +# and/or other materials provided with the distribution.
  39 +#
  40 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  41 +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  42 +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  43 +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  44 +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  45 +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  46 +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  47 +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  48 +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  49 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  50 +
  51 +#------------------------------------------------------------------------------
  52 +# CHANGELOG:
  53 +# 2012-09-17 v0.01 PL: - first version
  54 +# 2012-11-09 v0.02 PL: - added RTF embedded objects extraction
  55 +# 2014-11-29 v0.03 PL: - use olefile instead of OleFileIO_PL
  56 +# - improved usage display with -h
  57 +
  58 +__version__ = '0.03'
  59 +
  60 +#------------------------------------------------------------------------------
  61 +# TODO:
  62 +# + add support for LZMA-compressed flash files (ZWS header)
  63 +# references: http://blog.malwaretracker.com/2014/01/cve-2013-5331-evaded-av-by-using.html
  64 +# http://code.metager.de/source/xref/adobe/flash/crossbridge/tools/swf-info.py
  65 +# http://room32.dyndns.org/forums/showthread.php?766-SWFCompression
  66 +# sample code: http://room32.dyndns.org/SWFCompression.py
  67 +# - check if file is OLE
  68 +# - support -r
  69 +
  70 +
  71 +#=== IMPORTS =================================================================
  72 +
  73 +import optparse, sys, os, rtfobj, StringIO
  74 +from thirdparty.xxxswf import xxxswf
  75 +import thirdparty.olefile as olefile
  76 +
  77 +
  78 +#=== MAIN =================================================================
  79 +
  80 +def main():
  81 + # Scenarios:
  82 + # Scan file for SWF(s)
  83 + # Scan file for SWF(s) and extract them
  84 + # Scan file for SWF(s) and scan them with Yara
  85 + # Scan file for SWF(s), extract them and scan with Yara
  86 + # Scan directory recursively for files that contain SWF(s)
  87 + # Scan directory recursively for files that contain SWF(s) and extract them
  88 +
  89 + usage = 'usage: %prog [options] <file.bad>'
  90 + parser = optparse.OptionParser(usage=__doc__ + '\n' + usage)
  91 + parser.add_option('-x', '--extract', action='store_true', dest='extract', help='Extracts the embedded SWF(s), names it MD5HASH.swf & saves it in the working dir. No addition args needed')
  92 + parser.add_option('-y', '--yara', action='store_true', dest='yara', help='Scans the SWF(s) with yara. If the SWF(s) is compressed it will be deflated. No addition args needed')
  93 + parser.add_option('-s', '--md5scan', action='store_true', dest='md5scan', help='Scans the SWF(s) for MD5 signatures. Please see func checkMD5 to define hashes. No addition args needed')
  94 + parser.add_option('-H', '--header', action='store_true', dest='header', help='Displays the SWFs file header. No addition args needed')
  95 + parser.add_option('-d', '--decompress', action='store_true', dest='decompress', help='Deflates compressed SWFS(s)')
  96 + parser.add_option('-r', '--recdir', dest='PATH', type='string', help='Will recursively scan a directory for files that contain SWFs. Must provide path in quotes')
  97 + parser.add_option('-c', '--compress', action='store_true', dest='compress', help='Compresses the SWF using Zlib')
  98 +
  99 + parser.add_option('-o', '--ole', action='store_true', dest='ole', help='Parse an OLE file (e.g. Word, Excel) to look for SWF in each stream')
  100 + parser.add_option('-f', '--rtf', action='store_true', dest='rtf', help='Parse an RTF file to look for SWF in each embedded object')
  101 +
  102 +
  103 + (options, args) = parser.parse_args()
  104 +
  105 + # Print help if no arguments are passed
  106 + if len(args) == 0:
  107 + parser.print_help()
  108 + return
  109 +
  110 + # OLE MODE:
  111 + if options.ole:
  112 + for filename in args:
  113 + ole = olefile.OleFileIO(filename)
  114 + for direntry in ole.direntries:
  115 + if direntry is not None and direntry.entry_type == olefile.STGTY_STREAM:
  116 + f = ole._open(direntry.isectStart, direntry.size)
  117 + # check if data contains the SWF magic: FWS or CWS
  118 + data = f.getvalue()
  119 + if 'FWS' in data or 'CWS' in data:
  120 + print 'OLE stream: %s' % repr(direntry.name)
  121 + # call xxxswf to scan or extract Flash files:
  122 + xxxswf.disneyland(f, direntry.name, options)
  123 + f.close()
  124 + ole.close()
  125 +
  126 + # RTF MODE:
  127 + elif options.rtf:
  128 + for filename in args:
  129 + for index, data in rtfobj.rtf_iter_objects(filename):
  130 + if 'FWS' in data or 'CWS' in data:
  131 + print 'RTF embedded object size %d at index %08X' % (len(data), index)
  132 + f = StringIO.StringIO(data)
  133 + name = 'RTF_embedded_object_%08X' % index
  134 + # call xxxswf to scan or extract Flash files:
  135 + xxxswf.disneyland(f, name, options)
  136 +
  137 + else:
  138 + xxxswf.main()
  139 +
  140 +if __name__ == '__main__':
  141 + main()
... ...
oletools/rtfobj.py
1   -#!/usr/bin/env python
2   -"""
3   -rtfobj.py - Philippe Lagadec 2013-04-02
4   -
5   -rtfobj is a Python module to extract embedded objects from RTF files, such as
6   -OLE objects. It can be used as a Python library or a command-line tool.
7   -
8   -Usage: rtfobj.py <file.rtf>
9   -
10   -rtfobj project website: http://www.decalage.info/python/rtfobj
11   -
12   -rtfobj is part of the python-oletools package:
13   -http://www.decalage.info/python/oletools
14   -"""
15   -
16   -#=== LICENSE =================================================================
17   -
18   -# rtfobj is copyright (c) 2012-2014, Philippe Lagadec (http://www.decalage.info)
19   -# All rights reserved.
20   -#
21   -# Redistribution and use in source and binary forms, with or without modification,
22   -# are permitted provided that the following conditions are met:
23   -#
24   -# * Redistributions of source code must retain the above copyright notice, this
25   -# list of conditions and the following disclaimer.
26   -# * Redistributions in binary form must reproduce the above copyright notice,
27   -# this list of conditions and the following disclaimer in the documentation
28   -# and/or other materials provided with the distribution.
29   -#
30   -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
31   -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32   -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
33   -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
34   -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35   -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
36   -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
37   -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
38   -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39   -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40   -
41   -
42   -#------------------------------------------------------------------------------
43   -# CHANGELOG:
44   -# 2012-11-09 v0.01 PL: - first version
45   -# 2013-04-02 v0.02 PL: - fixed bug in main
46   -
47   -__version__ = '0.02'
48   -
49   -#------------------------------------------------------------------------------
50   -# TODO:
51   -# - improve regex pattern for better performance?
52   -# - allow semicolon within hex, as found in this sample:
53   -# http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html
54   -
55   -#=== IMPORTS =================================================================
56   -
57   -import re, sys, string, binascii
58   -
59   -
60   -#=== CONSTANTS=================================================================
61   -
62   -# REGEX pattern to extract embedded OLE objects in hexadecimal format:
63   -# alphanum digit: [0-9A-Fa-f]
64   -# hex char = two alphanum digits: [0-9A-Fa-f]{2}
65   -# several hex chars, at least 4: (?:[0-9A-Fa-f]{2}){4,}
66   -# at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s*
67   -PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'
68   -# improved pattern, allowing semicolons within hex:
69   -#PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'
70   -
71   -# a dummy translation table for str.translate, which does not change anything:
72   -TRANSTABLE_NOCHANGE = string.maketrans('', '')
73   -
74   -
75   -#=== FUNCTIONS =================================================================
76   -
77   -def rtf_iter_objects (filename, min_size=32):
78   - """
79   - Open a RTF file, extract each embedded object encoded in hexadecimal of
80   - size > min_size, yield the index of the object in the RTF file and its data
81   - in binary format.
82   - This is an iterator.
83   - """
84   - data = open(filename, 'rb').read()
85   - for m in re.finditer(PATTERN, data):
86   - found = m.group(0)
87   - # remove all whitespace and line feeds:
88   - #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE
89   - found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v')
90   - found = binascii.unhexlify(found)
91   - #print repr(found)
92   - if len(found)>min_size:
93   - yield m.start(), found
94   -
95   -
96   -#=== MAIN =================================================================
97   -
98   -if __name__ == '__main__':
99   - if len(sys.argv)<2:
100   - sys.exit(__doc__)
101   - for index, data in rtf_iter_objects(sys.argv[1]):
102   - print 'found object size %d at index %08X' % (len(data), index)
103   - fname = 'object_%08X.bin' % index
104   - print 'saving to file %s' % fname
105   - open(fname, 'wb').write(data)
  1 +#!/usr/bin/env python
  2 +"""
  3 +rtfobj.py - Philippe Lagadec 2013-04-02
  4 +
  5 +rtfobj is a Python module to extract embedded objects from RTF files, such as
  6 +OLE objects. It can be used as a Python library or a command-line tool.
  7 +
  8 +Usage: rtfobj.py <file.rtf>
  9 +
  10 +rtfobj project website: http://www.decalage.info/python/rtfobj
  11 +
  12 +rtfobj is part of the python-oletools package:
  13 +http://www.decalage.info/python/oletools
  14 +"""
  15 +
  16 +#=== LICENSE =================================================================
  17 +
  18 +# rtfobj is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info)
  19 +# All rights reserved.
  20 +#
  21 +# Redistribution and use in source and binary forms, with or without modification,
  22 +# are permitted provided that the following conditions are met:
  23 +#
  24 +# * Redistributions of source code must retain the above copyright notice, this
  25 +# list of conditions and the following disclaimer.
  26 +# * Redistributions in binary form must reproduce the above copyright notice,
  27 +# this list of conditions and the following disclaimer in the documentation
  28 +# and/or other materials provided with the distribution.
  29 +#
  30 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  31 +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  32 +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  33 +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  34 +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  35 +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  36 +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  37 +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  38 +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  39 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  40 +
  41 +
  42 +#------------------------------------------------------------------------------
  43 +# CHANGELOG:
  44 +# 2012-11-09 v0.01 PL: - first version
  45 +# 2013-04-02 v0.02 PL: - fixed bug in main
  46 +
  47 +__version__ = '0.02'
  48 +
  49 +#------------------------------------------------------------------------------
  50 +# TODO:
  51 +# - improve regex pattern for better performance?
  52 +# - allow semicolon within hex, as found in this sample:
  53 +# http://contagiodump.blogspot.nl/2011/10/sep-28-cve-2010-3333-manuscript-with.html
  54 +
  55 +#=== IMPORTS =================================================================
  56 +
  57 +import re, sys, string, binascii
  58 +
  59 +
  60 +#=== CONSTANTS=================================================================
  61 +
  62 +# REGEX pattern to extract embedded OLE objects in hexadecimal format:
  63 +# alphanum digit: [0-9A-Fa-f]
  64 +# hex char = two alphanum digits: [0-9A-Fa-f]{2}
  65 +# several hex chars, at least 4: (?:[0-9A-Fa-f]{2}){4,}
  66 +# at least 4 hex chars, followed by whitespace or CR/LF: (?:[0-9A-Fa-f]{2}){4,}\s*
  67 +PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'
  68 +# improved pattern, allowing semicolons within hex:
  69 +#PATTERN = r'(?:(?:[0-9A-Fa-f]{2})+\s*)*(?:[0-9A-Fa-f]{2}){4,}'
  70 +
  71 +# a dummy translation table for str.translate, which does not change anything:
  72 +TRANSTABLE_NOCHANGE = string.maketrans('', '')
  73 +
  74 +
  75 +#=== FUNCTIONS =================================================================
  76 +
  77 +def rtf_iter_objects (filename, min_size=32):
  78 + """
  79 + Open a RTF file, extract each embedded object encoded in hexadecimal of
  80 + size > min_size, yield the index of the object in the RTF file and its data
  81 + in binary format.
  82 + This is an iterator.
  83 + """
  84 + data = open(filename, 'rb').read()
  85 + for m in re.finditer(PATTERN, data):
  86 + found = m.group(0)
  87 + # remove all whitespace and line feeds:
  88 + #NOTE: with Python 2.6+, we could use None instead of TRANSTABLE_NOCHANGE
  89 + found = found.translate(TRANSTABLE_NOCHANGE, ' \t\r\n\f\v')
  90 + found = binascii.unhexlify(found)
  91 + #print repr(found)
  92 + if len(found)>min_size:
  93 + yield m.start(), found
  94 +
  95 +
  96 +#=== MAIN =================================================================
  97 +
  98 +if __name__ == '__main__':
  99 + if len(sys.argv)<2:
  100 + sys.exit(__doc__)
  101 + for index, data in rtf_iter_objects(sys.argv[1]):
  102 + print 'found object size %d at index %08X' % (len(data), index)
  103 + fname = 'object_%08X.bin' % index
  104 + print 'saving to file %s' % fname
  105 + open(fname, 'wb').write(data)
... ...