Commit 03c0a9ec7ada8c71fc8b3daf1d17eef47bc08951
1 parent
439edb24
Added oleid
Showing
3 changed files
with
342 additions
and
6 deletions
README.md
| @@ -10,7 +10,8 @@ Tools in python-oletools: | @@ -10,7 +10,8 @@ Tools in python-oletools: | ||
| 10 | 10 | ||
| 11 | - **olebrowse**: A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to | 11 | - **olebrowse**: A simple GUI to browse OLE files (e.g. MS Word, Excel, Powerpoint documents), to |
| 12 | view and extract individual data streams. | 12 | view and extract individual data streams. |
| 13 | -- **pyxswf**: a script to detect, extract and analyze Flash objects (SWF) that may | 13 | +- **oleid**: a tool to analyze OLE files to detect specific characteristics that could potentially indicate that the file is suspicious or malicious. |
| 14 | +- **pyxswf**: a tool to detect, extract and analyze Flash objects (SWF) that may | ||
| 14 | be embedded in files such as MS Office documents (e.g. Word, Excel), | 15 | be embedded in files such as MS Office documents (e.g. Word, Excel), |
| 15 | which is especially useful for malware analysis. | 16 | which is especially useful for malware analysis. |
| 16 | - and a few others (coming soon) | 17 | - and a few others (coming soon) |
| @@ -18,7 +19,8 @@ Tools in python-oletools: | @@ -18,7 +19,8 @@ Tools in python-oletools: | ||
| 18 | News | 19 | News |
| 19 | ---- | 20 | ---- |
| 20 | 21 | ||
| 21 | -- 2012-10-09: Initial version of olebrowse and pyxswf | 22 | +- 2012-10-29 v0.02: Added oleid |
| 23 | +- 2012-10-09 v0.01: Initial version of olebrowse and pyxswf | ||
| 22 | - see changelog in source code for more info. | 24 | - see changelog in source code for more info. |
| 23 | 25 | ||
| 24 | Download: | 26 | Download: |
| @@ -39,6 +41,35 @@ If you provide a file it will be opened, else a dialog will allow you to browse | @@ -39,6 +41,35 @@ If you provide a file it will be opened, else a dialog will allow you to browse | ||
| 39 | 41 | ||
| 40 | For screenshots and other info, see [http://www.decalage.info/python/olebrowse](http://www.decalage.info/python/olebrowse) | 42 | For screenshots and other info, see [http://www.decalage.info/python/olebrowse](http://www.decalage.info/python/olebrowse) |
| 41 | 43 | ||
| 44 | +oleid: | ||
| 45 | +------ | ||
| 46 | + | ||
| 47 | +oleid is a script to analyze OLE files such as MS Office documents (e.g. Word, | ||
| 48 | +Excel), to detect specific characteristics that could potentially indicate that | ||
| 49 | +the file is suspicious or malicious, in terms of security (e.g. malware). | ||
| 50 | +For example it can detect VBA macros, embedded Flash objects, fragmentation. | ||
| 51 | + | ||
| 52 | + Usage: oleid.py <file> | ||
| 53 | + | ||
| 54 | +Example - analyzing a Word document containing a Flash object and VBA macros: | ||
| 55 | + | ||
| 56 | + C:\oletools>oleid.py word_flash_vba.doc | ||
| 57 | + Filename: word_flash_vba.doc | ||
| 58 | + OLE format: True | ||
| 59 | + Has SummaryInformation stream: True | ||
| 60 | + Application name: Microsoft Office Word | ||
| 61 | + Encrypted: False | ||
| 62 | + Word Document: True | ||
| 63 | + VBA Macros: True | ||
| 64 | + Excel Workbook: False | ||
| 65 | + PowerPoint Presentation: False | ||
| 66 | + Visio Drawing: False | ||
| 67 | + ObjectPool: True | ||
| 68 | + Flash objects: 1 | ||
| 69 | + | ||
| 70 | +oleid project website: [http://www.decalage.info/python/oleid](http://www.decalage.info/python/oleid) | ||
| 71 | + | ||
| 72 | + | ||
| 42 | pyxswf: | 73 | pyxswf: |
| 43 | -------- | 74 | -------- |
| 44 | 75 |
oletools/README.txt
| @@ -20,15 +20,19 @@ Tools in python-oletools: | @@ -20,15 +20,19 @@ Tools in python-oletools: | ||
| 20 | 20 | ||
| 21 | - **olebrowse**: A simple GUI to browse OLE files (e.g. MS Word, Excel, | 21 | - **olebrowse**: A simple GUI to browse OLE files (e.g. MS Word, Excel, |
| 22 | Powerpoint documents), to view and extract individual data streams. | 22 | Powerpoint documents), to view and extract individual data streams. |
| 23 | -- **pyxswf**: a script to detect, extract and analyze Flash objects | ||
| 24 | - (SWF) that may be embedded in files such as MS Office documents (e.g. | ||
| 25 | - Word, Excel), which is especially useful for malware analysis. | 23 | +- **oleid**: a tool to analyze OLE files to detect specific |
| 24 | + characteristics that could potentially indicate that the file is | ||
| 25 | + suspicious or malicious. | ||
| 26 | +- **pyxswf**: a tool to detect, extract and analyze Flash objects (SWF) | ||
| 27 | + that may be embedded in files such as MS Office documents (e.g. Word, | ||
| 28 | + Excel), which is especially useful for malware analysis. | ||
| 26 | - and a few others (coming soon) | 29 | - and a few others (coming soon) |
| 27 | 30 | ||
| 28 | News | 31 | News |
| 29 | ---- | 32 | ---- |
| 30 | 33 | ||
| 31 | -- 2012-10-09: Initial version of olebrowse and pyxswf | 34 | +- 2012-10-29 v0.02: Added oleid |
| 35 | +- 2012-10-09 v0.01: Initial version of olebrowse and pyxswf | ||
| 32 | - see changelog in source code for more info. | 36 | - see changelog in source code for more info. |
| 33 | 37 | ||
| 34 | Download: | 38 | Download: |
| @@ -56,6 +60,41 @@ file for further analysis. | @@ -56,6 +60,41 @@ file for further analysis. | ||
| 56 | For screenshots and other info, see | 60 | For screenshots and other info, see |
| 57 | `http://www.decalage.info/python/olebrowse <http://www.decalage.info/python/olebrowse>`_ | 61 | `http://www.decalage.info/python/olebrowse <http://www.decalage.info/python/olebrowse>`_ |
| 58 | 62 | ||
| 63 | +oleid: | ||
| 64 | +------ | ||
| 65 | + | ||
| 66 | +oleid is a script to analyze OLE files such as MS Office documents (e.g. | ||
| 67 | +Word, Excel), to detect specific characteristics that could potentially | ||
| 68 | +indicate that the file is suspicious or malicious, in terms of security | ||
| 69 | +(e.g. malware). For example it can detect VBA macros, embedded Flash | ||
| 70 | +objects, fragmentation. | ||
| 71 | + | ||
| 72 | +:: | ||
| 73 | + | ||
| 74 | + Usage: oleid.py <file> | ||
| 75 | + | ||
| 76 | +Example - analyzing a Word document containing a Flash object and VBA | ||
| 77 | +macros: | ||
| 78 | + | ||
| 79 | +:: | ||
| 80 | + | ||
| 81 | + C:\oletools>oleid.py word_flash_vba.doc | ||
| 82 | + Filename: word_flash_vba.doc | ||
| 83 | + OLE format: True | ||
| 84 | + Has SummaryInformation stream: True | ||
| 85 | + Application name: Microsoft Office Word | ||
| 86 | + Encrypted: False | ||
| 87 | + Word Document: True | ||
| 88 | + VBA Macros: True | ||
| 89 | + Excel Workbook: False | ||
| 90 | + PowerPoint Presentation: False | ||
| 91 | + Visio Drawing: False | ||
| 92 | + ObjectPool: True | ||
| 93 | + Flash objects: 1 | ||
| 94 | + | ||
| 95 | +oleid project website: | ||
| 96 | +`http://www.decalage.info/python/oleid <http://www.decalage.info/python/oleid>`_ | ||
| 97 | + | ||
| 59 | pyxswf: | 98 | pyxswf: |
| 60 | ------- | 99 | ------- |
| 61 | 100 |
oletools/oleid.py
0 → 100644
| 1 | +#!/usr/bin/env python | ||
| 2 | +""" | ||
| 3 | +oleid.py - Philippe Lagadec 2012-10-18 | ||
| 4 | + | ||
| 5 | +oleid is a script to analyze OLE files such as MS Office documents (e.g. Word, | ||
| 6 | +Excel), to detect specific characteristics that could potentially indicate that | ||
| 7 | +the file is suspicious or malicious, in terms of security (e.g. malware). | ||
| 8 | +For example it can detect VBA macros, embedded Flash objects, fragmentation. | ||
| 9 | +The results are displayed on the console; XML and CSV output are planned (see TODO below). | ||
| 10 | + | ||
| 11 | +Usage: oleid.py <file> | ||
| 12 | + | ||
| 13 | +oleid project website: http://www.decalage.info/python/oleid | ||
| 14 | + | ||
| 15 | +oleid is part of the python-oletools package: | ||
| 16 | +http://www.decalage.info/python/oletools | ||
| 17 | + | ||
| 18 | +oleid is copyright (c) 2012, Philippe Lagadec (http://www.decalage.info) | ||
| 19 | +All rights reserved. | ||
| 20 | + | ||
| 21 | +Redistribution and use in source and binary forms, with or without modification, | ||
| 22 | +are permitted provided that the following conditions are met: | ||
| 23 | + | ||
| 24 | + * Redistributions of source code must retain the above copyright notice, this | ||
| 25 | + list of conditions and the following disclaimer. | ||
| 26 | + * Redistributions in binary form must reproduce the above copyright notice, | ||
| 27 | + this list of conditions and the following disclaimer in the documentation | ||
| 28 | + and/or other materials provided with the distribution. | ||
| 29 | + | ||
| 30 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
| 31 | +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
| 32 | +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 33 | +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
| 34 | +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 35 | +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
| 36 | +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
| 37 | +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
| 38 | +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 39 | +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 40 | +""" | ||
| 41 | + | ||
| 42 | +__version__ = '0.01' | ||
| 43 | + | ||
| 44 | +#------------------------------------------------------------------------------ | ||
| 45 | +# CHANGELOG: | ||
| 46 | +# 2012-10-29 v0.01 PL: - first version | ||
| 47 | + | ||
| 48 | +#------------------------------------------------------------------------------ | ||
| 49 | +# TODO: | ||
| 50 | +# - detect RTF and OpenXML | ||
| 51 | +# - fragmentation | ||
| 52 | +# - OLE package | ||
| 53 | +# - entropy | ||
| 54 | +# - detect PE header? | ||
| 55 | +# - detect NOPs? | ||
| 56 | +# - list type of each object in object pool? | ||
| 57 | +# - criticality for each indicator?: info, low, medium, high | ||
| 58 | +# - support wildcards with glob? | ||
| 59 | +# - verbose option | ||
| 60 | +# - csv, xml output | ||
| 61 | + | ||
| 62 | +import optparse, sys, os, re, zlib, struct | ||
| 63 | +from thirdparty.OleFileIO_PL import OleFileIO_PL | ||
| 64 | + | ||
| 65 | + | ||
class Indicator(object):
    """
    Result of one check performed on a file.

    Attributes:
        id:          short identifier of the indicator (e.g. 'vba_macros')
        value:       result of the check (None until a value is assigned)
        type:        Python type that the value is expected to have
        name:        label used when displaying the indicator
        description: optional longer explanation, or None
    """

    def __init__(self, _id, value=None, _type=bool, name=None, description=None):
        """
        _id:         identifier of the indicator
        value:       initial value
        _type:       expected type of the value (bool by default)
        name:        display label; the identifier is used when omitted
        description: optional free-text description
        """
        self.id = _id
        self.value = value
        self.type = _type
        # Identity test: only a missing name falls back on the identifier,
        # an explicit empty string is kept as given.
        self.name = _id if name is None else name
        self.description = description
| 76 | + | ||
| 77 | + | ||
def detect_flash(data):
    """
    Detect Flash objects (SWF files) within a binary string of data.

    data: byte string to scan (str on Python 2, bytes on Python 3).

    Return a list of (start_index, length, compressed) tuples, or [] if
    nothing is found:
      - start_index: offset of the 'FWS'/'CWS' signature in data
      - length:      size declared in the SWF header
      - compressed:  True for a 'CWS' (zlib-compressed) object

    A candidate is kept only if its version byte is at most 20, its declared
    size is at least 1024 bytes and fits in the remaining data and, for
    compressed objects, the payload after the 8-byte header can be inflated
    by zlib. Uncompressed candidates are not validated any further, so false
    positives are possible.

    Code inspired from xxxswf.py by Alexander Hanel (but significantly reworked)
    http://hooked-on-mnemonics.blogspot.nl/2011/12/xxxswfpy.html
    """
    found = []
    data_len = len(data)
    for match in re.finditer(b'CWS|FWS', data):
        start = match.start()
        if start + 8 > data_len:
            # header size larger than remaining data, this is not a SWF
            continue
        # One unpack for the whole 8-byte header: 3-byte signature, version
        # and declared size. Both numbers are read as unsigned so that a
        # version byte >= 0x80 cannot turn negative and bypass the check below.
        signature, version, size = struct.unpack_from('<3sBI', data, start)
        # TODO: is this limit accurate? (check SWF specifications)
        if version > 20:
            continue
        if size < 1024 or start + size > data_len:
            # declared size too small for a usual SWF,
            # or larger than remaining data: this is not a SWF
            continue
        compressed = (signature == b'CWS')
        if compressed:
            # compressed SWF: data after the header (8 bytes) is compressed
            # with zlib. Attempt to decompress it to check if it is valid.
            try:
                zlib.decompress(data[start + 8:start + size])
            except zlib.error:
                continue
        found.append((start, size, compressed))
    return found
| 127 | + | ||
| 128 | + | ||
class OleID(object):
    """
    Run a series of checks on a file and collect the results as a list of
    Indicator objects.

    Typical use:
        indicators = OleID(filename).check()

    The individual check_* methods expect self.ole to be an open OLE file
    and are meant to be called by check(), in the order used there: some of
    them update indicators created by an earlier one (self.encrypted is
    created by check_encrypted, self.macros by check_word).
    """

    def __init__(self, filename):
        """
        filename: path of the file to analyze
        """
        self.filename = filename
        self.indicators = []

    def check(self):
        """
        Run all the checks and return the list of indicators.

        If the file is not an OLE file, the list only contains the
        'ole_format' indicator, set to False.
        """
        # check if it is actually an OLE file:
        oleformat = Indicator('ole_format', True, name='OLE format')
        self.indicators.append(oleformat)
        if not OleFileIO_PL.isOleFile(self.filename):
            oleformat.value = False
            return self.indicators
        # parse file:
        self.ole = OleFileIO_PL.OleFileIO(self.filename)
        # make sure the file is closed even if one of the checks fails:
        try:
            self.check_properties()
            self.check_encrypted()
            self.check_word()
            self.check_excel()
            self.check_powerpoint()
            self.check_visio()
            self.check_ObjectPool()
            self.check_flash()
        finally:
            self.ole.close()
        return self.indicators

    def check_properties(self):
        """
        Add the 'has_suminfo' and 'appname' indicators, and store the
        properties of the SummaryInformation stream in self.suminfo
        (empty dict if the stream is missing).
        """
        suminfo = Indicator('has_suminfo', False, name='Has SummaryInformation stream')
        self.indicators.append(suminfo)
        appname = Indicator('appname', 'unknown', _type=str, name='Application name')
        self.indicators.append(appname)
        self.suminfo = {}
        # check stream SummaryInformation
        if self.ole.exists("\x05SummaryInformation"):
            suminfo.value = True
            self.suminfo = self.ole.getproperties("\x05SummaryInformation")
            # check application name (property 0x12):
            appname.value = self.suminfo.get(0x12, 'unknown')

    def check_encrypted(self):
        """
        Add the 'encrypted' indicator, set from the security field
        (property 0x13) read by check_properties.
        """
        # we keep the pointer to the indicator, can be modified by other checks:
        self.encrypted = Indicator('encrypted', False, name='Encrypted')
        self.indicators.append(self.encrypted)
        # check the lowest bit (mask 0x01) of the security field:
        # (this field may be missing for Powerpoint2000, for example)
        if self.suminfo.get(0x13, 0) & 1:
            self.encrypted.value = True

    def check_word(self):
        """
        Add the 'word' and 'vba_macros' indicators. For a Word document,
        also update the 'encrypted' indicator from the flags stored in the
        WordDocument stream.
        """
        word = Indicator('word', False, name='Word Document',
            description='Contains a WordDocument stream, very likely to be a Microsoft Word Document.')
        self.indicators.append(word)
        self.macros = Indicator('vba_macros', False, name='VBA Macros')
        self.indicators.append(self.macros)
        if not self.ole.exists('WordDocument'):
            return
        word.value = True
        # check for Word-specific encryption flag:
        stream = self.ole.openstream(["WordDocument"])
        try:
            # pass header 10 bytes
            stream.read(10)
            # read flag structure (2 bytes):
            flags = stream.read(2)
        finally:
            stream.close()
        # a truncated stream has no flags to test
        if len(flags) == 2:
            # explicit little-endian: the value is read from the file, its
            # byte order must not depend on the machine running the script
            temp16 = struct.unpack("<H", flags)[0]
            if temp16 & 0x0100:
                self.encrypted.value = True
        # check for VBA macros:
        if self.ole.exists('Macros'):
            self.macros.value = True

    def check_excel(self):
        """
        Add the 'excel' indicator. For an Excel workbook, also update the
        'vba_macros' indicator created by check_word.
        """
        excel = Indicator('excel', False, name='Excel Workbook',
            description='Contains a Workbook or Book stream, very likely to be a Microsoft Excel Workbook.')
        self.indicators.append(excel)
        if self.ole.exists('Workbook') or self.ole.exists('Book'):
            excel.value = True
            # check for VBA macros:
            if self.ole.exists('_VBA_PROJECT_CUR'):
                self.macros.value = True

    def check_powerpoint(self):
        """
        Add the 'ppt' indicator.
        """
        ppt = Indicator('ppt', False, name='PowerPoint Presentation',
            description='Contains a PowerPoint Document stream, very likely to be a Microsoft PowerPoint Presentation.')
        self.indicators.append(ppt)
        if self.ole.exists('PowerPoint Document'):
            ppt.value = True

    def check_visio(self):
        """
        Add the 'visio' indicator.
        """
        visio = Indicator('visio', False, name='Visio Drawing',
            description='Contains a VisioDocument stream, very likely to be a Microsoft Visio Drawing.')
        self.indicators.append(visio)
        if self.ole.exists('VisioDocument'):
            visio.value = True

    def check_ObjectPool(self):
        """
        Add the 'ObjectPool' indicator.
        """
        objpool = Indicator('ObjectPool', False, name='ObjectPool',
            description='Contains an ObjectPool stream, very likely to contain embedded OLE objects or files.')
        self.indicators.append(objpool)
        if self.ole.exists('ObjectPool'):
            objpool.value = True

    def check_flash(self):
        """
        Add the 'flash' indicator: total number of Flash objects reported by
        detect_flash over every entry returned by self.ole.listdir().
        """
        flash = Indicator('flash', 0, _type=int, name='Flash objects',
            description='Number of embedded Flash objects (SWF files) detected in OLE streams. Not 100% accurate, there may be false positives.')
        self.indicators.append(flash)
        for stream_path in self.ole.listdir():
            stream = self.ole.openstream(stream_path)
            try:
                data = stream.read()
            finally:
                stream.close()
            # just add to the count of Flash objects:
            flash.value += len(detect_flash(data))
| 245 | + | ||
def main(argv=None):
    """
    Command-line entry point: run the OleID checks on each file given as
    argument and print one "name: value" line per indicator.

    argv: optional list of command-line arguments, without the program name.
          When None (default), the arguments are taken from sys.argv[1:].
    """
    usage = 'usage: %prog [options] <file>'
    parser = optparse.OptionParser(usage=usage)

    _options, args = parser.parse_args(argv)

    # Print help if no arguments are passed
    if not args:
        parser.print_help()
        return

    for filename in args:
        # single-argument print(): same output with the Python 2 print
        # statement and with the Python 3 print function
        print('\nFilename: %s' % filename)
        indicators = OleID(filename).check()
        for indicator in indicators:
            print('%s: %s' % (indicator.name, indicator.value))


if __name__ == '__main__':
    main()