Commit 0762f5bbb297bc1bf2e39331321f39f2575b2d22
1 parent
dc628fab
added support for Word MHTML files with macros (Single File Web Page), fixed issue #10
Showing
1 changed file
with
57 additions
and
16 deletions
oletools/olevba.py
| @@ -11,6 +11,7 @@ Supported formats: | @@ -11,6 +11,7 @@ Supported formats: | ||
| 11 | - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb) | 11 | - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb) |
| 12 | - PowerPoint 2007+ (.pptm, .ppsm) | 12 | - PowerPoint 2007+ (.pptm, .ppsm) |
| 13 | - Word 2003 XML (.xml) | 13 | - Word 2003 XML (.xml) |
| 14 | +- Word Single File Web Page / MHTML (.mht) | ||
| 14 | 15 | ||
| 15 | Author: Philippe Lagadec - http://www.decalage.info | 16 | Author: Philippe Lagadec - http://www.decalage.info |
| 16 | License: BSD, see source code or documentation | 17 | License: BSD, see source code or documentation |
| @@ -127,8 +128,10 @@ https://github.com/unixfreak0037/officeparser | @@ -127,8 +128,10 @@ https://github.com/unixfreak0037/officeparser | ||
| 127 | # 2015-03-04 v0.25 PL: - added support for Word 2003 XML | 128 | # 2015-03-04 v0.25 PL: - added support for Word 2003 XML |
| 128 | # 2015-03-22 v0.26 PL: - added suspicious keywords for sandboxing and | 129 | # 2015-03-22 v0.26 PL: - added suspicious keywords for sandboxing and |
| 129 | # virtualisation detection | 130 | # virtualisation detection |
| 131 | +# 2015-05-06 v0.27 PL: - added support for MHTML files with VBA macros | ||
| 132 | +# (issue #10 reported by Greg from SpamStopsHere) | ||
| 130 | 133 | ||
| 131 | -__version__ = '0.26' | 134 | +__version__ = '0.27' |
| 132 | 135 | ||
| 133 | #------------------------------------------------------------------------------ | 136 | #------------------------------------------------------------------------------ |
| 134 | # TODO: | 137 | # TODO: |
| @@ -175,6 +178,7 @@ import binascii | @@ -175,6 +178,7 @@ import binascii | ||
| 175 | import base64 | 178 | import base64 |
| 176 | import traceback | 179 | import traceback |
| 177 | import zlib | 180 | import zlib |
| 181 | +import email # for MHTML parsing | ||
| 178 | 182 | ||
| 179 | # import lxml or ElementTree for XML parsing: | 183 | # import lxml or ElementTree for XML parsing: |
| 180 | try: | 184 | try: |
| @@ -199,9 +203,11 @@ from thirdparty.xglob import xglob | @@ -199,9 +203,11 @@ from thirdparty.xglob import xglob | ||
| 199 | 203 | ||
| 200 | #--- CONSTANTS ---------------------------------------------------------------- | 204 | #--- CONSTANTS ---------------------------------------------------------------- |
| 201 | 205 | ||
| 206 | +# Container types: | ||
| 202 | TYPE_OLE = 'OLE' | 207 | TYPE_OLE = 'OLE' |
| 203 | TYPE_OpenXML = 'OpenXML' | 208 | TYPE_OpenXML = 'OpenXML' |
| 204 | TYPE_Word2003_XML = 'Word2003_XML' | 209 | TYPE_Word2003_XML = 'Word2003_XML' |
| 210 | +TYPE_MHTML = 'MHTML' | ||
| 205 | 211 | ||
| 206 | MODULE_EXTENSION = "bas" | 212 | MODULE_EXTENSION = "bas" |
| 207 | CLASS_EXTENSION = "cls" | 213 | CLASS_EXTENSION = "cls" |
| @@ -308,32 +314,32 @@ SUSPICIOUS_KEYWORDS = { | @@ -308,32 +314,32 @@ SUSPICIOUS_KEYWORDS = { | ||
| 308 | ('RegQueryValueExA', 'RegQueryValueEx', | 314 | ('RegQueryValueExA', 'RegQueryValueEx', |
| 309 | 'RegRead', #with Wscript.Shell | 315 | 'RegRead', #with Wscript.Shell |
| 310 | ), | 316 | ), |
| 311 | - 'May detect virtualisation': | ||
| 312 | - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | 317 | + 'May detect virtualization': |
| 318 | + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | ||
| 313 | (r'SYSTEM\ControlSet001\Services\Disk\Enum', 'VIRTUAL', 'VMWARE', 'VBOX'), | 319 | (r'SYSTEM\ControlSet001\Services\Disk\Enum', 'VIRTUAL', 'VMWARE', 'VBOX'), |
| 314 | 'May detect Anubis Sandbox': | 320 | 'May detect Anubis Sandbox': |
| 315 | - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | ||
| 316 | - #NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA | ||
| 317 | - #ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf | ||
| 318 | - ('GetVolumeInformationA', 'GetVolumeInformation', #with kernel32.dll | 321 | + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ |
| 322 | + # NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA | ||
| 323 | + # ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf | ||
| 324 | + ('GetVolumeInformationA', 'GetVolumeInformation', # with kernel32.dll | ||
| 319 | '1824245000', r'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProductId', | 325 | '1824245000', r'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProductId', |
| 320 | '76487-337-8429955-22614', 'andy', 'sample', r'C:\exec\exec.exe', 'popupkiller' | 326 | '76487-337-8429955-22614', 'andy', 'sample', r'C:\exec\exec.exe', 'popupkiller' |
| 321 | ), | 327 | ), |
| 322 | 'May detect Sandboxie': | 328 | 'May detect Sandboxie': |
| 323 | - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | ||
| 324 | - #ref: http://www.cplusplus.com/forum/windows/96874/ | 329 | + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ |
| 330 | + # ref: http://www.cplusplus.com/forum/windows/96874/ | ||
| 325 | ('SbieDll.dll', 'SandboxieControlWndClass'), | 331 | ('SbieDll.dll', 'SandboxieControlWndClass'), |
| 326 | 'May detect Sunbelt Sandbox': | 332 | 'May detect Sunbelt Sandbox': |
| 327 | - #ref: http://www.cplusplus.com/forum/windows/96874/ | 333 | + # ref: http://www.cplusplus.com/forum/windows/96874/ |
| 328 | (r'C:\file.exe',), | 334 | (r'C:\file.exe',), |
| 329 | 'May detect Norman Sandbox': | 335 | 'May detect Norman Sandbox': |
| 330 | - #ref: http://www.cplusplus.com/forum/windows/96874/ | 336 | + # ref: http://www.cplusplus.com/forum/windows/96874/ |
| 331 | ('currentuser',), | 337 | ('currentuser',), |
| 332 | 'May detect CW Sandbox': | 338 | 'May detect CW Sandbox': |
| 333 | - #ref: http://www.cplusplus.com/forum/windows/96874/ | 339 | + # ref: http://www.cplusplus.com/forum/windows/96874/ |
| 334 | ('Schmidti',), | 340 | ('Schmidti',), |
| 335 | 'May detect WinJail Sandbox': | 341 | 'May detect WinJail Sandbox': |
| 336 | - #ref: http://www.cplusplus.com/forum/windows/96874/ | 342 | + # ref: http://www.cplusplus.com/forum/windows/96874/ |
| 337 | ('Afx:400000:0',), | 343 | ('Afx:400000:0',), |
| 338 | } | 344 | } |
| 339 | 345 | ||
| @@ -1215,8 +1221,12 @@ class VBA_Parser(object): | @@ -1215,8 +1221,12 @@ class VBA_Parser(object): | ||
| 1215 | """ | 1221 | """ |
| 1216 | Class to parse MS Office files, to detect VBA macros and extract VBA source code | 1222 | Class to parse MS Office files, to detect VBA macros and extract VBA source code |
| 1217 | Supported file formats: | 1223 | Supported file formats: |
| 1218 | - - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm) | ||
| 1219 | - - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb) | 1224 | + - Word 97-2003 (.doc, .dot) |
| 1225 | + - Word 2007+ (.docm, .dotm) | ||
| 1226 | + - Word 2003 XML (.xml) | ||
| 1227 | + - Word MHT - Single File Web Page / MHTML (.mht) | ||
| 1228 | + - Excel 97-2003 (.xls) | ||
| 1229 | + - Excel 2007+ (.xlsm, .xlsb) | ||
| 1220 | - PowerPoint 2007+ (.pptm, .ppsm) | 1230 | - PowerPoint 2007+ (.pptm, .ppsm) |
| 1221 | """ | 1231 | """ |
| 1222 | 1232 | ||
| @@ -1287,6 +1297,7 @@ class VBA_Parser(object): | @@ -1287,6 +1297,7 @@ class VBA_Parser(object): | ||
| 1287 | # or a plain text file containing VBA code | 1297 | # or a plain text file containing VBA code |
| 1288 | if data is None: | 1298 | if data is None: |
| 1289 | data = open(filename, 'rb').read() | 1299 | data = open(filename, 'rb').read() |
| 1300 | + # TODO: move each format parser to a separate method | ||
| 1290 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace | 1301 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace |
| 1291 | if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: | 1302 | if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: |
| 1292 | logging.info('Opening Word 2003 XML file %s' % self.filename) | 1303 | logging.info('Opening Word 2003 XML file %s' % self.filename) |
| @@ -1308,6 +1319,33 @@ class VBA_Parser(object): | @@ -1308,6 +1319,33 @@ class VBA_Parser(object): | ||
| 1308 | except: | 1319 | except: |
| 1309 | logging.debug('%s is not a valid OLE file' % fname) | 1320 | logging.debug('%s is not a valid OLE file' % fname) |
| 1310 | continue | 1321 | continue |
| 1322 | +# check if it is a Word MHTML file (Single File Web Page): must start with the MIME-Version header | ||
| 1323 | +# TODO: check if Word accepts data before the MIME header, if it is case-sensitive, etc. | ||
| 1324 | + elif data.lower().startswith('mime-version:'): | ||
| 1325 | + logging.info('Opening Word MHTML file %s' % self.filename) | ||
| 1326 | + self.type = TYPE_MHTML | ||
| 1327 | + # parse the MIME content | ||
| 1328 | + mhtml = email.message_from_string(data) | ||
| 1329 | + # find all the attached files: | ||
| 1330 | + for part in mhtml.walk(): | ||
| 1331 | + content_type = part.get_content_type() # always returns a value | ||
| 1332 | + fname = part.get_filename(None) # returns None if it fails | ||
| 1333 | + logging.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type)) | ||
| 1334 | + part_data = part.get_payload(decode=True) | ||
| 1335 | + # VBA macros are stored in a binary file named "editdata.mso". | ||
| 1336 | + # the data content is an OLE container for the VBA project, compressed | ||
| 1337 | + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | ||
| 1338 | + # decompress the zlib data starting at offset 0x32, which is the OLE container: | ||
| 1339 | + try: | ||
| 1340 | + ole_data = zlib.decompress(part_data[0x32:]) | ||
| 1341 | + except: | ||
| 1342 | + logging.debug('%s is not an ActiveMime container' % fname) | ||
| 1343 | + continue | ||
| 1344 | + try: | ||
| 1345 | + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | ||
| 1346 | + except: | ||
| 1347 | + logging.debug('%s is not a valid OLE file' % fname) | ||
| 1348 | + continue | ||
| 1311 | #TODO: handle exceptions | 1349 | #TODO: handle exceptions |
| 1312 | #TODO: Excel 2003 XML | 1350 | #TODO: Excel 2003 XML |
| 1313 | #TODO: plain text VBA file | 1351 | #TODO: plain text VBA file |
| @@ -1575,6 +1613,8 @@ def process_file_triage (container, filename, data): | @@ -1575,6 +1613,8 @@ def process_file_triage (container, filename, data): | ||
| 1575 | flags = 'OpX:' | 1613 | flags = 'OpX:' |
| 1576 | elif vba.type == TYPE_Word2003_XML: | 1614 | elif vba.type == TYPE_Word2003_XML: |
| 1577 | flags = 'XML:' | 1615 | flags = 'XML:' |
| 1616 | + elif vba.type == TYPE_MHTML: | ||
| 1617 | + flags = 'MHT:' | ||
| 1578 | macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = '-' | 1618 | macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = '-' |
| 1579 | if nb_macros: macros = 'M' | 1619 | if nb_macros: macros = 'M' |
| 1580 | if nb_autoexec: autoexec = 'A' | 1620 | if nb_autoexec: autoexec = 'A' |
| @@ -1664,6 +1704,7 @@ def main(): | @@ -1664,6 +1704,7 @@ def main(): | ||
| 1664 | # print banner with version | 1704 | # print banner with version |
| 1665 | print 'olevba %s - http://decalage.info/python/oletools' % __version__ | 1705 | print 'olevba %s - http://decalage.info/python/oletools' % __version__ |
| 1666 | 1706 | ||
| 1707 | + # TODO: option to set logging level, none by default | ||
| 1667 | logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) | 1708 | logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) |
| 1668 | # For now, all logging is disabled: | 1709 | # For now, all logging is disabled: |
| 1669 | logging.disable(logging.CRITICAL) | 1710 | logging.disable(logging.CRITICAL) |
| @@ -1701,7 +1742,7 @@ def main(): | @@ -1701,7 +1742,7 @@ def main(): | ||
| 1701 | process_file_triage(container, filename, data) | 1742 | process_file_triage(container, filename, data) |
| 1702 | count += 1 | 1743 | count += 1 |
| 1703 | if not options.detailed_mode or options.triage_mode: | 1744 | if not options.detailed_mode or options.triage_mode: |
| 1704 | - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, B=Base64 strings, D=Dridex strings, ?=Unknown)\n' | 1745 | + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, B=Base64 strings, D=Dridex strings, ?=Unknown)\n' |
| 1705 | 1746 | ||
| 1706 | if count == 1 and not options.triage_mode and not options.detailed_mode: | 1747 | if count == 1 and not options.triage_mode and not options.detailed_mode: |
| 1707 | # if options -t and -d were not specified and it's a single file, print details: | 1748 | # if options -t and -d were not specified and it's a single file, print details: |