Commit 0762f5bbb297bc1bf2e39331321f39f2575b2d22

Authored by Philippe Lagadec
1 parent dc628fab

added support for Word MHTML files with macros (Single File Web Page), fixed issue #10

Showing 1 changed file with 57 additions and 16 deletions
oletools/olevba.py
@@ -11,6 +11,7 @@ Supported formats: @@ -11,6 +11,7 @@ Supported formats:
11 - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb) 11 - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
12 - PowerPoint 2007+ (.pptm, .ppsm) 12 - PowerPoint 2007+ (.pptm, .ppsm)
13 - Word 2003 XML (.xml) 13 - Word 2003 XML (.xml)
  14 +- Word Single File Web Page / MHTML (.mht)
14 15
15 Author: Philippe Lagadec - http://www.decalage.info 16 Author: Philippe Lagadec - http://www.decalage.info
16 License: BSD, see source code or documentation 17 License: BSD, see source code or documentation
@@ -127,8 +128,10 @@ https://github.com/unixfreak0037/officeparser @@ -127,8 +128,10 @@ https://github.com/unixfreak0037/officeparser
127 # 2015-03-04 v0.25 PL: - added support for Word 2003 XML 128 # 2015-03-04 v0.25 PL: - added support for Word 2003 XML
128 # 2015-03-22 v0.26 PL: - added suspicious keywords for sandboxing and 129 # 2015-03-22 v0.26 PL: - added suspicious keywords for sandboxing and
129 # virtualisation detection 130 # virtualisation detection
  131 +# 2015-05-06 v0.27 PL: - added support for MHTML files with VBA macros
  132 +# (issue #10 reported by Greg from SpamStopsHere)
130 133
131 -__version__ = '0.26' 134 +__version__ = '0.27'
132 135
133 #------------------------------------------------------------------------------ 136 #------------------------------------------------------------------------------
134 # TODO: 137 # TODO:
@@ -175,6 +178,7 @@ import binascii @@ -175,6 +178,7 @@ import binascii
175 import base64 178 import base64
176 import traceback 179 import traceback
177 import zlib 180 import zlib
  181 +import email # for MHTML parsing
178 182
179 # import lxml or ElementTree for XML parsing: 183 # import lxml or ElementTree for XML parsing:
180 try: 184 try:
@@ -199,9 +203,11 @@ from thirdparty.xglob import xglob @@ -199,9 +203,11 @@ from thirdparty.xglob import xglob
199 203
200 #--- CONSTANTS ---------------------------------------------------------------- 204 #--- CONSTANTS ----------------------------------------------------------------
201 205
  206 +# Container types:
202 TYPE_OLE = 'OLE' 207 TYPE_OLE = 'OLE'
203 TYPE_OpenXML = 'OpenXML' 208 TYPE_OpenXML = 'OpenXML'
204 TYPE_Word2003_XML = 'Word2003_XML' 209 TYPE_Word2003_XML = 'Word2003_XML'
  210 +TYPE_MHTML = 'MHTML'
205 211
206 MODULE_EXTENSION = "bas" 212 MODULE_EXTENSION = "bas"
207 CLASS_EXTENSION = "cls" 213 CLASS_EXTENSION = "cls"
@@ -308,32 +314,32 @@ SUSPICIOUS_KEYWORDS = { @@ -308,32 +314,32 @@ SUSPICIOUS_KEYWORDS = {
308 ('RegQueryValueExA', 'RegQueryValueEx', 314 ('RegQueryValueExA', 'RegQueryValueEx',
309 'RegRead', #with Wscript.Shell 315 'RegRead', #with Wscript.Shell
310 ), 316 ),
311 - 'May detect virtualisation':  
312 - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ 317 + 'May detect virtualization':
  318 + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
313 (r'SYSTEM\ControlSet001\Services\Disk\Enum', 'VIRTUAL', 'VMWARE', 'VBOX'), 319 (r'SYSTEM\ControlSet001\Services\Disk\Enum', 'VIRTUAL', 'VMWARE', 'VBOX'),
314 'May detect Anubis Sandbox': 320 'May detect Anubis Sandbox':
315 - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/  
316 - #NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA  
317 - #ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf  
318 - ('GetVolumeInformationA', 'GetVolumeInformation', #with kernel32.dll 321 + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
  322 + # NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA
  323 + # ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf
  324 + ('GetVolumeInformationA', 'GetVolumeInformation', # with kernel32.dll
319 '1824245000', r'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProductId', 325 '1824245000', r'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProductId',
320 '76487-337-8429955-22614', 'andy', 'sample', r'C:\exec\exec.exe', 'popupkiller' 326 '76487-337-8429955-22614', 'andy', 'sample', r'C:\exec\exec.exe', 'popupkiller'
321 ), 327 ),
322 'May detect Sandboxie': 328 'May detect Sandboxie':
323 - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/  
324 - #ref: http://www.cplusplus.com/forum/windows/96874/ 329 + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
  330 + # ref: http://www.cplusplus.com/forum/windows/96874/
325 ('SbieDll.dll', 'SandboxieControlWndClass'), 331 ('SbieDll.dll', 'SandboxieControlWndClass'),
326 'May detect Sunbelt Sandbox': 332 'May detect Sunbelt Sandbox':
327 - #ref: http://www.cplusplus.com/forum/windows/96874/ 333 + # ref: http://www.cplusplus.com/forum/windows/96874/
328 (r'C:\file.exe',), 334 (r'C:\file.exe',),
329 'May detect Norman Sandbox': 335 'May detect Norman Sandbox':
330 - #ref: http://www.cplusplus.com/forum/windows/96874/ 336 + # ref: http://www.cplusplus.com/forum/windows/96874/
331 ('currentuser',), 337 ('currentuser',),
332 'May detect CW Sandbox': 338 'May detect CW Sandbox':
333 - #ref: http://www.cplusplus.com/forum/windows/96874/ 339 + # ref: http://www.cplusplus.com/forum/windows/96874/
334 ('Schmidti',), 340 ('Schmidti',),
335 'May detect WinJail Sandbox': 341 'May detect WinJail Sandbox':
336 - #ref: http://www.cplusplus.com/forum/windows/96874/ 342 + # ref: http://www.cplusplus.com/forum/windows/96874/
337 ('Afx:400000:0',), 343 ('Afx:400000:0',),
338 } 344 }
339 345
@@ -1215,8 +1221,12 @@ class VBA_Parser(object): @@ -1215,8 +1221,12 @@ class VBA_Parser(object):
1215 """ 1221 """
1216 Class to parse MS Office files, to detect VBA macros and extract VBA source code 1222 Class to parse MS Office files, to detect VBA macros and extract VBA source code
1217 Supported file formats: 1223 Supported file formats:
1218 - - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)  
1219 - - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb) 1224 + - Word 97-2003 (.doc, .dot)
  1225 + - Word 2007+ (.docm, .dotm)
  1226 + - Word 2003 XML (.xml)
  1227 + - Word MHT - Single File Web Page / MHTML (.mht)
  1228 + - Excel 97-2003 (.xls)
  1229 + - Excel 2007+ (.xlsm, .xlsb)
1220 - PowerPoint 2007+ (.pptm, .ppsm) 1230 - PowerPoint 2007+ (.pptm, .ppsm)
1221 """ 1231 """
1222 1232
@@ -1287,6 +1297,7 @@ class VBA_Parser(object): @@ -1287,6 +1297,7 @@ class VBA_Parser(object):
1287 # or a plain text file containing VBA code 1297 # or a plain text file containing VBA code
1288 if data is None: 1298 if data is None:
1289 data = open(filename, 'rb').read() 1299 data = open(filename, 'rb').read()
  1300 + # TODO: move each format parser to a separate method
1290 # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace 1301 # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace
1291 if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: 1302 if 'http://schemas.microsoft.com/office/word/2003/wordml' in data:
1292 logging.info('Opening Word 2003 XML file %s' % self.filename) 1303 logging.info('Opening Word 2003 XML file %s' % self.filename)
@@ -1308,6 +1319,33 @@ class VBA_Parser(object): @@ -1308,6 +1319,33 @@ class VBA_Parser(object):
1308 except: 1319 except:
1309 logging.debug('%s is not a valid OLE file' % fname) 1320 logging.debug('%s is not a valid OLE file' % fname)
1310 continue 1321 continue
  1322 + # check if it is a Word MHTML file (Single File Web Page): must start with a MIME-Version header
  1323 + # TODO: check if Word accepts data before the MIME header, if it is case-sensitive, etc.
  1324 + elif data.lower().startswith('mime-version:'):
  1325 + logging.info('Opening Word MHTML file %s' % self.filename)
  1326 + self.type = TYPE_MHTML
  1327 + # parse the MIME content
  1328 + mhtml = email.message_from_string(data)
  1329 + # find all the attached files:
  1330 + for part in mhtml.walk():
  1331 + content_type = part.get_content_type() # always returns a value
  1332 + fname = part.get_filename(None) # returns None if it fails
  1333 + logging.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type))
  1334 + part_data = part.get_payload(decode=True)
  1335 + # VBA macros are stored in a binary file named "editdata.mso".
  1336 + # the data content is an OLE container for the VBA project, compressed
  1337 + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
  1338 + # decompress the zlib data starting at offset 0x32, which is the OLE container:
  1339 + try:
  1340 + ole_data = zlib.decompress(part_data[0x32:])
  1341 + except:
  1342 + logging.debug('%s is not an ActiveMime container' % fname)
  1343 + continue
  1344 + try:
  1345 + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data))
  1346 + except:
  1347 + logging.debug('%s is not a valid OLE file' % fname)
  1348 + continue
1311 #TODO: handle exceptions 1349 #TODO: handle exceptions
1312 #TODO: Excel 2003 XML 1350 #TODO: Excel 2003 XML
1313 #TODO: plain text VBA file 1351 #TODO: plain text VBA file
@@ -1575,6 +1613,8 @@ def process_file_triage (container, filename, data): @@ -1575,6 +1613,8 @@ def process_file_triage (container, filename, data):
1575 flags = 'OpX:' 1613 flags = 'OpX:'
1576 elif vba.type == TYPE_Word2003_XML: 1614 elif vba.type == TYPE_Word2003_XML:
1577 flags = 'XML:' 1615 flags = 'XML:'
  1616 + elif vba.type == TYPE_MHTML:
  1617 + flags = 'MHT:'
1578 macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = '-' 1618 macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = '-'
1579 if nb_macros: macros = 'M' 1619 if nb_macros: macros = 'M'
1580 if nb_autoexec: autoexec = 'A' 1620 if nb_autoexec: autoexec = 'A'
@@ -1664,6 +1704,7 @@ def main(): @@ -1664,6 +1704,7 @@ def main():
1664 # print banner with version 1704 # print banner with version
1665 print 'olevba %s - http://decalage.info/python/oletools' % __version__ 1705 print 'olevba %s - http://decalage.info/python/oletools' % __version__
1666 1706
  1707 + # TODO: option to set logging level, none by default
1667 logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) 1708 logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO)
1668 # For now, all logging is disabled: 1709 # For now, all logging is disabled:
1669 logging.disable(logging.CRITICAL) 1710 logging.disable(logging.CRITICAL)
@@ -1701,7 +1742,7 @@ def main(): @@ -1701,7 +1742,7 @@ def main():
1701 process_file_triage(container, filename, data) 1742 process_file_triage(container, filename, data)
1702 count += 1 1743 count += 1
1703 if not options.detailed_mode or options.triage_mode: 1744 if not options.detailed_mode or options.triage_mode:
1704 - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, B=Base64 strings, D=Dridex strings, ?=Unknown)\n' 1745 + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, B=Base64 strings, D=Dridex strings, ?=Unknown)\n'
1705 1746
1706 if count == 1 and not options.triage_mode and not options.detailed_mode: 1747 if count == 1 and not options.triage_mode and not options.detailed_mode:
1707 # if options -t and -d were not specified and it's a single file, print details: 1748 # if options -t and -d were not specified and it's a single file, print details: