Commit 0762f5bbb297bc1bf2e39331321f39f2575b2d22

Authored by Philippe Lagadec
1 parent dc628fab

added support for Word MHTML files with macros (Single File Web Page), fixed issue #10

Showing 1 changed file with 57 additions and 16 deletions
oletools/olevba.py
... ... @@ -11,6 +11,7 @@ Supported formats:
11 11 - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
12 12 - PowerPoint 2007+ (.pptm, .ppsm)
13 13 - Word 2003 XML (.xml)
  14 +- Word Single File Web Page / MHTML (.mht)
14 15  
15 16 Author: Philippe Lagadec - http://www.decalage.info
16 17 License: BSD, see source code or documentation
... ... @@ -127,8 +128,10 @@ https://github.com/unixfreak0037/officeparser
127 128 # 2015-03-04 v0.25 PL: - added support for Word 2003 XML
128 129 # 2015-03-22 v0.26 PL: - added suspicious keywords for sandboxing and
129 130 # virtualisation detection
  131 +# 2015-05-06 v0.27 PL: - added support for MHTML files with VBA macros
  132 +# (issue #10 reported by Greg from SpamStopsHere)
130 133  
131   -__version__ = '0.26'
  134 +__version__ = '0.27'
132 135  
133 136 #------------------------------------------------------------------------------
134 137 # TODO:
... ... @@ -175,6 +178,7 @@ import binascii
175 178 import base64
176 179 import traceback
177 180 import zlib
  181 +import email # for MHTML parsing
178 182  
179 183 # import lxml or ElementTree for XML parsing:
180 184 try:
... ... @@ -199,9 +203,11 @@ from thirdparty.xglob import xglob
199 203  
200 204 #--- CONSTANTS ----------------------------------------------------------------
201 205  
  206 +# Container types:
202 207 TYPE_OLE = 'OLE'
203 208 TYPE_OpenXML = 'OpenXML'
204 209 TYPE_Word2003_XML = 'Word2003_XML'
  210 +TYPE_MHTML = 'MHTML'
205 211  
206 212 MODULE_EXTENSION = "bas"
207 213 CLASS_EXTENSION = "cls"
... ... @@ -308,32 +314,32 @@ SUSPICIOUS_KEYWORDS = {
308 314 ('RegQueryValueExA', 'RegQueryValueEx',
309 315 'RegRead', #with Wscript.Shell
310 316 ),
311   - 'May detect virtualisation':
312   - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
  317 + 'May detect virtualization':
  318 + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
313 319 (r'SYSTEM\ControlSet001\Services\Disk\Enum', 'VIRTUAL', 'VMWARE', 'VBOX'),
314 320 'May detect Anubis Sandbox':
315   - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
316   - #NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA
317   - #ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf
318   - ('GetVolumeInformationA', 'GetVolumeInformation', #with kernel32.dll
  321 + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
  322 + # NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA
  323 + # ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf
  324 + ('GetVolumeInformationA', 'GetVolumeInformation', # with kernel32.dll
319 325 '1824245000', r'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProductId',
320 326 '76487-337-8429955-22614', 'andy', 'sample', r'C:\exec\exec.exe', 'popupkiller'
321 327 ),
322 328 'May detect Sandboxie':
323   - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
324   - #ref: http://www.cplusplus.com/forum/windows/96874/
  329 + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
  330 + # ref: http://www.cplusplus.com/forum/windows/96874/
325 331 ('SbieDll.dll', 'SandboxieControlWndClass'),
326 332 'May detect Sunbelt Sandbox':
327   - #ref: http://www.cplusplus.com/forum/windows/96874/
  333 + # ref: http://www.cplusplus.com/forum/windows/96874/
328 334 (r'C:\file.exe',),
329 335 'May detect Norman Sandbox':
330   - #ref: http://www.cplusplus.com/forum/windows/96874/
  336 + # ref: http://www.cplusplus.com/forum/windows/96874/
331 337 ('currentuser',),
332 338 'May detect CW Sandbox':
333   - #ref: http://www.cplusplus.com/forum/windows/96874/
  339 + # ref: http://www.cplusplus.com/forum/windows/96874/
334 340 ('Schmidti',),
335 341 'May detect WinJail Sandbox':
336   - #ref: http://www.cplusplus.com/forum/windows/96874/
  342 + # ref: http://www.cplusplus.com/forum/windows/96874/
337 343 ('Afx:400000:0',),
338 344 }
339 345  
... ... @@ -1215,8 +1221,12 @@ class VBA_Parser(object):
1215 1221 """
1216 1222 Class to parse MS Office files, to detect VBA macros and extract VBA source code
1217 1223 Supported file formats:
1218   - - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
1219   - - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
  1224 + - Word 97-2003 (.doc, .dot)
  1225 + - Word 2007+ (.docm, .dotm)
  1226 + - Word 2003 XML (.xml)
  1227 + - Word MHT - Single File Web Page / MHTML (.mht)
  1228 + - Excel 97-2003 (.xls)
  1229 + - Excel 2007+ (.xlsm, .xlsb)
1220 1230 - PowerPoint 2007+ (.pptm, .ppsm)
1221 1231 """
1222 1232  
... ... @@ -1287,6 +1297,7 @@ class VBA_Parser(object):
1287 1297 # or a plain text file containing VBA code
1288 1298 if data is None:
1289 1299 data = open(filename, 'rb').read()
  1300 + # TODO: move each format parser to a separate method
1290 1301 # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace
1291 1302 if 'http://schemas.microsoft.com/office/word/2003/wordml' in data:
1292 1303 logging.info('Opening Word 2003 XML file %s' % self.filename)
... ... @@ -1308,6 +1319,33 @@ class VBA_Parser(object):
1308 1319 except:
1309 1320 logging.debug('%s is not a valid OLE file' % fname)
1310 1321 continue
  1322 + # check if it is a Word MHTML file (Single File Web Page): must start with the "MIME-Version:" header
  1323 + # TODO: check whether Word accepts data before the MIME header, whether the header is case-sensitive, etc.
  1324 + elif data.lower().startswith('mime-version:'):
  1325 + logging.info('Opening Word MHTML file %s' % self.filename)
  1326 + self.type = TYPE_MHTML
  1327 + # parse the MIME content
  1328 + mhtml = email.message_from_string(data)
  1329 + # find all the attached files:
  1330 + for part in mhtml.walk():
  1331 + content_type = part.get_content_type() # always returns a value
  1332 + fname = part.get_filename(None) # returns None if it fails
  1333 + logging.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type))
  1334 + part_data = part.get_payload(decode=True)
  1335 + # VBA macros are stored in a binary file named "editdata.mso".
  1336 + # the data content is an OLE container for the VBA project, compressed
  1337 + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
  1338 + # decompress the zlib data starting at offset 0x32, which is the OLE container:
  1339 + try:
  1340 + ole_data = zlib.decompress(part_data[0x32:])
  1341 + except:
  1342 + logging.debug('%s is not an ActiveMime container' % fname)
  1343 + continue
  1344 + try:
  1345 + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data))
  1346 + except:
  1347 + logging.debug('%s is not a valid OLE file' % fname)
  1348 + continue
1311 1349 #TODO: handle exceptions
1312 1350 #TODO: Excel 2003 XML
1313 1351 #TODO: plain text VBA file
... ... @@ -1575,6 +1613,8 @@ def process_file_triage (container, filename, data):
1575 1613 flags = 'OpX:'
1576 1614 elif vba.type == TYPE_Word2003_XML:
1577 1615 flags = 'XML:'
  1616 + elif vba.type == TYPE_MHTML:
  1617 + flags = 'MHT:'
1578 1618 macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = '-'
1579 1619 if nb_macros: macros = 'M'
1580 1620 if nb_autoexec: autoexec = 'A'
... ... @@ -1664,6 +1704,7 @@ def main():
1664 1704 # print banner with version
1665 1705 print 'olevba %s - http://decalage.info/python/oletools' % __version__
1666 1706  
  1707 + # TODO: option to set logging level, none by default
1667 1708 logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO)
1668 1709 # For now, all logging is disabled:
1669 1710 logging.disable(logging.CRITICAL)
... ... @@ -1701,7 +1742,7 @@ def main():
1701 1742 process_file_triage(container, filename, data)
1702 1743 count += 1
1703 1744 if not options.detailed_mode or options.triage_mode:
1704   - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, B=Base64 strings, D=Dridex strings, ?=Unknown)\n'
  1745 + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, B=Base64 strings, D=Dridex strings, ?=Unknown)\n'
1705 1746  
1706 1747 if count == 1 and not options.triage_mode and not options.detailed_mode:
1707 1748 # if options -t and -d were not specified and it's a single file, print details:
... ...