Commit 70b7bfb66b58f8d779440893299c7d76d6386e7d

Authored by Philippe Lagadec
1 parent ec719fa9

olevba: added support for Word 2003 XML

Showing 1 changed file with 63 additions and 6 deletions
oletools/olevba.py
@@ -123,8 +123,9 @@ https://github.com/unixfreak0037/officeparser @@ -123,8 +123,9 @@ https://github.com/unixfreak0037/officeparser
123 # - added several suspicious keywords 123 # - added several suspicious keywords
124 # - improved Base64 detection and decoding 124 # - improved Base64 detection and decoding
125 # - fixed triage mode not to scan attrib lines 125 # - fixed triage mode not to scan attrib lines
  126 +# 2015-03-04 v0.25 PL: - added support for Word 2003 XML
126 127
127 -__version__ = '0.24' 128 +__version__ = '0.25'
128 129
129 #------------------------------------------------------------------------------ 130 #------------------------------------------------------------------------------
130 # TODO: 131 # TODO:
@@ -170,6 +171,24 @@ import os.path @@ -170,6 +171,24 @@ import os.path
170 import binascii 171 import binascii
171 import base64 172 import base64
172 import traceback 173 import traceback
  174 +import zlib
  175 +
  176 +# import lxml or ElementTree for XML parsing:
  177 +try:
  178 + # lxml: best performance for XML processing
  179 + import lxml.etree as ET
  180 +except ImportError:
  181 + try:
  182 + # Python 2.5+: batteries included
  183 + import xml.etree.cElementTree as ET
  184 + except ImportError:
  185 + try:
  186 + # Python <2.5: standalone ElementTree install
  187 + import elementtree.cElementTree as ET
  188 + except ImportError:
  189 + raise ImportError, "lxml or ElementTree are not installed, "\
  190 + +"see http://codespeak.net/lxml "\
  191 + +"or http://effbot.org/zone/element-index.htm"
173 192
174 import thirdparty.olefile as olefile 193 import thirdparty.olefile as olefile
175 from thirdparty.prettytable import prettytable 194 from thirdparty.prettytable import prettytable
@@ -179,11 +198,18 @@ from thirdparty.xglob import xglob @@ -179,11 +198,18 @@ from thirdparty.xglob import xglob
179 198
180 TYPE_OLE = 'OLE' 199 TYPE_OLE = 'OLE'
181 TYPE_OpenXML = 'OpenXML' 200 TYPE_OpenXML = 'OpenXML'
  201 +TYPE_Word2003_XML = 'Word2003_XML'
182 202
183 MODULE_EXTENSION = "bas" 203 MODULE_EXTENSION = "bas"
184 CLASS_EXTENSION = "cls" 204 CLASS_EXTENSION = "cls"
185 FORM_EXTENSION = "frm" 205 FORM_EXTENSION = "frm"
186 206
  207 +# Namespaces and tags for Word2003 XML parsing:
  208 +NS_W = '{http://schemas.microsoft.com/office/word/2003/wordml}'
  209 +# the tag <w:binData w:name="editdata.mso"> contains the VBA macro code:
  210 +TAG_BINDATA = NS_W + 'binData'
  211 +ATTR_NAME = NS_W + 'name'
  212 +
187 # Keywords to detect auto-executable macros 213 # Keywords to detect auto-executable macros
188 AUTOEXEC_KEYWORDS = { 214 AUTOEXEC_KEYWORDS = {
189 # MS Word: 215 # MS Word:
@@ -1213,9 +1239,38 @@ class VBA_Parser(object): @@ -1213,9 +1239,38 @@ class VBA_Parser(object):
1213 continue 1239 continue
1214 z.close() 1240 z.close()
1215 else: 1241 else:
1216 - msg = '%s is not an OLE nor an OpenXML file, cannot extract VBA Macros.' % self.filename  
1217 - logging.error(msg)  
1218 - raise TypeError(msg) 1242 + # read file from disk (unless data was provided), then check if it is a Word 2003 XML file (WordProcessingML);
  1243 + # Excel 2003 XML and plain text files containing VBA code are not handled yet (see TODOs below)
  1244 + if data is None:
  1245 + data = open(filename, 'rb').read()
  1246 + # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace
  1247 + if 'http://schemas.microsoft.com/office/word/2003/wordml' in data:
  1248 + logging.info('Opening Word 2003 XML file %s' % self.filename)
  1249 + self.type = TYPE_Word2003_XML
  1250 + # parse the XML content
  1251 + et = ET.fromstring(data)
  1252 + # find all the binData elements:
  1253 + for bindata in et.getiterator(TAG_BINDATA):
  1254 + # the binData content is an OLE container for the VBA project, compressed
  1255 + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
  1256 + # get the filename:
  1257 + fname = bindata.get(ATTR_NAME, 'noname.mso')
  1258 + # decode the base64 activemime
  1259 + activemime = binascii.a2b_base64(bindata.text)
  1260 + # decompress the zlib stream that starts at offset 0x32; the decompressed result is the OLE container:
  1261 + ole_data = zlib.decompress(activemime[0x32:])
  1262 + try:
  1263 + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data))
  1264 + except:
  1265 + logging.debug('%s is not a valid OLE file' % fname)
  1266 + continue
  1267 + #TODO: handle exceptions
  1268 + #TODO: Excel 2003 XML
  1269 + #TODO: plain text VBA file
  1270 + else:
  1271 + msg = '%s is not an OLE nor an OpenXML file, cannot extract VBA Macros.' % self.filename
  1272 + logging.error(msg)
  1273 + raise TypeError(msg)
1219 1274
1220 def find_vba_projects (self): 1275 def find_vba_projects (self):
1221 """ 1276 """
@@ -1472,8 +1527,10 @@ def process_file_triage (container, filename, data): @@ -1472,8 +1527,10 @@ def process_file_triage (container, filename, data):
1472 nb_dridexstrings += dridex 1527 nb_dridexstrings += dridex
1473 if vba.type == TYPE_OLE: 1528 if vba.type == TYPE_OLE:
1474 flags = 'OLE:' 1529 flags = 'OLE:'
1475 - else: 1530 + elif vba.type == TYPE_OpenXML:
1476 flags = 'OpX:' 1531 flags = 'OpX:'
  1532 + elif vba.type == TYPE_Word2003_XML:
  1533 + flags = 'XML:'
1477 macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = '-' 1534 macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = '-'
1478 if nb_macros: macros = 'M' 1535 if nb_macros: macros = 'M'
1479 if nb_autoexec: autoexec = 'A' 1536 if nb_autoexec: autoexec = 'A'
@@ -1597,7 +1654,7 @@ def main(): @@ -1597,7 +1654,7 @@ def main():
1597 process_file_triage(container, filename, data) 1654 process_file_triage(container, filename, data)
1598 count += 1 1655 count += 1
1599 if not options.detailed_mode or options.triage_mode: 1656 if not options.detailed_mode or options.triage_mode:
1600 - print '\n(Flags: OpX=OpenXML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, B=Base64 strings, D=Dridex strings, ?=Unknown)\n' 1657 + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, B=Base64 strings, D=Dridex strings, ?=Unknown)\n'
1601 1658
1602 if count == 1 and not options.triage_mode and not options.detailed_mode: 1659 if count == 1 and not options.triage_mode and not options.detailed_mode:
1603 # if options -t and -d were not specified and it's a single file, print details: 1660 # if options -t and -d were not specified and it's a single file, print details: