Commit 70b7bfb66b58f8d779440893299c7d76d6386e7d

Authored by Philippe Lagadec
1 parent ec719fa9

olevba: added support for Word 2003 XML

Showing 1 changed file with 63 additions and 6 deletions
oletools/olevba.py
... ... @@ -123,8 +123,9 @@ https://github.com/unixfreak0037/officeparser
123 123 # - added several suspicious keywords
124 124 # - improved Base64 detection and decoding
125 125 # - fixed triage mode not to scan attrib lines
  126 +# 2015-03-04 v0.25 PL: - added support for Word 2003 XML
126 127  
127   -__version__ = '0.24'
  128 +__version__ = '0.25'
128 129  
129 130 #------------------------------------------------------------------------------
130 131 # TODO:
... ... @@ -170,6 +171,24 @@ import os.path
170 171 import binascii
171 172 import base64
172 173 import traceback
  174 +import zlib
  175 +
  176 +# import lxml or ElementTree for XML parsing:
  177 +try:
  178 + # lxml: best performance for XML processing
  179 + import lxml.etree as ET
  180 +except ImportError:
  181 + try:
  182 + # Python 2.5+: batteries included
  183 + import xml.etree.cElementTree as ET
  184 + except ImportError:
  185 + try:
  186 + # Python <2.5: standalone ElementTree install
  187 + import elementtree.cElementTree as ET
  188 + except ImportError:
  189 + raise ImportError, "lxml or ElementTree are not installed, "\
  190 + +"see http://codespeak.net/lxml "\
  191 + +"or http://effbot.org/zone/element-index.htm"
173 192  
174 193 import thirdparty.olefile as olefile
175 194 from thirdparty.prettytable import prettytable
... ... @@ -179,11 +198,18 @@ from thirdparty.xglob import xglob
179 198  
180 199 TYPE_OLE = 'OLE'
181 200 TYPE_OpenXML = 'OpenXML'
  201 +TYPE_Word2003_XML = 'Word2003_XML'
182 202  
183 203 MODULE_EXTENSION = "bas"
184 204 CLASS_EXTENSION = "cls"
185 205 FORM_EXTENSION = "frm"
186 206  
  207 +# Namespaces and tags for Word2003 XML parsing:
  208 +NS_W = '{http://schemas.microsoft.com/office/word/2003/wordml}'
  209 +# the tag <w:binData w:name="editdata.mso"> contains the VBA macro code:
  210 +TAG_BINDATA = NS_W + 'binData'
  211 +ATTR_NAME = NS_W + 'name'
  212 +
187 213 # Keywords to detect auto-executable macros
188 214 AUTOEXEC_KEYWORDS = {
189 215 # MS Word:
... ... @@ -1213,9 +1239,38 @@ class VBA_Parser(object):
1213 1239 continue
1214 1240 z.close()
1215 1241 else:
1216   - msg = '%s is not an OLE nor an OpenXML file, cannot extract VBA Macros.' % self.filename
1217   - logging.error(msg)
1218   - raise TypeError(msg)
  1242 + # if no data was provided, read the file from disk; then check whether it is a Word 2003 XML file
  1243 + # (WordProcessingML). Excel 2003 XML and plain text files containing VBA code are not handled yet (see TODOs below)
  1244 + if data is None:
  1245 + data = open(filename, 'rb').read()
  1246 + # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace
  1247 + if 'http://schemas.microsoft.com/office/word/2003/wordml' in data:
  1248 + logging.info('Opening Word 2003 XML file %s' % self.filename)
  1249 + self.type = TYPE_Word2003_XML
  1250 + # parse the XML content
  1251 + et = ET.fromstring(data)
  1252 + # find all the binData elements:
  1253 + for bindata in et.getiterator(TAG_BINDATA):
  1254 + # the binData content is an OLE container for the VBA project, compressed
  1255 + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
  1256 + # get the filename:
  1257 + fname = bindata.get(ATTR_NAME, 'noname.mso')
  1258 + # decode the base64 activemime
  1259 + activemime = binascii.a2b_base64(bindata.text)
  1260 + # decompress the zlib stream that starts at offset 0x32; the decompressed result is the OLE container:
  1261 + ole_data = zlib.decompress(activemime[0x32:])
  1262 + try:
  1263 + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data))
  1264 + except:
  1265 + logging.debug('%s is not a valid OLE file' % fname)
  1266 + continue
  1267 + #TODO: handle exceptions
  1268 + #TODO: Excel 2003 XML
  1269 + #TODO: plain text VBA file
  1270 + else:
  1271 + msg = '%s is not an OLE nor an OpenXML file, cannot extract VBA Macros.' % self.filename
  1272 + logging.error(msg)
  1273 + raise TypeError(msg)
1219 1274  
1220 1275 def find_vba_projects (self):
1221 1276 """
... ... @@ -1472,8 +1527,10 @@ def process_file_triage (container, filename, data):
1472 1527 nb_dridexstrings += dridex
1473 1528 if vba.type == TYPE_OLE:
1474 1529 flags = 'OLE:'
1475   - else:
  1530 + elif vba.type == TYPE_OpenXML:
1476 1531 flags = 'OpX:'
  1532 + elif vba.type == TYPE_Word2003_XML:
  1533 + flags = 'XML:'
1477 1534 macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = '-'
1478 1535 if nb_macros: macros = 'M'
1479 1536 if nb_autoexec: autoexec = 'A'
... ... @@ -1597,7 +1654,7 @@ def main():
1597 1654 process_file_triage(container, filename, data)
1598 1655 count += 1
1599 1656 if not options.detailed_mode or options.triage_mode:
1600   - print '\n(Flags: OpX=OpenXML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, B=Base64 strings, D=Dridex strings, ?=Unknown)\n'
  1657 + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, B=Base64 strings, D=Dridex strings, ?=Unknown)\n'
1601 1658  
1602 1659 if count == 1 and not options.triage_mode and not options.detailed_mode:
1603 1660 # if options -t and -d were not specified and it's a single file, print details:
... ...