Commit 70b7bfb66b58f8d779440893299c7d76d6386e7d
1 parent: ec719fa9
olevba: added support for Word 2003 XML
Showing 1 changed file with 63 additions and 6 deletions
oletools/olevba.py
| ... | ... | @@ -123,8 +123,9 @@ https://github.com/unixfreak0037/officeparser |
| 123 | 123 | # - added several suspicious keywords |
| 124 | 124 | # - improved Base64 detection and decoding |
| 125 | 125 | # - fixed triage mode not to scan attrib lines |
| 126 | +# 2015-03-04 v0.25 PL: - added support for Word 2003 XML | |
| 126 | 127 | |
| 127 | -__version__ = '0.24' | |
| 128 | +__version__ = '0.25' | |
| 128 | 129 | |
| 129 | 130 | #------------------------------------------------------------------------------ |
| 130 | 131 | # TODO: |
| ... | ... | @@ -170,6 +171,24 @@ import os.path |
| 170 | 171 | import binascii |
| 171 | 172 | import base64 |
| 172 | 173 | import traceback |
| 174 | +import zlib | |
| 175 | + | |
| 176 | +# import lxml or ElementTree for XML parsing: | |
| 177 | +try: | |
| 178 | + # lxml: best performance for XML processing | |
| 179 | + import lxml.etree as ET | |
| 180 | +except ImportError: | |
| 181 | + try: | |
| 182 | + # Python 2.5+: batteries included | |
| 183 | + import xml.etree.cElementTree as ET | |
| 184 | + except ImportError: | |
| 185 | + try: | |
| 186 | + # Python <2.5: standalone ElementTree install | |
| 187 | + import elementtree.cElementTree as ET | |
| 188 | + except ImportError: | |
| 189 | + raise ImportError, "lxml or ElementTree are not installed, "\ | |
| 190 | + +"see http://codespeak.net/lxml "\ | |
| 191 | + +"or http://effbot.org/zone/element-index.htm" | |
| 173 | 192 | |
| 174 | 193 | import thirdparty.olefile as olefile |
| 175 | 194 | from thirdparty.prettytable import prettytable |
| ... | ... | @@ -179,11 +198,18 @@ from thirdparty.xglob import xglob |
| 179 | 198 | |
| 180 | 199 | TYPE_OLE = 'OLE' |
| 181 | 200 | TYPE_OpenXML = 'OpenXML' |
| 201 | +TYPE_Word2003_XML = 'Word2003_XML' | |
| 182 | 202 | |
| 183 | 203 | MODULE_EXTENSION = "bas" |
| 184 | 204 | CLASS_EXTENSION = "cls" |
| 185 | 205 | FORM_EXTENSION = "frm" |
| 186 | 206 | |
| 207 | +# Namespaces and tags for Word2003 XML parsing: | |
| 208 | +NS_W = '{http://schemas.microsoft.com/office/word/2003/wordml}' | |
| 209 | +# the tag <w:binData w:name="editdata.mso"> contains the VBA macro code: | |
| 210 | +TAG_BINDATA = NS_W + 'binData' | |
| 211 | +ATTR_NAME = NS_W + 'name' | |
| 212 | + | |
| 187 | 213 | # Keywords to detect auto-executable macros |
| 188 | 214 | AUTOEXEC_KEYWORDS = { |
| 189 | 215 | # MS Word: |
| ... | ... | @@ -1213,9 +1239,38 @@ class VBA_Parser(object): |
| 1213 | 1239 | continue |
| 1214 | 1240 | z.close() |
| 1215 | 1241 | else: |
| 1216 | - msg = '%s is not an OLE nor an OpenXML file, cannot extract VBA Macros.' % self.filename | |
| 1217 | - logging.error(msg) | |
| 1218 | - raise TypeError(msg) | |
| 1242 | + # read file from disk, check if it is a Word 2003 XML file (WordProcessingML), Excel 2003 XML, | |
| 1243 | + # or a plain text file containing VBA code | |
| 1244 | + if data is None: | |
| 1245 | + data = open(filename, 'rb').read() | |
| 1246 | + # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace | |
| 1247 | + if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: | |
| 1248 | + logging.info('Opening Word 2003 XML file %s' % self.filename) | |
| 1249 | + self.type = TYPE_Word2003_XML | |
| 1250 | + # parse the XML content | |
| 1251 | + et = ET.fromstring(data) | |
| 1252 | + # find all the binData elements: | |
| 1253 | + for bindata in et.getiterator(TAG_BINDATA): | |
| 1254 | + # the binData content is an OLE container for the VBA project, compressed | |
| 1255 | + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | |
| 1256 | + # get the filename: | |
| 1257 | + fname = bindata.get(ATTR_NAME, 'noname.mso') | |
| 1258 | + # decode the base64 activemime | |
| 1259 | + activemime = binascii.a2b_base64(bindata.text) | |
| 1260 | + # decompress the zlib data starting at offset 0x32, which is the OLE container: | |
| 1261 | + ole_data = zlib.decompress(activemime[0x32:]) | |
| 1262 | + try: | |
| 1263 | + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | |
| 1264 | + except: | |
| 1265 | + logging.debug('%s is not a valid OLE file' % fname) | |
| 1266 | + continue | |
| 1267 | + #TODO: handle exceptions | |
| 1268 | + #TODO: Excel 2003 XML | |
| 1269 | + #TODO: plain text VBA file | |
| 1270 | + else: | |
| 1271 | + msg = '%s is not an OLE nor an OpenXML file, cannot extract VBA Macros.' % self.filename | |
| 1272 | + logging.error(msg) | |
| 1273 | + raise TypeError(msg) | |
| 1219 | 1274 | |
| 1220 | 1275 | def find_vba_projects (self): |
| 1221 | 1276 | """ |
| ... | ... | @@ -1472,8 +1527,10 @@ def process_file_triage (container, filename, data): |
| 1472 | 1527 | nb_dridexstrings += dridex |
| 1473 | 1528 | if vba.type == TYPE_OLE: |
| 1474 | 1529 | flags = 'OLE:' |
| 1475 | - else: | |
| 1530 | + elif vba.type == TYPE_OpenXML: | |
| 1476 | 1531 | flags = 'OpX:' |
| 1532 | + elif vba.type == TYPE_Word2003_XML: | |
| 1533 | + flags = 'XML:' | |
| 1477 | 1534 | macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = '-' |
| 1478 | 1535 | if nb_macros: macros = 'M' |
| 1479 | 1536 | if nb_autoexec: autoexec = 'A' |
| ... | ... | @@ -1597,7 +1654,7 @@ def main(): |
| 1597 | 1654 | process_file_triage(container, filename, data) |
| 1598 | 1655 | count += 1 |
| 1599 | 1656 | if not options.detailed_mode or options.triage_mode: |
| 1600 | - print '\n(Flags: OpX=OpenXML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, B=Base64 strings, D=Dridex strings, ?=Unknown)\n' | |
| 1657 | + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, B=Base64 strings, D=Dridex strings, ?=Unknown)\n' | |
| 1601 | 1658 | |
| 1602 | 1659 | if count == 1 and not options.triage_mode and not options.detailed_mode: |
| 1603 | 1660 | # if options -t and -d were not specified and it's a single file, print details: | ... | ... |