Commit 0762f5bbb297bc1bf2e39331321f39f2575b2d22
1 parent
dc628fab
added support for Word MHTML files with macros (Single File Web Page), fixed issue #10
Showing
1 changed file
with
57 additions
and
16 deletions
oletools/olevba.py
| ... | ... | @@ -11,6 +11,7 @@ Supported formats: |
| 11 | 11 | - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb) |
| 12 | 12 | - PowerPoint 2007+ (.pptm, .ppsm) |
| 13 | 13 | - Word 2003 XML (.xml) |
| 14 | +- Word Single File Web Page / MHTML (.mht) | |
| 14 | 15 | |
| 15 | 16 | Author: Philippe Lagadec - http://www.decalage.info |
| 16 | 17 | License: BSD, see source code or documentation |
| ... | ... | @@ -127,8 +128,10 @@ https://github.com/unixfreak0037/officeparser |
| 127 | 128 | # 2015-03-04 v0.25 PL: - added support for Word 2003 XML |
| 128 | 129 | # 2015-03-22 v0.26 PL: - added suspicious keywords for sandboxing and |
| 129 | 130 | # virtualisation detection |
| 131 | +# 2015-05-06 v0.27 PL: - added support for MHTML files with VBA macros | |
| 132 | +# (issue #10 reported by Greg from SpamStopsHere) | |
| 130 | 133 | |
| 131 | -__version__ = '0.26' | |
| 134 | +__version__ = '0.27' | |
| 132 | 135 | |
| 133 | 136 | #------------------------------------------------------------------------------ |
| 134 | 137 | # TODO: |
| ... | ... | @@ -175,6 +178,7 @@ import binascii |
| 175 | 178 | import base64 |
| 176 | 179 | import traceback |
| 177 | 180 | import zlib |
| 181 | +import email # for MHTML parsing | |
| 178 | 182 | |
| 179 | 183 | # import lxml or ElementTree for XML parsing: |
| 180 | 184 | try: |
| ... | ... | @@ -199,9 +203,11 @@ from thirdparty.xglob import xglob |
| 199 | 203 | |
| 200 | 204 | #--- CONSTANTS ---------------------------------------------------------------- |
| 201 | 205 | |
| 206 | +# Container types: | |
| 202 | 207 | TYPE_OLE = 'OLE' |
| 203 | 208 | TYPE_OpenXML = 'OpenXML' |
| 204 | 209 | TYPE_Word2003_XML = 'Word2003_XML' |
| 210 | +TYPE_MHTML = 'MHTML' | |
| 205 | 211 | |
| 206 | 212 | MODULE_EXTENSION = "bas" |
| 207 | 213 | CLASS_EXTENSION = "cls" |
| ... | ... | @@ -308,32 +314,32 @@ SUSPICIOUS_KEYWORDS = { |
| 308 | 314 | ('RegQueryValueExA', 'RegQueryValueEx', |
| 309 | 315 | 'RegRead', #with Wscript.Shell |
| 310 | 316 | ), |
| 311 | - 'May detect virtualisation': | |
| 312 | - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 317 | + 'May detect virtualization': | |
| 318 | + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 313 | 319 | (r'SYSTEM\ControlSet001\Services\Disk\Enum', 'VIRTUAL', 'VMWARE', 'VBOX'), |
| 314 | 320 | 'May detect Anubis Sandbox': |
| 315 | - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 316 | - #NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA | |
| 317 | - #ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf | |
| 318 | - ('GetVolumeInformationA', 'GetVolumeInformation', #with kernel32.dll | |
| 321 | + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 322 | + # NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA | |
| 323 | + # ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf | |
| 324 | + ('GetVolumeInformationA', 'GetVolumeInformation', # with kernel32.dll | |
| 319 | 325 | '1824245000', r'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProductId', |
| 320 | 326 | '76487-337-8429955-22614', 'andy', 'sample', r'C:\exec\exec.exe', 'popupkiller' |
| 321 | 327 | ), |
| 322 | 328 | 'May detect Sandboxie': |
| 323 | - #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 324 | - #ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 329 | + # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/ | |
| 330 | + # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 325 | 331 | ('SbieDll.dll', 'SandboxieControlWndClass'), |
| 326 | 332 | 'May detect Sunbelt Sandbox': |
| 327 | - #ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 333 | + # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 328 | 334 | (r'C:\file.exe',), |
| 329 | 335 | 'May detect Norman Sandbox': |
| 330 | - #ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 336 | + # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 331 | 337 | ('currentuser',), |
| 332 | 338 | 'May detect CW Sandbox': |
| 333 | - #ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 339 | + # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 334 | 340 | ('Schmidti',), |
| 335 | 341 | 'May detect WinJail Sandbox': |
| 336 | - #ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 342 | + # ref: http://www.cplusplus.com/forum/windows/96874/ | |
| 337 | 343 | ('Afx:400000:0',), |
| 338 | 344 | } |
| 339 | 345 | |
| ... | ... | @@ -1215,8 +1221,12 @@ class VBA_Parser(object): |
| 1215 | 1221 | """ |
| 1216 | 1222 | Class to parse MS Office files, to detect VBA macros and extract VBA source code |
| 1217 | 1223 | Supported file formats: |
| 1218 | - - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm) | |
| 1219 | - - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb) | |
| 1224 | + - Word 97-2003 (.doc, .dot) | |
| 1225 | + - Word 2007+ (.docm, .dotm) | |
| 1226 | + - Word 2003 XML (.xml) | |
| 1227 | + - Word MHT - Single File Web Page / MHTML (.mht) | |
| 1228 | + - Excel 97-2003 (.xls) | |
| 1229 | + - Excel 2007+ (.xlsm, .xlsb) | |
| 1220 | 1230 | - PowerPoint 2007+ (.pptm, .ppsm) |
| 1221 | 1231 | """ |
| 1222 | 1232 | |
| ... | ... | @@ -1287,6 +1297,7 @@ class VBA_Parser(object): |
| 1287 | 1297 | # or a plain text file containing VBA code |
| 1288 | 1298 | if data is None: |
| 1289 | 1299 | data = open(filename, 'rb').read() |
| 1300 | + # TODO: move each format parser to a separate method | |
| 1290 | 1301 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace |
| 1291 | 1302 | if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: |
| 1292 | 1303 | logging.info('Opening Word 2003 XML file %s' % self.filename) |
| ... | ... | @@ -1308,6 +1319,33 @@ class VBA_Parser(object): |
| 1308 | 1319 | except: |
| 1309 | 1320 | logging.debug('%s is not a valid OLE file' % fname) |
| 1310 | 1321 | continue |
| 1322 | + # check if it is a Word MHTML file (Single File Web Page): must start with a MIME-Version header | |
| 1323 | + # TODO: check if Word accepts data before the MIME header, whether the header is case-sensitive, etc. | |
| 1324 | + elif data.lower().startswith('mime-version:'): | |
| 1325 | + logging.info('Opening Word MHTML file %s' % self.filename) | |
| 1326 | + self.type = TYPE_MHTML | |
| 1327 | + # parse the MIME content | |
| 1328 | + mhtml = email.message_from_string(data) | |
| 1329 | + # find all the attached files: | |
| 1330 | + for part in mhtml.walk(): | |
| 1331 | + content_type = part.get_content_type() # always returns a value | |
| 1332 | + fname = part.get_filename(None) # returns None if it fails | |
| 1333 | + logging.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type)) | |
| 1334 | + part_data = part.get_payload(decode=True) | |
| 1335 | + # VBA macros are stored in a binary file named "editdata.mso". | |
| 1336 | + # the data content is an OLE container for the VBA project, compressed | |
| 1337 | + # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | |
| 1338 | + # decompress the zlib data starting at offset 0x32, which is the OLE container: | |
| 1339 | + try: | |
| 1340 | + ole_data = zlib.decompress(part_data[0x32:]) | |
| 1341 | + except: | |
| 1342 | + logging.debug('%s is not an ActiveMime container' % fname) | |
| 1343 | + continue | |
| 1344 | + try: | |
| 1345 | + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | |
| 1346 | + except: | |
| 1347 | + logging.debug('%s is not a valid OLE file' % fname) | |
| 1348 | + continue | |
| 1311 | 1349 | #TODO: handle exceptions |
| 1312 | 1350 | #TODO: Excel 2003 XML |
| 1313 | 1351 | #TODO: plain text VBA file |
| ... | ... | @@ -1575,6 +1613,8 @@ def process_file_triage (container, filename, data): |
| 1575 | 1613 | flags = 'OpX:' |
| 1576 | 1614 | elif vba.type == TYPE_Word2003_XML: |
| 1577 | 1615 | flags = 'XML:' |
| 1616 | + elif vba.type == TYPE_MHTML: | |
| 1617 | + flags = 'MHT:' | |
| 1578 | 1618 | macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = '-' |
| 1579 | 1619 | if nb_macros: macros = 'M' |
| 1580 | 1620 | if nb_autoexec: autoexec = 'A' |
| ... | ... | @@ -1664,6 +1704,7 @@ def main(): |
| 1664 | 1704 | # print banner with version |
| 1665 | 1705 | print 'olevba %s - http://decalage.info/python/oletools' % __version__ |
| 1666 | 1706 | |
| 1707 | + # TODO: option to set logging level, none by default | |
| 1667 | 1708 | logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) |
| 1668 | 1709 | # For now, all logging is disabled: |
| 1669 | 1710 | logging.disable(logging.CRITICAL) |
| ... | ... | @@ -1701,7 +1742,7 @@ def main(): |
| 1701 | 1742 | process_file_triage(container, filename, data) |
| 1702 | 1743 | count += 1 |
| 1703 | 1744 | if not options.detailed_mode or options.triage_mode: |
| 1704 | - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, B=Base64 strings, D=Dridex strings, ?=Unknown)\n' | |
| 1745 | + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, B=Base64 strings, D=Dridex strings, ?=Unknown)\n' | |
| 1705 | 1746 | |
| 1706 | 1747 | if count == 1 and not options.triage_mode and not options.detailed_mode: |
| 1707 | 1748 | # if options -t and -d were not specified and it's a single file, print details: | ... | ... |