Commit b7b13bb58d7e362a470ff0febed7d7714d0256d0
1 parent
31b535f3
olevba 0.53dev4: added support for Word/PowerPoint 2007+ XML format, aka Flat OPC (issue #283)
Showing
2 changed files
with
67 additions
and
2 deletions
oletools/olevba.py
| ... | ... | @@ -10,6 +10,7 @@ Supported formats: |
| 10 | 10 | - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm) |
| 11 | 11 | - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb) |
| 12 | 12 | - PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm) |
| 13 | +- Word/PowerPoint 2007+ XML (aka Flat OPC) | |
| 13 | 14 | - Word 2003 XML (.xml) |
| 14 | 15 | - Word/Excel Single File Web Page / MHTML (.mht) |
| 15 | 16 | - Publisher (.pub) |
| ... | ... | @@ -203,8 +204,10 @@ from __future__ import print_function |
| 203 | 204 | # 2017-11-24 PL: - added keywords to detect self-modifying macros and |
| 204 | 205 | # attempts to disable macro security (issue #221) |
| 205 | 206 | # 2018-03-19 PL: - removed pyparsing from the thirdparty subfolder |
| 207 | +# 2018-04-15 v0.53 PL: - added support for Word/PowerPoint 2007+ XML (FlatOPC) | |
| 208 | +# (issue #283) | |
| 206 | 209 | |
| 207 | -__version__ = '0.52.3' | |
| 210 | +__version__ = '0.53dev4' | |
| 208 | 211 | |
| 209 | 212 | #------------------------------------------------------------------------------ |
| 210 | 213 | # TODO: |
| ... | ... | @@ -482,6 +485,7 @@ MSG_OLEVBA_ISSUES = 'Please report this issue on %s' % URL_OLEVBA_ISSUES |
| 482 | 485 | # Container types: |
| 483 | 486 | TYPE_OLE = 'OLE' |
| 484 | 487 | TYPE_OpenXML = 'OpenXML' |
| 488 | +TYPE_FlatOPC_XML = 'FlatOPC_XML' | |
| 485 | 489 | TYPE_Word2003_XML = 'Word2003_XML' |
| 486 | 490 | TYPE_MHTML = 'MHTML' |
| 487 | 491 | TYPE_TEXT = 'Text' |
| ... | ... | @@ -491,6 +495,7 @@ TYPE_PPT = 'PPT' |
| 491 | 495 | TYPE2TAG = { |
| 492 | 496 | TYPE_OLE: 'OLE:', |
| 493 | 497 | TYPE_OpenXML: 'OpX:', |
| 498 | + TYPE_FlatOPC_XML: 'FlX:', | |
| 494 | 499 | TYPE_Word2003_XML: 'XML:', |
| 495 | 500 | TYPE_MHTML: 'MHT:', |
| 496 | 501 | TYPE_TEXT: 'TXT:', |
| ... | ... | @@ -511,6 +516,18 @@ NS_W = '{http://schemas.microsoft.com/office/word/2003/wordml}' |
| 511 | 516 | TAG_BINDATA = NS_W + 'binData' |
| 512 | 517 | ATTR_NAME = NS_W + 'name' |
| 513 | 518 | |
| 519 | +# Namespaces and tags for Word/PowerPoint 2007+ XML parsing: | |
| 520 | +# root: <pkg:package xmlns:pkg="http://schemas.microsoft.com/office/2006/xmlPackage"> | |
| 521 | +NS_XMLPACKAGE = '{http://schemas.microsoft.com/office/2006/xmlPackage}' | |
| 522 | +TAG_PACKAGE = NS_XMLPACKAGE + 'package' | |
| 523 | +# a <pkg:part> element includes a <pkg:binaryData> element that contains the VBA project (an OLE file, e.g. vbaProject.bin) encoded in Base64: | |
| 524 | +# <pkg:part pkg:name="/word/vbaProject.bin" pkg:contentType="application/vnd.ms-office.vbaProject"><pkg:binaryData> | |
| 525 | +TAG_PKGPART = NS_XMLPACKAGE + 'part' | |
| 526 | +ATTR_PKG_NAME = NS_XMLPACKAGE + 'name' | |
| 527 | +ATTR_PKG_CONTENTTYPE = NS_XMLPACKAGE + 'contentType' | |
| 528 | +CTYPE_VBAPROJECT = "application/vnd.ms-office.vbaProject" | |
| 529 | +TAG_PKGBINDATA = NS_XMLPACKAGE + 'binaryData' | |
| 530 | + | |
| 514 | 531 | # Keywords to detect auto-executable macros |
| 515 | 532 | AUTOEXEC_KEYWORDS = { |
| 516 | 533 | # MS Word: |
| ... | ... | @@ -2343,6 +2360,9 @@ class VBA_Parser(object): |
| 2343 | 2360 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace |
| 2344 | 2361 | if b'http://schemas.microsoft.com/office/word/2003/wordml' in data: |
| 2345 | 2362 | self.open_word2003xml(data) |
| 2363 | + # check if it is a Word/PowerPoint 2007+ XML file (Flat OPC): must contain the namespace | |
| 2364 | + if b'http://schemas.microsoft.com/office/2006/xmlPackage' in data: | |
| 2365 | + self.open_flatopc(data) | |
| 2346 | 2366 | # store a lowercase version for the next tests: |
| 2347 | 2367 | data_lowercase = data.lower() |
| 2348 | 2368 | # check if it is a MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"): |
| ... | ... | @@ -2493,6 +2513,51 @@ class VBA_Parser(object): |
| 2493 | 2513 | log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc)) |
| 2494 | 2514 | log.debug('Trace:', exc_info=True) |
| 2495 | 2515 | |
| 2516 | + def open_flatopc(self, data): | |
| 2517 | + """ | |
| 2518 | + Open a Word or PowerPoint 2007+ XML file, aka "Flat OPC" | |
| 2519 | + :param data: file contents in a string or bytes | |
| 2520 | + :return: nothing | |
| 2521 | + """ | |
| 2522 | + log.info('Opening Flat OPC Word/PowerPoint XML file %s' % self.filename) | |
| 2523 | + try: | |
| 2524 | + # parse the XML content | |
| 2525 | + # TODO: handle XML parsing exceptions | |
| 2526 | + et = ET.fromstring(data) | |
| 2527 | + # TODO: check root node namespace and tag | |
| 2528 | + # find all the pkg:part elements: | |
| 2529 | + for pkgpart in et.iter(TAG_PKGPART): | |
| 2530 | + fname = pkgpart.get(ATTR_PKG_NAME, 'unknown') | |
| 2531 | + content_type = pkgpart.get(ATTR_PKG_CONTENTTYPE, 'unknown') | |
| 2532 | + if content_type == CTYPE_VBAPROJECT: | |
| 2533 | + for bindata in pkgpart.iterfind(TAG_PKGBINDATA): | |
| 2534 | + try: | |
| 2535 | + ole_data = binascii.a2b_base64(bindata.text) | |
| 2536 | + self.ole_subfiles.append( | |
| 2537 | + VBA_Parser(filename=fname, data=ole_data, | |
| 2538 | + relaxed=self.relaxed)) | |
| 2539 | + except OlevbaBaseException as exc: | |
| 2540 | + if self.relaxed: | |
| 2541 | + log.info('Error parsing subfile {0}: {1}' | |
| 2542 | + .format(fname, exc)) | |
| 2543 | + log.debug('Trace:', exc_info=True) | |
| 2544 | + else: | |
| 2545 | + raise SubstreamOpenError(self.filename, fname, exc) | |
| 2546 | + # set type only if parsing succeeds | |
| 2547 | + self.type = TYPE_FlatOPC_XML | |
| 2548 | + except OlevbaBaseException as exc: | |
| 2549 | + if self.relaxed: | |
| 2550 | + log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc)) | |
| 2551 | + log.debug('Trace:', exc_info=True) | |
| 2552 | + else: | |
| 2553 | + raise | |
| 2554 | + except Exception as exc: | |
| 2555 | + # TODO: differentiate exceptions for each parsing stage | |
| 2556 | + # (but ET can be one of several different libraries, and their APIs do not describe the exceptions well) | |
| 2557 | + # found: XMLSyntaxError | |
| 2558 | + log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc)) | |
| 2559 | + log.debug('Trace:', exc_info=True) | |
| 2560 | + | |
| 2496 | 2561 | def open_mht(self, data): |
| 2497 | 2562 | """ |
| 2498 | 2563 | Open a MHTML file | ... | ... |
setup.py
| ... | ... | @@ -43,7 +43,7 @@ import os, fnmatch |
| 43 | 43 | #--- METADATA ----------------------------------------------------------------- |
| 44 | 44 | |
| 45 | 45 | name = "oletools" |
| 46 | -version = '0.53dev3' | |
| 46 | +version = '0.53dev4' | |
| 47 | 47 | desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" |
| 48 | 48 | long_desc = open('oletools/README.rst').read() |
| 49 | 49 | author = "Philippe Lagadec" | ... | ... |