Commit 4795c8b97dd14ff62c3e9bd9d2096dd3d3aeeace

Authored by Philippe Lagadec
1 parent 77842b93

olevba: added is_mso_file function

Showing 1 changed file with 17 additions and 2 deletions
oletools/olevba.py
... ... @@ -132,8 +132,9 @@ https://github.com/unixfreak0037/officeparser
132 132 # (issue #10 reported by Greg from SpamStopsHere)
133 133 # 2015-05-24 v0.28 PL: - improved support for MHTML files with modified header
134 134 # (issue #11 reported by Thomas Chopitea)
  135 +# 2015-05-26 v0.29 PL: - improved MSO files parsing (issue #12)
135 136  
136   -__version__ = '0.28'
  137 +__version__ = '0.29'
137 138  
138 139 #------------------------------------------------------------------------------
139 140 # TODO:
... ... @@ -406,6 +407,20 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"')
406 407 # regex to check that it is not just a hex string:
407 408 re_nothex_check = re.compile(r'[G-Zg-z]')
408 409  
  410 +
  411 +# === MSO/ActiveMime files parsing ===========================================
  412 +
  413 +def is_mso_file(data):
  414 + """
  415 + Check if the provided data is the content of an MSO/ActiveMime file, such as
  416 + the ones created by Outlook in some cases, or Word/Excel when saving a
  417 + file with the MHTML format or the Word 2003 XML format.
  418 + This function only checks the ActiveMime magic at the beginning of data.
  419 + :param data: bytes string
  420 + :return: bool, True if the file is MSO, False otherwise
  421 + """
  422 + return data.startswith(MSO_ACTIVEMIME_HEADER)
  423 +
409 424 #--- FUNCTIONS ----------------------------------------------------------------
410 425  
411 426 def copytoken_help(decompressed_current, decompressed_chunk_start):
... ... @@ -1373,7 +1388,7 @@ class VBA_Parser(object):
1373 1388 # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
1374 1389 # decompress the zlib data starting at offset 0x32, which is the OLE container:
1375 1390 # check ActiveMime header:
1376   - if isinstance(part_data, str) and part_data.startswith(MSO_ACTIVEMIME_HEADER):
  1391 + if isinstance(part_data, str) and is_mso_file(part_data):
1377 1392 logging.debug('Found ActiveMime header, decompressing MSO container')
1378 1393 try:
1379 1394 ole_data = zlib.decompress(part_data[0x32:])
... ...