Commit 4795c8b97dd14ff62c3e9bd9d2096dd3d3aeeace

Authored by Philippe Lagadec
1 parent 77842b93

olevba: added is_mso_file function

Showing 1 changed file with 17 additions and 2 deletions
oletools/olevba.py
@@ -132,8 +132,9 @@ https://github.com/unixfreak0037/officeparser @@ -132,8 +132,9 @@ https://github.com/unixfreak0037/officeparser
132 # (issue #10 reported by Greg from SpamStopsHere) 132 # (issue #10 reported by Greg from SpamStopsHere)
133 # 2015-05-24 v0.28 PL: - improved support for MHTML files with modified header 133 # 2015-05-24 v0.28 PL: - improved support for MHTML files with modified header
134 # (issue #11 reported by Thomas Chopitea) 134 # (issue #11 reported by Thomas Chopitea)
  135 +# 2015-05-26 v0.29 PL: - improved MSO files parsing (issue #12)
135 136
136 -__version__ = '0.28' 137 +__version__ = '0.29'
137 138
138 #------------------------------------------------------------------------------ 139 #------------------------------------------------------------------------------
139 # TODO: 140 # TODO:
@@ -406,6 +407,20 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') @@ -406,6 +407,20 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"')
406 # regex to check that it is not just a hex string: 407 # regex to check that it is not just a hex string:
407 re_nothex_check = re.compile(r'[G-Zg-z]') 408 re_nothex_check = re.compile(r'[G-Zg-z]')
408 409
  410 +
  411 +# === MSO/ActiveMime files parsing ===========================================
  412 +
  413 +def is_mso_file(data):
  414 + """
  415 + Check if the provided data is the content of an MSO/ActiveMime file, such as
  416 + the ones created by Outlook in some cases, or Word/Excel when saving a
  417 + file with the MHTML format or the Word 2003 XML format.
  418 + This function only checks the ActiveMime magic at the beginning of data.
  419 + :param data: bytes string
  420 + :return: bool, True if data starts with the ActiveMime magic, False otherwise
  421 + """
  422 + return data.startswith(MSO_ACTIVEMIME_HEADER)
  423 +
409 #--- FUNCTIONS ---------------------------------------------------------------- 424 #--- FUNCTIONS ----------------------------------------------------------------
410 425
411 def copytoken_help(decompressed_current, decompressed_chunk_start): 426 def copytoken_help(decompressed_current, decompressed_chunk_start):
@@ -1373,7 +1388,7 @@ class VBA_Parser(object): @@ -1373,7 +1388,7 @@ class VBA_Parser(object):
1373 # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. 1388 # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
1374 # decompress the zlib data starting at offset 0x32, which is the OLE container: 1389 # decompress the zlib data starting at offset 0x32, which is the OLE container:
1375 # check ActiveMime header: 1390 # check ActiveMime header:
1376 - if isinstance(part_data, str) and part_data.startswith(MSO_ACTIVEMIME_HEADER): 1391 + if isinstance(part_data, str) and is_mso_file(part_data):
1377 logging.debug('Found ActiveMime header, decompressing MSO container') 1392 logging.debug('Found ActiveMime header, decompressing MSO container')
1378 try: 1393 try:
1379 ole_data = zlib.decompress(part_data[0x32:]) 1394 ole_data = zlib.decompress(part_data[0x32:])