Commit 4795c8b97dd14ff62c3e9bd9d2096dd3d3aeeace
1 parent
77842b93
olevba: added is_mso_file function
Showing 1 changed file with 17 additions and 2 deletions
oletools/olevba.py
| @@ -132,8 +132,9 @@ https://github.com/unixfreak0037/officeparser | @@ -132,8 +132,9 @@ https://github.com/unixfreak0037/officeparser | ||
| 132 | # (issue #10 reported by Greg from SpamStopsHere) | 132 | # (issue #10 reported by Greg from SpamStopsHere) |
| 133 | # 2015-05-24 v0.28 PL: - improved support for MHTML files with modified header | 133 | # 2015-05-24 v0.28 PL: - improved support for MHTML files with modified header |
| 134 | # (issue #11 reported by Thomas Chopitea) | 134 | # (issue #11 reported by Thomas Chopitea) |
| 135 | +# 2015-05-26 v0.29 PL: - improved MSO files parsing (issue #12) | ||
| 135 | 136 | ||
| 136 | -__version__ = '0.28' | 137 | +__version__ = '0.29' |
| 137 | 138 | ||
| 138 | #------------------------------------------------------------------------------ | 139 | #------------------------------------------------------------------------------ |
| 139 | # TODO: | 140 | # TODO: |
| @@ -406,6 +407,20 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') | @@ -406,6 +407,20 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') | ||
| 406 | # regex to check that it is not just a hex string: | 407 | # regex to check that it is not just a hex string: |
| 407 | re_nothex_check = re.compile(r'[G-Zg-z]') | 408 | re_nothex_check = re.compile(r'[G-Zg-z]') |
| 408 | 409 | ||
| 410 | + | ||
| 411 | +# === MSO/ActiveMime files parsing =========================================== | ||
| 412 | + | ||
| 413 | +def is_mso_file(data): | ||
| 414 | + """ | ||
| 415 | + Check if the provided data is the content of a MSO/ActiveMime file, such as | ||
| 416 | + the ones created by Outlook in some cases, or Word/Excel when saving a | ||
| 417 | + file with the MHTML format or the Word 2003 XML format. | ||
| 418 | + This function only checks the ActiveMime magic at the beginning of data. | ||
| 419 | + :param data: bytes string | ||
| 420 | + :return: bool, True if the file is MSO, False otherwise | ||
| 421 | + """ | ||
| 422 | + return data.startswith(MSO_ACTIVEMIME_HEADER) | ||
| 423 | + | ||
| 409 | #--- FUNCTIONS ---------------------------------------------------------------- | 424 | #--- FUNCTIONS ---------------------------------------------------------------- |
| 410 | 425 | ||
| 411 | def copytoken_help(decompressed_current, decompressed_chunk_start): | 426 | def copytoken_help(decompressed_current, decompressed_chunk_start): |
| @@ -1373,7 +1388,7 @@ class VBA_Parser(object): | @@ -1373,7 +1388,7 @@ class VBA_Parser(object): | ||
| 1373 | # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | 1388 | # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. |
| 1374 | # decompress the zlib data starting at offset 0x32, which is the OLE container: | 1389 | # decompress the zlib data starting at offset 0x32, which is the OLE container: |
| 1375 | # check ActiveMime header: | 1390 | # check ActiveMime header: |
| 1376 | - if isinstance(part_data, str) and part_data.startswith(MSO_ACTIVEMIME_HEADER): | 1391 | + if isinstance(part_data, str) and is_mso_file(part_data): |
| 1377 | logging.debug('Found ActiveMime header, decompressing MSO container') | 1392 | logging.debug('Found ActiveMime header, decompressing MSO container') |
| 1378 | try: | 1393 | try: |
| 1379 | ole_data = zlib.decompress(part_data[0x32:]) | 1394 | ole_data = zlib.decompress(part_data[0x32:]) |