Commit 4795c8b97dd14ff62c3e9bd9d2096dd3d3aeeace
1 parent: 77842b93
olevba: added is_mso_file function
Showing 1 changed file with 17 additions and 2 deletions
oletools/olevba.py
| ... | ... | @@ -132,8 +132,9 @@ https://github.com/unixfreak0037/officeparser |
| 132 | 132 | # (issue #10 reported by Greg from SpamStopsHere) |
| 133 | 133 | # 2015-05-24 v0.28 PL: - improved support for MHTML files with modified header |
| 134 | 134 | # (issue #11 reported by Thomas Chopitea) |
| 135 | +# 2015-05-26 v0.29 PL: - improved MSO files parsing (issue #12) | |
| 135 | 136 | |
| 136 | -__version__ = '0.28' | |
| 137 | +__version__ = '0.29' | |
| 137 | 138 | |
| 138 | 139 | #------------------------------------------------------------------------------ |
| 139 | 140 | # TODO: |
| ... | ... | @@ -406,6 +407,20 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') |
| 406 | 407 | # regex to check that it is not just a hex string: |
| 407 | 408 | re_nothex_check = re.compile(r'[G-Zg-z]') |
| 408 | 409 | |
| 410 | + | |
| 411 | +# === MSO/ActiveMime files parsing =========================================== | |
| 412 | + | |
| 413 | +def is_mso_file(data): | |
| 414 | + """ | |
| 415 | + Check if the provided data is the content of a MSO/ActiveMime file, such as | |
| 416 | + the ones created by Outlook in some cases, or Word/Excel when saving a | |
| 417 | + file with the MHTML format or the Word 2003 XML format. | |
| 418 | + This function only checks the ActiveMime magic at the beginning of data. | |
| 419 | + :param data: bytes string | |
| 420 | + :return: bool, True if the file is MSO, False otherwise | |
| 421 | + """ | |
| 422 | + return data.startswith(MSO_ACTIVEMIME_HEADER) | |
| 423 | + | |
| 409 | 424 | #--- FUNCTIONS ---------------------------------------------------------------- |
| 410 | 425 | |
| 411 | 426 | def copytoken_help(decompressed_current, decompressed_chunk_start): |
| ... | ... | @@ -1373,7 +1388,7 @@ class VBA_Parser(object): |
| 1373 | 1388 | # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. |
| 1374 | 1389 | # decompress the zlib data starting at offset 0x32, which is the OLE container: |
| 1375 | 1390 | # check ActiveMime header: |
| 1376 | - if isinstance(part_data, str) and part_data.startswith(MSO_ACTIVEMIME_HEADER): | |
| 1391 | + if isinstance(part_data, str) and is_mso_file(part_data): | |
| 1377 | 1392 | logging.debug('Found ActiveMime header, decompressing MSO container') |
| 1378 | 1393 | try: |
| 1379 | 1394 | ole_data = zlib.decompress(part_data[0x32:]) |
| ... | ... |