Commit e636b4f8ad1666964596f6a7cae79414d32571bd
1 parent
1541d5de
olevba: reverted to python 2.7 version, moved python 3 version to olevba3.py
Showing
2 changed files
with
3465 additions
and
93 deletions
oletools/olevba.py
| ... | ... | @@ -215,7 +215,7 @@ __version__ = '0.50' |
| 215 | 215 | |
| 216 | 216 | import sys, logging |
| 217 | 217 | import struct |
| 218 | -from _io import StringIO,BytesIO | |
| 218 | +import cStringIO | |
| 219 | 219 | import math |
| 220 | 220 | import zipfile |
| 221 | 221 | import re |
| ... | ... | @@ -240,9 +240,9 @@ except ImportError: |
| 240 | 240 | # Python <2.5: standalone ElementTree install |
| 241 | 241 | import elementtree.cElementTree as ET |
| 242 | 242 | except ImportError: |
| 243 | - raise(ImportError, "lxml or ElementTree are not installed, " \ | |
| 243 | + raise ImportError, "lxml or ElementTree are not installed, " \ | |
| 244 | 244 | + "see http://codespeak.net/lxml " \ |
| 245 | - + "or http://effbot.org/zone/element-index.htm") | |
| 245 | + + "or http://effbot.org/zone/element-index.htm" | |
| 246 | 246 | |
| 247 | 247 | import thirdparty.olefile as olefile |
| 248 | 248 | from thirdparty.prettytable import prettytable |
| ... | ... | @@ -421,7 +421,7 @@ TYPE2TAG = { |
| 421 | 421 | |
| 422 | 422 | |
| 423 | 423 | # MSO files ActiveMime header magic |
| 424 | -MSO_ACTIVEMIME_HEADER = b'ActiveMime' | |
| 424 | +MSO_ACTIVEMIME_HEADER = 'ActiveMime' | |
| 425 | 425 | |
| 426 | 426 | MODULE_EXTENSION = "bas" |
| 427 | 427 | CLASS_EXTENSION = "cls" |
| ... | ... | @@ -630,7 +630,7 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') |
| 630 | 630 | re_nothex_check = re.compile(r'[G-Zg-z]') |
| 631 | 631 | |
| 632 | 632 | # regex to extract printable strings (at least 5 chars) from VBA Forms: |
| 633 | -re_printable_string = re.compile(rb'[\t\r\n\x20-\xFF]{5,}') | |
| 633 | +re_printable_string = re.compile(r'[\t\r\n\x20-\xFF]{5,}') | |
| 634 | 634 | |
| 635 | 635 | |
| 636 | 636 | # === PARTIAL VBA GRAMMAR ==================================================== |
| ... | ... | @@ -1060,10 +1060,10 @@ def decompress_stream(compressed_container): |
| 1060 | 1060 | # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the |
| 1061 | 1061 | # DecompressedBuffer (section 2.4.1.1.2). |
| 1062 | 1062 | |
| 1063 | - decompressed_container = b'' # result | |
| 1063 | + decompressed_container = '' # result | |
| 1064 | 1064 | compressed_current = 0 |
| 1065 | 1065 | |
| 1066 | - sig_byte = compressed_container[compressed_current] | |
| 1066 | + sig_byte = ord(compressed_container[compressed_current]) | |
| 1067 | 1067 | if sig_byte != 0x01: |
| 1068 | 1068 | raise ValueError('invalid signature byte {0:02X}'.format(sig_byte)) |
| 1069 | 1069 | |
| ... | ... | @@ -1109,7 +1109,7 @@ def decompress_stream(compressed_container): |
| 1109 | 1109 | # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk |
| 1110 | 1110 | # uncompressed chunk: read the next 4096 bytes as-is |
| 1111 | 1111 | #TODO: check if there are at least 4096 bytes left |
| 1112 | - decompressed_container += bytes([compressed_container[compressed_current:compressed_current + 4096]]) | |
| 1112 | + decompressed_container += compressed_container[compressed_current:compressed_current + 4096] | |
| 1113 | 1113 | compressed_current += 4096 |
| 1114 | 1114 | else: |
| 1115 | 1115 | # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk |
| ... | ... | @@ -1120,9 +1120,9 @@ def decompress_stream(compressed_container): |
| 1120 | 1120 | # log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end)) |
| 1121 | 1121 | # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or |
| 1122 | 1122 | # copy tokens (reference to a previous literal token) |
| 1123 | - flag_byte = compressed_container[compressed_current] | |
| 1123 | + flag_byte = ord(compressed_container[compressed_current]) | |
| 1124 | 1124 | compressed_current += 1 |
| 1125 | - for bit_index in range(0, 8): | |
| 1125 | + for bit_index in xrange(0, 8): | |
| 1126 | 1126 | # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end)) |
| 1127 | 1127 | if compressed_current >= compressed_end: |
| 1128 | 1128 | break |
| ... | ... | @@ -1132,7 +1132,7 @@ def decompress_stream(compressed_container): |
| 1132 | 1132 | #log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit)) |
| 1133 | 1133 | if flag_bit == 0: # LiteralToken |
| 1134 | 1134 | # copy one byte directly to output |
| 1135 | - decompressed_container += bytes([compressed_container[compressed_current]]) | |
| 1135 | + decompressed_container += compressed_container[compressed_current] | |
| 1136 | 1136 | compressed_current += 1 |
| 1137 | 1137 | else: # CopyToken |
| 1138 | 1138 | # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken |
| ... | ... | @@ -1147,8 +1147,8 @@ def decompress_stream(compressed_container): |
| 1147 | 1147 | offset = (temp1 >> temp2) + 1 |
| 1148 | 1148 | #log.debug('offset=%d length=%d' % (offset, length)) |
| 1149 | 1149 | copy_source = len(decompressed_container) - offset |
| 1150 | - for index in range(copy_source, copy_source + length): | |
| 1151 | - decompressed_container += bytes([decompressed_container[index]]) | |
| 1150 | + for index in xrange(copy_source, copy_source + length): | |
| 1151 | + decompressed_container += decompressed_container[index] | |
| 1152 | 1152 | compressed_current += 2 |
| 1153 | 1153 | return decompressed_container |
| 1154 | 1154 | |
| ... | ... | @@ -1191,7 +1191,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1191 | 1191 | code_modules = {} |
| 1192 | 1192 | |
| 1193 | 1193 | for line in project: |
| 1194 | - line = line.strip().decode('utf-8','ignore') | |
| 1194 | + line = line.strip() | |
| 1195 | 1195 | if '=' in line: |
| 1196 | 1196 | # split line at the 1st equal sign: |
| 1197 | 1197 | name, value = line.split('=', 1) |
| ... | ... | @@ -1222,7 +1222,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1222 | 1222 | else: |
| 1223 | 1223 | raise UnexpectedDataError(dir_path, name, expected, value) |
| 1224 | 1224 | |
| 1225 | - dir_stream = BytesIO(decompress_stream(dir_compressed)) | |
| 1225 | + dir_stream = cStringIO.StringIO(decompress_stream(dir_compressed)) | |
| 1226 | 1226 | |
| 1227 | 1227 | # PROJECTSYSKIND Record |
| 1228 | 1228 | projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0] |
| ... | ... | @@ -1484,7 +1484,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1484 | 1484 | uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace') |
| 1485 | 1485 | |
| 1486 | 1486 | log.debug("parsing {0} modules".format(projectmodules_count)) |
| 1487 | - for projectmodule_index in range(0, projectmodules_count): | |
| 1487 | + for projectmodule_index in xrange(0, projectmodules_count): | |
| 1488 | 1488 | try: |
| 1489 | 1489 | modulename_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1490 | 1490 | check_value('MODULENAME_Id', 0x0019, modulename_id) |
| ... | ... | @@ -1881,19 +1881,19 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): |
| 1881 | 1881 | pass |
| 1882 | 1882 | elif isinstance(json_obj, str): |
| 1883 | 1883 | # de-code and re-encode |
| 1884 | - dencoded = json_obj | |
| 1884 | + dencoded = json_obj.decode(encoding, errors).encode(encoding, errors) | |
| 1885 | 1885 | if dencoded != json_obj: |
| 1886 | 1886 | log.debug('json2ascii: replaced: {0} (len {1})' |
| 1887 | 1887 | .format(json_obj, len(json_obj))) |
| 1888 | 1888 | log.debug('json2ascii: with: {0} (len {1})' |
| 1889 | 1889 | .format(dencoded, len(dencoded))) |
| 1890 | 1890 | return dencoded |
| 1891 | - elif isinstance(json_obj, bytes): | |
| 1891 | + elif isinstance(json_obj, unicode): | |
| 1892 | 1892 | log.debug('json2ascii: encode unicode: {0}' |
| 1893 | - .format(json_obj.decode(encoding, errors))) | |
| 1893 | + .format(json_obj.encode(encoding, errors))) | |
| 1894 | 1894 | # cannot put original into logger |
| 1895 | 1895 | # print 'original: ' json_obj |
| 1896 | - return json_obj.decode(encoding, errors) | |
| 1896 | + return json_obj.encode(encoding, errors) | |
| 1897 | 1897 | elif isinstance(json_obj, dict): |
| 1898 | 1898 | for key in json_obj: |
| 1899 | 1899 | json_obj[key] = json2ascii(json_obj[key]) |
| ... | ... | @@ -1931,18 +1931,18 @@ def print_json(json_dict=None, _json_is_last=False, **json_parts): |
| 1931 | 1931 | json_dict = json_parts |
| 1932 | 1932 | |
| 1933 | 1933 | if not _have_printed_json_start: |
| 1934 | - print('[') | |
| 1934 | + print '[' | |
| 1935 | 1935 | _have_printed_json_start = True |
| 1936 | 1936 | |
| 1937 | 1937 | lines = json.dumps(json2ascii(json_dict), check_circular=False, |
| 1938 | 1938 | indent=4, ensure_ascii=False).splitlines() |
| 1939 | 1939 | for line in lines[:-1]: |
| 1940 | - print(' {0}'.format(line)) | |
| 1940 | + print ' {0}'.format(line) | |
| 1941 | 1941 | if _json_is_last: |
| 1942 | - print(' {0}'.format(lines[-1])) # print last line without comma | |
| 1943 | - print(']') | |
| 1942 | + print ' {0}'.format(lines[-1]) # print last line without comma | |
| 1943 | + print ']' | |
| 1944 | 1944 | else: |
| 1945 | - print(' {0},'.format(lines[-1])) # print last line with comma | |
| 1945 | + print ' {0},'.format(lines[-1]) # print last line with comma | |
| 1946 | 1946 | |
| 1947 | 1947 | |
| 1948 | 1948 | class VBA_Scanner(object): |
| ... | ... | @@ -1959,10 +1959,10 @@ class VBA_Scanner(object): |
| 1959 | 1959 | """ |
| 1960 | 1960 | # join long lines ending with " _": |
| 1961 | 1961 | self.code = vba_collapse_long_lines(vba_code) |
| 1962 | - self.code_hex = b'' | |
| 1963 | - self.code_hex_rev = b'' | |
| 1964 | - self.code_rev_hex = b'' | |
| 1965 | - self.code_base64 = b'' | |
| 1962 | + self.code_hex = '' | |
| 1963 | + self.code_hex_rev = '' | |
| 1964 | + self.code_rev_hex = '' | |
| 1965 | + self.code_base64 = '' | |
| 1966 | 1966 | self.code_dridex = '' |
| 1967 | 1967 | self.code_vba = '' |
| 1968 | 1968 | self.strReverse = None |
| ... | ... | @@ -1995,19 +1995,19 @@ class VBA_Scanner(object): |
| 1995 | 1995 | if 'strreverse' in self.code.lower(): self.strReverse = True |
| 1996 | 1996 | # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords: |
| 1997 | 1997 | for encoded, decoded in self.hex_strings: |
| 1998 | - self.code_hex += b'\n' + decoded | |
| 1998 | + self.code_hex += '\n' + decoded | |
| 1999 | 1999 | # if the code contains "StrReverse", also append the hex strings in reverse order: |
| 2000 | 2000 | if self.strReverse: |
| 2001 | 2001 | # StrReverse after hex decoding: |
| 2002 | - self.code_hex_rev += b'\n' + decoded[::-1] | |
| 2002 | + self.code_hex_rev += '\n' + decoded[::-1] | |
| 2003 | 2003 | # StrReverse before hex decoding: |
| 2004 | - self.code_rev_hex += b'\n' + binascii.unhexlify(encoded[::-1]) | |
| 2004 | + self.code_rev_hex += '\n' + binascii.unhexlify(encoded[::-1]) | |
| 2005 | 2005 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ |
| 2006 | 2006 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) |
| 2007 | 2007 | # Detect Base64-encoded strings |
| 2008 | 2008 | self.base64_strings = detect_base64_strings(self.code) |
| 2009 | 2009 | for encoded, decoded in self.base64_strings: |
| 2010 | - self.code_base64 += b'\n' + decoded | |
| 2010 | + self.code_base64 += '\n' + decoded | |
| 2011 | 2011 | # Detect Dridex-encoded strings |
| 2012 | 2012 | self.dridex_strings = detect_dridex_strings(self.code) |
| 2013 | 2013 | for encoded, decoded in self.dridex_strings: |
| ... | ... | @@ -2026,15 +2026,13 @@ class VBA_Scanner(object): |
| 2026 | 2026 | |
| 2027 | 2027 | for code, obfuscation in ( |
| 2028 | 2028 | (self.code, None), |
| 2029 | - (self.code_hex.decode('utf-8','replace'), 'Hex'), | |
| 2029 | + (self.code_hex, 'Hex'), | |
| 2030 | 2030 | (self.code_hex_rev, 'Hex+StrReverse'), |
| 2031 | 2031 | (self.code_rev_hex, 'StrReverse+Hex'), |
| 2032 | - (self.code_base64.decode('utf-8', 'replace'), 'Base64'), | |
| 2032 | + (self.code_base64, 'Base64'), | |
| 2033 | 2033 | (self.code_dridex, 'Dridex'), |
| 2034 | 2034 | (self.code_vba, 'VBA expression'), |
| 2035 | 2035 | ): |
| 2036 | - if isinstance(code,bytes): | |
| 2037 | - code=code.decode('utf-8','replace') | |
| 2038 | 2036 | self.autoexec_keywords += detect_autoexec(code, obfuscation) |
| 2039 | 2037 | self.suspicious_keywords += detect_suspicious(code, obfuscation) |
| 2040 | 2038 | self.iocs += detect_patterns(code, obfuscation) |
| ... | ... | @@ -2160,7 +2158,7 @@ class VBA_Parser(object): |
| 2160 | 2158 | _file = filename |
| 2161 | 2159 | else: |
| 2162 | 2160 | # file already read in memory, make it a file-like object for zipfile: |
| 2163 | - _file = BytesIO(data) | |
| 2161 | + _file = cStringIO.StringIO(data) | |
| 2164 | 2162 | #self.file = _file |
| 2165 | 2163 | self.ole_file = None |
| 2166 | 2164 | self.ole_subfiles = [] |
| ... | ... | @@ -2209,7 +2207,7 @@ class VBA_Parser(object): |
| 2209 | 2207 | if data is None: |
| 2210 | 2208 | data = open(filename, 'rb').read() |
| 2211 | 2209 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace |
| 2212 | - if b'http://schemas.microsoft.com/office/word/2003/wordml' in data: | |
| 2210 | + if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: | |
| 2213 | 2211 | self.open_word2003xml(data) |
| 2214 | 2212 | # store a lowercase version for the next tests: |
| 2215 | 2213 | data_lowercase = data.lower() |
| ... | ... | @@ -2219,14 +2217,14 @@ class VBA_Parser(object): |
| 2219 | 2217 | # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored. |
| 2220 | 2218 | # And the line is case insensitive. |
| 2221 | 2219 | # so we'll just check the presence of mime, version and multipart anywhere: |
| 2222 | - if self.type is None and b'mime' in data_lowercase and b'version' in data_lowercase \ | |
| 2223 | - and b'multipart' in data_lowercase: | |
| 2220 | + if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase \ | |
| 2221 | + and 'multipart' in data_lowercase: | |
| 2224 | 2222 | self.open_mht(data) |
| 2225 | 2223 | #TODO: handle exceptions |
| 2226 | 2224 | #TODO: Excel 2003 XML |
| 2227 | 2225 | # Check if this is a plain text VBA or VBScript file: |
| 2228 | 2226 | # To avoid scanning binary files, we simply check for some control chars: |
| 2229 | - if self.type is None and b'\x00' not in data: | |
| 2227 | + if self.type is None and '\x00' not in data: | |
| 2230 | 2228 | self.open_text(data) |
| 2231 | 2229 | if self.type is None: |
| 2232 | 2230 | # At this stage, could not match a known format: |
| ... | ... | @@ -2360,8 +2358,6 @@ class VBA_Parser(object): |
| 2360 | 2358 | """ |
| 2361 | 2359 | log.info('Opening MHTML file %s' % self.filename) |
| 2362 | 2360 | try: |
| 2363 | - if isinstance(data,bytes): | |
| 2364 | - data = data.decode('utf8', 'replace') | |
| 2365 | 2361 | # parse the MIME content |
| 2366 | 2362 | # remove any leading whitespace or newline (workaround for issue in email package) |
| 2367 | 2363 | stripped_data = data.lstrip('\r\n\t ') |
| ... | ... | @@ -2391,8 +2387,7 @@ class VBA_Parser(object): |
| 2391 | 2387 | # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. |
| 2392 | 2388 | # decompress the zlib data starting at offset 0x32, which is the OLE container: |
| 2393 | 2389 | # check ActiveMime header: |
| 2394 | - | |
| 2395 | - if (isinstance(part_data, str) or isinstance(part_data, bytes)) and is_mso_file(part_data): | |
| 2390 | + if isinstance(part_data, str) and is_mso_file(part_data): | |
| 2396 | 2391 | log.debug('Found ActiveMime header, decompressing MSO container') |
| 2397 | 2392 | try: |
| 2398 | 2393 | ole_data = mso_file_extract(part_data) |
| ... | ... | @@ -2463,8 +2458,6 @@ class VBA_Parser(object): |
| 2463 | 2458 | """ |
| 2464 | 2459 | log.info('Opening text file %s' % self.filename) |
| 2465 | 2460 | # directly store the source code: |
| 2466 | - if isinstance(data,bytes): | |
| 2467 | - data=data.decode('utf8','replace') | |
| 2468 | 2461 | self.vba_code_all_modules = data |
| 2469 | 2462 | self.contains_macros = True |
| 2470 | 2463 | # set type only if parsing succeeds |
| ... | ... | @@ -2603,7 +2596,7 @@ class VBA_Parser(object): |
| 2603 | 2596 | # Also look for VBA code in any stream including orphans |
| 2604 | 2597 | # (happens in some malformed files) |
| 2605 | 2598 | ole = self.ole_file |
| 2606 | - for sid in range(len(ole.direntries)): | |
| 2599 | + for sid in xrange(len(ole.direntries)): | |
| 2607 | 2600 | # check if id is already done above: |
| 2608 | 2601 | log.debug('Checking DirEntry #%d' % sid) |
| 2609 | 2602 | d = ole.direntries[sid] |
| ... | ... | @@ -2621,7 +2614,7 @@ class VBA_Parser(object): |
| 2621 | 2614 | log.debug('%r...[much more data]...%r' % (data[:100], data[-50:])) |
| 2622 | 2615 | else: |
| 2623 | 2616 | log.debug(repr(data)) |
| 2624 | - if 'Attribut' in data.decode('utf-8','ignore'): | |
| 2617 | + if 'Attribut' in data: | |
| 2625 | 2618 | log.debug('Found VBA compressed code') |
| 2626 | 2619 | self.contains_macros = True |
| 2627 | 2620 | except IOError as exc: |
| ... | ... | @@ -2669,7 +2662,7 @@ class VBA_Parser(object): |
| 2669 | 2662 | # Also look for VBA code in any stream including orphans |
| 2670 | 2663 | # (happens in some malformed files) |
| 2671 | 2664 | ole = self.ole_file |
| 2672 | - for sid in range(len(ole.direntries)): | |
| 2665 | + for sid in xrange(len(ole.direntries)): | |
| 2673 | 2666 | # check if id is already done above: |
| 2674 | 2667 | log.debug('Checking DirEntry #%d' % sid) |
| 2675 | 2668 | if sid in vba_stream_ids: |
| ... | ... | @@ -2684,7 +2677,7 @@ class VBA_Parser(object): |
| 2684 | 2677 | # read data |
| 2685 | 2678 | log.debug('Reading data from stream %r' % d.name) |
| 2686 | 2679 | data = ole._open(d.isectStart, d.size).read() |
| 2687 | - for match in re.finditer(rb'\x00Attribut[^e]', data, flags=re.IGNORECASE): | |
| 2680 | + for match in re.finditer(r'\x00Attribut[^e]', data, flags=re.IGNORECASE): | |
| 2688 | 2681 | start = match.start() - 3 |
| 2689 | 2682 | log.debug('Found VBA compressed code at index %X' % start) |
| 2690 | 2683 | compressed_code = data[start:] |
| ... | ... | @@ -2727,9 +2720,9 @@ class VBA_Parser(object): |
| 2727 | 2720 | self.vba_code_all_modules = '' |
| 2728 | 2721 | for (_, _, _, vba_code) in self.extract_all_macros(): |
| 2729 | 2722 | #TODO: filter code? (each module) |
| 2730 | - self.vba_code_all_modules += vba_code.decode('utf-8', 'ignore') + '\n' | |
| 2723 | + self.vba_code_all_modules += vba_code + '\n' | |
| 2731 | 2724 | for (_, _, form_string) in self.extract_form_strings(): |
| 2732 | - self.vba_code_all_modules += form_string.decode('utf-8', 'ignore') + '\n' | |
| 2725 | + self.vba_code_all_modules += form_string + '\n' | |
| 2733 | 2726 | # Analyze the whole code at once: |
| 2734 | 2727 | scanner = VBA_Scanner(self.vba_code_all_modules) |
| 2735 | 2728 | self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate) |
| ... | ... | @@ -2904,7 +2897,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2904 | 2897 | """ |
| 2905 | 2898 | # print a waiting message only if the output is not redirected to a file: |
| 2906 | 2899 | if sys.stdout.isatty(): |
| 2907 | - print('Analysis...\r') | |
| 2900 | + print 'Analysis...\r', | |
| 2908 | 2901 | sys.stdout.flush() |
| 2909 | 2902 | results = self.analyze_macros(show_decoded_strings, deobfuscate) |
| 2910 | 2903 | if results: |
| ... | ... | @@ -2920,9 +2913,9 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2920 | 2913 | if not is_printable(description): |
| 2921 | 2914 | description = repr(description) |
| 2922 | 2915 | t.add_row((kw_type, keyword, description)) |
| 2923 | - print(t) | |
| 2916 | + print t | |
| 2924 | 2917 | else: |
| 2925 | - print('No suspicious keyword or IOC found.') | |
| 2918 | + print 'No suspicious keyword or IOC found.' | |
| 2926 | 2919 | |
| 2927 | 2920 | def print_analysis_json(self, show_decoded_strings=False, deobfuscate=False): |
| 2928 | 2921 | """ |
| ... | ... | @@ -2936,7 +2929,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2936 | 2929 | """ |
| 2937 | 2930 | # print a waiting message only if the output is not redirected to a file: |
| 2938 | 2931 | if sys.stdout.isatty(): |
| 2939 | - print('Analysis...\r') | |
| 2932 | + print 'Analysis...\r', | |
| 2940 | 2933 | sys.stdout.flush() |
| 2941 | 2934 | return [dict(type=kw_type, keyword=keyword, description=description) |
| 2942 | 2935 | for kw_type, keyword, description in self.analyze_macros(show_decoded_strings, deobfuscate)] |
| ... | ... | @@ -2965,44 +2958,42 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2965 | 2958 | display_filename = '%s in %s' % (self.filename, self.container) |
| 2966 | 2959 | else: |
| 2967 | 2960 | display_filename = self.filename |
| 2968 | - print('=' * 79) | |
| 2969 | - print('FILE:', display_filename) | |
| 2961 | + print '=' * 79 | |
| 2962 | + print 'FILE:', display_filename | |
| 2970 | 2963 | try: |
| 2971 | 2964 | #TODO: handle olefile errors, when an OLE file is malformed |
| 2972 | - print('Type: %s' % self.type) | |
| 2965 | + print 'Type:', self.type | |
| 2973 | 2966 | if self.detect_vba_macros(): |
| 2974 | 2967 | #print 'Contains VBA Macros:' |
| 2975 | 2968 | for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros(): |
| 2976 | 2969 | if hide_attributes: |
| 2977 | 2970 | # hide attribute lines: |
| 2978 | - if isinstance(vba_code,bytes): | |
| 2979 | - vba_code =vba_code.decode('utf-8','replace') | |
| 2980 | 2971 | vba_code_filtered = filter_vba(vba_code) |
| 2981 | 2972 | else: |
| 2982 | 2973 | vba_code_filtered = vba_code |
| 2983 | - print('-' * 79) | |
| 2984 | - print('VBA MACRO %s ' % vba_filename) | |
| 2985 | - print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))) | |
| 2974 | + print '-' * 79 | |
| 2975 | + print 'VBA MACRO %s ' % vba_filename | |
| 2976 | + print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)) | |
| 2986 | 2977 | if display_code: |
| 2987 | - print('- ' * 39) | |
| 2978 | + print '- ' * 39 | |
| 2988 | 2979 | # detect empty macros: |
| 2989 | 2980 | if vba_code_filtered.strip() == '': |
| 2990 | - print('(empty macro)') | |
| 2981 | + print '(empty macro)' | |
| 2991 | 2982 | else: |
| 2992 | - print(vba_code_filtered) | |
| 2983 | + print vba_code_filtered | |
| 2993 | 2984 | for (subfilename, stream_path, form_string) in self.extract_form_strings(): |
| 2994 | - print('-' * 79) | |
| 2995 | - print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)) | |
| 2996 | - print('- ' * 39) | |
| 2997 | - print(form_string.decode('utf-8', 'ignore')) | |
| 2985 | + print '-' * 79 | |
| 2986 | + print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path) | |
| 2987 | + print '- ' * 39 | |
| 2988 | + print form_string | |
| 2998 | 2989 | if not vba_code_only: |
| 2999 | 2990 | # analyse the code from all modules at once: |
| 3000 | 2991 | self.print_analysis(show_decoded_strings, deobfuscate) |
| 3001 | 2992 | if show_deobfuscated_code: |
| 3002 | - print('MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n') | |
| 3003 | - print(self.reveal()) | |
| 2993 | + print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n' | |
| 2994 | + print self.reveal() | |
| 3004 | 2995 | else: |
| 3005 | - print('No VBA macros found.') | |
| 2996 | + print 'No VBA macros found.' | |
| 3006 | 2997 | except OlevbaBaseException: |
| 3007 | 2998 | raise |
| 3008 | 2999 | except Exception as exc: |
| ... | ... | @@ -3010,7 +3001,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3010 | 3001 | log.info('Error processing file %s (%s)' % (self.filename, exc)) |
| 3011 | 3002 | log.debug('Traceback:', exc_info=True) |
| 3012 | 3003 | raise ProcessingError(self.filename, exc) |
| 3013 | - print('') | |
| 3004 | + print '' | |
| 3014 | 3005 | |
| 3015 | 3006 | |
| 3016 | 3007 | def process_file_json(self, show_decoded_strings=False, |
| ... | ... | @@ -3057,7 +3048,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3057 | 3048 | curr_macro = {} |
| 3058 | 3049 | if hide_attributes: |
| 3059 | 3050 | # hide attribute lines: |
| 3060 | - vba_code_filtered = filter_vba(vba_code.decode('utf-8','replace')) | |
| 3051 | + vba_code_filtered = filter_vba(vba_code) | |
| 3061 | 3052 | else: |
| 3062 | 3053 | vba_code_filtered = vba_code |
| 3063 | 3054 | |
| ... | ... | @@ -3096,7 +3087,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3096 | 3087 | if self.detect_vba_macros(): |
| 3097 | 3088 | # print a waiting message only if the output is not redirected to a file: |
| 3098 | 3089 | if sys.stdout.isatty(): |
| 3099 | - print('Analysis...\r') | |
| 3090 | + print 'Analysis...\r', | |
| 3100 | 3091 | sys.stdout.flush() |
| 3101 | 3092 | self.analyze_macros(show_decoded_strings=show_decoded_strings, |
| 3102 | 3093 | deobfuscate=deobfuscate) |
| ... | ... | @@ -3114,7 +3105,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3114 | 3105 | base64obf, dridex, vba_obf) |
| 3115 | 3106 | |
| 3116 | 3107 | line = '%-12s %s' % (flags, self.filename) |
| 3117 | - print(line) | |
| 3108 | + print line | |
| 3118 | 3109 | |
| 3119 | 3110 | # old table display: |
| 3120 | 3111 | # macros = autoexec = suspicious = iocs = hexstrings = 'no' |
| ... | ... | @@ -3207,7 +3198,7 @@ def main(): |
| 3207 | 3198 | |
| 3208 | 3199 | # Print help if no arguments are passed |
| 3209 | 3200 | if len(args) == 0: |
| 3210 | - print(__doc__) | |
| 3201 | + print __doc__ | |
| 3211 | 3202 | parser.print_help() |
| 3212 | 3203 | sys.exit(RETURN_WRONG_ARGS) |
| 3213 | 3204 | |
| ... | ... | @@ -3218,7 +3209,7 @@ def main(): |
| 3218 | 3209 | url='http://decalage.info/python/oletools', |
| 3219 | 3210 | type='MetaInformation') |
| 3220 | 3211 | else: |
| 3221 | - print('olevba %s - http://decalage.info/python/oletools' % __version__) | |
| 3212 | + print 'olevba %s - http://decalage.info/python/oletools' % __version__ | |
| 3222 | 3213 | |
| 3223 | 3214 | logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') |
| 3224 | 3215 | # enable logging in the modules: |
| ... | ... | @@ -3238,8 +3229,8 @@ def main(): |
| 3238 | 3229 | # Column headers (do not know how many files there will be yet, so if no output_mode |
| 3239 | 3230 | # was specified, we will print triage for first file --> need these headers) |
| 3240 | 3231 | if options.output_mode in ('triage', 'unspecified'): |
| 3241 | - print('%-12s %-65s' % ('Flags', 'Filename')) | |
| 3242 | - print('%-12s %-65s' % ('-' * 11, '-' * 65)) | |
| 3232 | + print '%-12s %-65s' % ('Flags', 'Filename') | |
| 3233 | + print '%-12s %-65s' % ('-' * 11, '-' * 65) | |
| 3243 | 3234 | |
| 3244 | 3235 | previous_container = None |
| 3245 | 3236 | count = 0 |
| ... | ... | @@ -3257,14 +3248,14 @@ def main(): |
| 3257 | 3248 | if isinstance(data, Exception): |
| 3258 | 3249 | if isinstance(data, PathNotFoundException): |
| 3259 | 3250 | if options.output_mode in ('triage', 'unspecified'): |
| 3260 | - print('%-12s %s - File not found' % ('?', filename)) | |
| 3251 | + print '%-12s %s - File not found' % ('?', filename) | |
| 3261 | 3252 | elif options.output_mode != 'json': |
| 3262 | 3253 | log.error('Given path %r does not exist!' % filename) |
| 3263 | 3254 | return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \ |
| 3264 | 3255 | else RETURN_SEVERAL_ERRS |
| 3265 | 3256 | else: |
| 3266 | 3257 | if options.output_mode in ('triage', 'unspecified'): |
| 3267 | - print('%-12s %s - Failed to read from zip file %s' % ('?', filename, container)) | |
| 3258 | + print '%-12s %s - Failed to read from zip file %s' % ('?', filename, container) | |
| 3268 | 3259 | elif options.output_mode != 'json': |
| 3269 | 3260 | log.error('Exception opening/reading %r from zip file %r: %s' |
| 3270 | 3261 | % (filename, container, data)) |
| ... | ... | @@ -3291,7 +3282,7 @@ def main(): |
| 3291 | 3282 | # print container name when it changes: |
| 3292 | 3283 | if container != previous_container: |
| 3293 | 3284 | if container is not None: |
| 3294 | - print('\nFiles in %s:' % container) | |
| 3285 | + print '\nFiles in %s:' % container | |
| 3295 | 3286 | previous_container = container |
| 3296 | 3287 | # summarized output for triage: |
| 3297 | 3288 | vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, |
| ... | ... | @@ -3309,8 +3300,8 @@ def main(): |
| 3309 | 3300 | |
| 3310 | 3301 | except (SubstreamOpenError, UnexpectedDataError) as exc: |
| 3311 | 3302 | if options.output_mode in ('triage', 'unspecified'): |
| 3312 | - print('%-12s %s - Error opening substream or uenxpected ' \ | |
| 3313 | - 'content' % ('?', filename)) | |
| 3303 | + print '%-12s %s - Error opening substream or uenxpected ' \ | |
| 3304 | + 'content' % ('?', filename) | |
| 3314 | 3305 | elif options.output_mode == 'json': |
| 3315 | 3306 | print_json(file=filename, type='error', |
| 3316 | 3307 | error=type(exc).__name__, message=str(exc)) |
| ... | ... | @@ -3321,7 +3312,7 @@ def main(): |
| 3321 | 3312 | else RETURN_SEVERAL_ERRS |
| 3322 | 3313 | except FileOpenError as exc: |
| 3323 | 3314 | if options.output_mode in ('triage', 'unspecified'): |
| 3324 | - print('%-12s %s - File format not supported' % ('?', filename)) | |
| 3315 | + print '%-12s %s - File format not supported' % ('?', filename) | |
| 3325 | 3316 | elif options.output_mode == 'json': |
| 3326 | 3317 | print_json(file=filename, type='error', |
| 3327 | 3318 | error=type(exc).__name__, message=str(exc)) |
| ... | ... | @@ -3331,7 +3322,7 @@ def main(): |
| 3331 | 3322 | else RETURN_SEVERAL_ERRS |
| 3332 | 3323 | except ProcessingError as exc: |
| 3333 | 3324 | if options.output_mode in ('triage', 'unspecified'): |
| 3334 | - print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc)) | |
| 3325 | + print '%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc) | |
| 3335 | 3326 | elif options.output_mode == 'json': |
| 3336 | 3327 | print_json(file=filename, type='error', |
| 3337 | 3328 | error=type(exc).__name__, |
| ... | ... | @@ -3346,9 +3337,9 @@ def main(): |
| 3346 | 3337 | vba_parser.close() |
| 3347 | 3338 | |
| 3348 | 3339 | if options.output_mode == 'triage': |
| 3349 | - print('\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ | |
| 3340 | + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ | |
| 3350 | 3341 | 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ |
| 3351 | - 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n') | |
| 3342 | + 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' | |
| 3352 | 3343 | |
| 3353 | 3344 | if count == 1 and options.output_mode == 'unspecified': |
| 3354 | 3345 | # if options -t, -d and -j were not specified and it's a single file, print details: | ... | ... |
oletools/olevba3.py
0 → 100755
| 1 | +#!/usr/bin/env python | |
| 2 | +""" | |
| 3 | +olevba.py | |
| 4 | + | |
| 5 | +olevba is a script to parse OLE and OpenXML files such as MS Office documents | |
| 6 | +(e.g. Word, Excel), to extract VBA Macro code in clear text, deobfuscate | |
| 7 | +and analyze malicious macros. | |
| 8 | + | |
| 9 | +Supported formats: | |
| 10 | +- Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm) | |
| 11 | +- Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb) | |
| 12 | +- PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm) | |
| 13 | +- Word 2003 XML (.xml) | |
| 14 | +- Word/Excel Single File Web Page / MHTML (.mht) | |
| 15 | + | |
| 16 | +Author: Philippe Lagadec - http://www.decalage.info | |
| 17 | +License: BSD, see source code or documentation | |
| 18 | + | |
| 19 | +olevba is part of the python-oletools package: | |
| 20 | +http://www.decalage.info/python/oletools | |
| 21 | + | |
| 22 | +olevba is based on source code from officeparser by John William Davison | |
| 23 | +https://github.com/unixfreak0037/officeparser | |
| 24 | +""" | |
| 25 | + | |
| 26 | +# === LICENSE ================================================================== | |
| 27 | + | |
| 28 | +# olevba is copyright (c) 2014-2016 Philippe Lagadec (http://www.decalage.info) | |
| 29 | +# All rights reserved. | |
| 30 | +# | |
| 31 | +# Redistribution and use in source and binary forms, with or without modification, | |
| 32 | +# are permitted provided that the following conditions are met: | |
| 33 | +# | |
| 34 | +# * Redistributions of source code must retain the above copyright notice, this | |
| 35 | +# list of conditions and the following disclaimer. | |
| 36 | +# * Redistributions in binary form must reproduce the above copyright notice, | |
| 37 | +# this list of conditions and the following disclaimer in the documentation | |
| 38 | +# and/or other materials provided with the distribution. | |
| 39 | +# | |
| 40 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 41 | +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 42 | +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 43 | +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 44 | +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 45 | +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 46 | +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 47 | +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 48 | +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 49 | +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 50 | + | |
| 51 | + | |
| 52 | +# olevba contains modified source code from the officeparser project, published | |
| 53 | +# under the following MIT License (MIT): | |
| 54 | +# | |
| 55 | +# officeparser is copyright (c) 2014 John William Davison | |
| 56 | +# | |
| 57 | +# Permission is hereby granted, free of charge, to any person obtaining a copy | |
| 58 | +# of this software and associated documentation files (the "Software"), to deal | |
| 59 | +# in the Software without restriction, including without limitation the rights | |
| 60 | +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| 61 | +# copies of the Software, and to permit persons to whom the Software is | |
| 62 | +# furnished to do so, subject to the following conditions: | |
| 63 | +# | |
| 64 | +# The above copyright notice and this permission notice shall be included in all | |
| 65 | +# copies or substantial portions of the Software. | |
| 66 | +# | |
| 67 | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 68 | +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 69 | +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 70 | +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 71 | +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 72 | +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
| 73 | +# SOFTWARE. | |
| 74 | + | |
| 75 | +#------------------------------------------------------------------------------ | |
| 76 | +# CHANGELOG: | |
| 77 | +# 2014-08-05 v0.01 PL: - first version based on officeparser code | |
| 78 | +# 2014-08-14 v0.02 PL: - fixed bugs in code, added license from officeparser | |
| 79 | +# 2014-08-15 PL: - fixed incorrect value check in projecthelpfilepath Record | |
| 80 | +# 2014-08-15 v0.03 PL: - refactored extract_macros to support OpenXML formats | |
| 81 | +# and to find the VBA project root anywhere in the file | |
| 82 | +# 2014-11-29 v0.04 PL: - use olefile instead of OleFileIO_PL | |
| 83 | +# 2014-12-05 v0.05 PL: - refactored most functions into a class, new API | |
| 84 | +# - added detect_vba_macros | |
| 85 | +# 2014-12-10 v0.06 PL: - hide first lines with VB attributes | |
| 86 | +# - detect auto-executable macros | |
| 87 | +# - ignore empty macros | |
| 88 | +# 2014-12-14 v0.07 PL: - detect_autoexec() is now case-insensitive | |
| 89 | +# 2014-12-15 v0.08 PL: - improved display for empty macros | |
| 90 | +# - added pattern extraction | |
| 91 | +# 2014-12-25 v0.09 PL: - added suspicious keywords detection | |
| 92 | +# 2014-12-27 v0.10 PL: - added OptionParser, main and process_file | |
| 93 | +# - uses xglob to scan several files with wildcards | |
| 94 | +# - option -r to recurse subdirectories | |
| 95 | +# - option -z to scan files in password-protected zips | |
| 96 | +# 2015-01-02 v0.11 PL: - improved filter_vba to detect colons | |
| 97 | +# 2015-01-03 v0.12 PL: - fixed detect_patterns to detect all patterns | |
| 98 | +# - process_file: improved display, shows container file | |
| 99 | +# - improved list of executable file extensions | |
| 100 | +# 2015-01-04 v0.13 PL: - added several suspicious keywords, improved display | |
| 101 | +# 2015-01-08 v0.14 PL: - added hex strings detection and decoding | |
| 102 | +# - fixed issue #2, decoding VBA stream names using | |
| 103 | +# specified codepage and unicode stream names | |
| 104 | +# 2015-01-11 v0.15 PL: - added new triage mode, options -t and -d | |
| 105 | +# 2015-01-16 v0.16 PL: - fix for issue #3 (exception when module name="text") | |
| 106 | +# - added several suspicious keywords | |
| 107 | +# - added option -i to analyze VBA source code directly | |
| 108 | +# 2015-01-17 v0.17 PL: - removed .com from the list of executable extensions | |
| 109 | +# - added scan_vba to run all detection algorithms | |
| 110 | +# - decoded hex strings are now also scanned + reversed | |
| 111 | +# 2015-01-23 v0.18 PL: - fixed issue #3, case-insensitive search in code_modules | |
| 112 | +# 2015-01-24 v0.19 PL: - improved the detection of IOCs obfuscated with hex | |
| 113 | +# strings and StrReverse | |
| 114 | +# 2015-01-26 v0.20 PL: - added option --hex to show all hex strings decoded | |
| 115 | +# 2015-01-29 v0.21 PL: - added Dridex obfuscation decoding | |
| 116 | +# - improved display, shows obfuscation name | |
| 117 | +# 2015-02-01 v0.22 PL: - fixed issue #4: regex for URL, e-mail and exe filename | |
| 118 | +# - added Base64 obfuscation decoding (contribution from | |
| 119 | +# @JamesHabben) | |
| 120 | +# 2015-02-03 v0.23 PL: - triage now uses VBA_Scanner results, shows Base64 and | |
| 121 | +# Dridex strings | |
| 122 | +# - exception handling in detect_base64_strings | |
| 123 | +# 2015-02-07 v0.24 PL: - renamed option --hex to --decode, fixed display | |
| 124 | +# - display exceptions with stack trace | |
| 125 | +# - added several suspicious keywords | |
| 126 | +# - improved Base64 detection and decoding | |
| 127 | +# - fixed triage mode not to scan attrib lines | |
| 128 | +# 2015-03-04 v0.25 PL: - added support for Word 2003 XML | |
| 129 | +# 2015-03-22 v0.26 PL: - added suspicious keywords for sandboxing and | |
| 130 | +# virtualisation detection | |
| 131 | +# 2015-05-06 v0.27 PL: - added support for MHTML files with VBA macros | |
| 132 | +# (issue #10 reported by Greg from SpamStopsHere) | |
| 133 | +# 2015-05-24 v0.28 PL: - improved support for MHTML files with modified header | |
| 134 | +# (issue #11 reported by Thomas Chopitea) | |
| 135 | +# 2015-05-26 v0.29 PL: - improved MSO files parsing, taking into account | |
| 136 | +# various data offsets (issue #12) | |
| 137 | +# - improved detection of MSO files, avoiding incorrect | |
| 138 | +# parsing errors (issue #7) | |
| 139 | +# 2015-05-29 v0.30 PL: - added suspicious keywords suggested by @ozhermit, | |
| 140 | +# Davy Douhine (issue #9), issue #13 | |
| 141 | +# 2015-06-16 v0.31 PL: - added generic VBA expression deobfuscation (chr,asc,etc) | |
| 142 | +# 2015-06-19 PL: - added options -a, -c, --each, --attr | |
| 143 | +# 2015-06-21 v0.32 PL: - always display decoded strings which are printable | |
| 144 | +# - fix VBA_Scanner.scan to return raw strings, not repr() | |
| 145 | +# 2015-07-09 v0.40 PL: - removed usage of sys.stderr which causes issues | |
| 146 | +# 2015-07-12 PL: - added Hex function decoding to VBA Parser | |
| 147 | +# 2015-07-13 PL: - added Base64 function decoding to VBA Parser | |
| 148 | +# 2015-09-06 PL: - improved VBA_Parser, refactored the main functions | |
| 149 | +# 2015-09-13 PL: - moved main functions to a class VBA_Parser_CLI | |
| 150 | +# - fixed issue when analysis was done twice | |
| 151 | +# 2015-09-15 PL: - remove duplicate IOCs from results | |
| 152 | +# 2015-09-16 PL: - join long VBA lines ending with underscore before scan | |
| 153 | +# - disabled unused option --each | |
| 154 | +# 2015-09-22 v0.41 PL: - added new option --reveal | |
| 155 | +# - added suspicious strings for PowerShell.exe options | |
| 156 | +# 2015-10-09 v0.42 PL: - VBA_Parser: split each format into a separate method | |
| 157 | +# 2015-10-10 PL: - added support for text files with VBA source code | |
| 158 | +# 2015-11-17 PL: - fixed bug with --decode option | |
| 159 | +# 2015-12-16 PL: - fixed bug in main (no options input anymore) | |
| 160 | +# - improved logging, added -l option | |
| 161 | +# 2016-01-31 PL: - fixed issue #31 in VBA_Parser.open_mht | |
| 162 | +# - fixed issue #32 by monkeypatching email.feedparser | |
| 163 | +# 2016-02-07 PL: - KeyboardInterrupt is now raised properly | |
| 164 | +# 2016-02-20 v0.43 PL: - fixed issue #34 in the VBA parser and vba_chr | |
| 165 | +# 2016-02-29 PL: - added Workbook_Activate to suspicious keywords | |
| 166 | +# 2016-03-08 v0.44 PL: - added VBA Form strings extraction and analysis | |
| 167 | +# 2016-03-04 v0.45 CH: - added JSON output (by Christian Herdtweck) | |
| 168 | +# 2016-03-16 CH: - added option --no-deobfuscate (temporary) | |
| 169 | +# 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate | |
| 170 | +# - updated suspicious keywords | |
| 171 | +# 2016-05-04 v0.47 PL: - look for VBA code in any stream including orphans | |
| 172 | +# 2016-04-28 CH: - return an exit code depending on the results | |
| 173 | +# - improved error and exception handling | |
| 174 | +# - improved JSON output | |
| 175 | +# 2016-05-12 CH: - added support for PowerPoint 97-2003 files | |
| 176 | +# 2016-06-06 CH: - improved handling of unicode VBA module names | |
| 177 | +# 2016-06-07 CH: - added option --relaxed, stricter parsing by default | |
| 178 | +# 2016-06-12 v0.50 PL: - fixed small bugs in VBA parsing code | |
| 179 | +# 2016-07-01 PL: - fixed issue #58 with format() to support Python 2.6 | |
| 180 | +# 2016-07-29 CH: - fixed several bugs including #73 (Mac Roman encoding) | |
| 181 | + | |
| 182 | +__version__ = '0.50' | |
| 183 | + | |
| 184 | +#------------------------------------------------------------------------------ | |
| 185 | +# TODO: | |
| 186 | +# + setup logging (common with other oletools) | |
| 187 | +# + add xor bruteforcing like bbharvest | |
| 188 | +# + options -a and -c should imply -d | |
| 189 | + | |
| 190 | +# TODO later: | |
| 191 | +# + performance improvement: instead of searching each keyword separately, | |
| 192 | +# first split vba code into a list of words (per line), then check each | |
| 193 | +# word against a dict. (or put vba words into a set/dict?) | |
| 194 | +# + for regex, maybe combine them into a single re with named groups? | |
| 195 | +# + add Yara support, include sample rules? plugins like balbuzard? | |
| 196 | +# + add balbuzard support | |
| 197 | +# + output to file (replace print by file.write, sys.stdout by default) | |
| 198 | +# + look for VBA in embedded documents (e.g. Excel in Word) | |
| 199 | +# + support SRP streams (see Lenny's article + links and sample) | |
| 200 | +# - python 3.x support | |
| 201 | +# - check VBA macros in Visio, Access, Project, etc | |
| 202 | +# - extract_macros: convert to a class, split long function into smaller methods | |
| 203 | +# - extract_macros: read bytes from stream file objects instead of strings | |
| 204 | +# - extract_macros: use combined struct.unpack instead of many calls | |
| 205 | +# - all except clauses should target specific exceptions | |
| 206 | + | |
| 207 | +#------------------------------------------------------------------------------ | |
| 208 | +# REFERENCES: | |
| 209 | +# - [MS-OVBA]: Microsoft Office VBA File Format Structure | |
| 210 | +# http://msdn.microsoft.com/en-us/library/office/cc313094%28v=office.12%29.aspx | |
| 211 | +# - officeparser: https://github.com/unixfreak0037/officeparser | |
| 212 | + | |
| 213 | + | |
| 214 | +#--- IMPORTS ------------------------------------------------------------------ | |
| 215 | + | |
| 216 | +import sys, logging | |
| 217 | +import struct | |
| 218 | +from _io import StringIO,BytesIO | |
| 219 | +import math | |
| 220 | +import zipfile | |
| 221 | +import re | |
| 222 | +import optparse | |
| 223 | +import binascii | |
| 224 | +import base64 | |
| 225 | +import zlib | |
| 226 | +import email # for MHTML parsing | |
| 227 | +import string # for printable | |
| 228 | +import json # for json output mode (argument --json) | |
| 229 | + | |
# import lxml or ElementTree for XML parsing.
# The first implementation that can be imported is bound to the name ET.
try:
    # lxml: best performance for XML processing
    import lxml.etree as ET
except ImportError:
    try:
        # Python 2.5 to 3.8: C-accelerated ElementTree shipped with Python
        import xml.etree.cElementTree as ET
    except ImportError:
        try:
            # Python 3.9+: the cElementTree alias was removed, the plain
            # ElementTree module is the one to use
            import xml.etree.ElementTree as ET
        except ImportError:
            try:
                # Python <2.5: standalone ElementTree install
                import elementtree.cElementTree as ET
            except ImportError:
                # Raise a real exception instance: "raise(ImportError, msg)"
                # raises a tuple, which Python 3 rejects with a TypeError
                # instead of showing this message.
                raise ImportError(
                    "lxml or ElementTree are not installed, "
                    "see http://codespeak.net/lxml "
                    "or http://effbot.org/zone/element-index.htm")
| 246 | + | |
| 247 | +import thirdparty.olefile as olefile | |
| 248 | +from thirdparty.prettytable import prettytable | |
| 249 | +from thirdparty.xglob import xglob, PathNotFoundException | |
| 250 | +from thirdparty.pyparsing.pyparsing import \ | |
| 251 | + CaselessKeyword, CaselessLiteral, Combine, Forward, Literal, \ | |
| 252 | + Optional, QuotedString,Regex, Suppress, Word, WordStart, \ | |
| 253 | + alphanums, alphas, hexnums,nums, opAssoc, srange, \ | |
| 254 | + infixNotation | |
| 255 | +import ppt_parser | |
| 256 | + | |
# monkeypatch email to fix issue #32:
# allow header lines without ":"
# The replacement pattern makes the colon after the header name optional (":?").
# NOTE: this rebinds a module-level attribute of email.feedparser, so it affects
# every user of that module in the current process, not only this file.
import email.feedparser
email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t ])')
| 261 | + | |
| 262 | + | |
| 263 | +# === LOGGING ================================================================= | |
| 264 | + | |
class NullHandler(logging.Handler):
    """
    Logging handler that silently discards every record.

    It is attached to this module's logger so that nothing is printed when
    the main application has not configured logging.
    logging.NullHandler exists from Python 2.7 on; this class is kept for 2.6:
    see https://docs.python.org/2.6/library/logging.html#configuring-logging-for-a-library
    """
    def emit(self, record):
        # intentionally do nothing with the record
        return None
| 274 | + | |
def get_logger(name, level=logging.CRITICAL+1):
    """
    Return a logger for this module without touching the root logger.

    If a logger called `name` is already registered it is reused as is, so
    that no second handler is added (which would double every message).
    Otherwise a new logger is created with only a NullHandler attached; it
    is up to the application to configure its own logging.
    In both cases the logger level is set to `level`, which defaults to
    CRITICAL+1, i.e. nothing is logged.
    """
    # The membership test must happen BEFORE getLogger(), because getLogger()
    # registers the name in loggerDict as a side effect.
    #NOTE: another less intrusive but more "hackish" solution would be to
    #      use getLogger then test if its effective level is not default.
    already_registered = name in logging.Logger.manager.loggerDict
    logger = logging.getLogger(name)
    if not already_registered:
        logger.addHandler(NullHandler())
    logger.setLevel(level)
    return logger
| 300 | + | |
# a global logger object used for debugging:
# (created with get_logger's default level CRITICAL+1, so it stays silent
# until its level is changed)
log = get_logger('olevba')
| 303 | + | |
| 304 | + | |
| 305 | +#=== EXCEPTIONS ============================================================== | |
| 306 | + | |
class OlevbaBaseException(Exception):
    """ Common base class of the exceptions raised in this module.

    msg      -- error description, stored unchanged in self.msg
    filename -- optional name of the file being processed
    orig_exc -- optional underlying exception; when truthy, its text is
                appended in parentheses to the message passed to Exception
    """
    def __init__(self, msg, filename=None, orig_exc=None, **kwargs):
        full_msg = msg + ' ({0})'.format(orig_exc) if orig_exc else msg
        super(OlevbaBaseException, self).__init__(full_msg, **kwargs)
        self.msg = msg
        self.filename = filename
        self.orig_exc = orig_exc
| 319 | + | |
| 320 | + | |
class FileOpenError(OlevbaBaseException):
    """ raised by VBA_Parser constructor if all open_... attempts failed

    probably means the file type is not supported
    """

    def __init__(self, filename, orig_exc=None):
        message = 'Failed to open file %s' % filename
        super(FileOpenError, self).__init__(message, filename, orig_exc)
| 330 | + | |
| 331 | + | |
class ProcessingError(OlevbaBaseException):
    """ raised by VBA_Parser.process_file* functions """

    def __init__(self, filename, orig_exc):
        message = 'Error processing file %s' % filename
        super(ProcessingError, self).__init__(message, filename, orig_exc)
| 338 | + | |
| 339 | + | |
class MsoExtractionError(RuntimeError, OlevbaBaseException):
    """ raised by mso_file_extract if parsing MSO/ActiveMIME data failed

    Inherits from RuntimeError as well as OlevbaBaseException, so it is caught
    by except clauses targeting either class.
    """

    def __init__(self, msg):
        # Initialise both base classes explicitly. This used to call
        # MsoExtractionError.__init__, i.e. itself, which recursed until
        # the interpreter's recursion limit was hit.
        RuntimeError.__init__(self, msg)
        OlevbaBaseException.__init__(self, msg)
| 346 | + | |
| 347 | + | |
class SubstreamOpenError(FileOpenError):
    """ special kind of FileOpenError: file is a substream of original file """

    def __init__(self, filename, subfilename, orig_exc=None):
        # the message names the substream as "<filename>/<subfilename>"
        full_path = '/'.join((str(filename), str(subfilename)))
        super(SubstreamOpenError, self).__init__(full_path, orig_exc)
        # the parent stored the combined path; keep only the container name
        self.filename = filename
        self.subfilename = subfilename
| 356 | + | |
| 357 | + | |
class UnexpectedDataError(OlevbaBaseException):
    """ raised when parsing is strict (=not relaxed) and data is unexpected

    `expected` and `value` are formatted as 4-digit uppercase hexadecimal in
    the message, so they must be integers.
    """

    def __init__(self, stream_path, variable, expected, value):
        template = ('Unexpected value in {0} for variable {1}: '
                    'expected {2:04X} but found {3:04X}!')
        message = template.format(stream_path, variable, expected, value)
        super(UnexpectedDataError, self).__init__(message)
        self.stream_path = stream_path
        self.variable = variable
        self.expected = expected
        self.value = value
| 370 | + | |
| 371 | +#--- CONSTANTS ---------------------------------------------------------------- | |
| 372 | + | |
# return codes
# (integer status codes; the changelog entry of 2016-04-28 mentions returning
# an exit code depending on the results - presumably these values, to be
# confirmed against main())
RETURN_OK = 0
RETURN_WARNINGS = 1  # (reserved, not used yet)
RETURN_WRONG_ARGS = 2  # (fixed, built into optparse)
RETURN_FILE_NOT_FOUND = 3
RETURN_XGLOB_ERR = 4
RETURN_OPEN_ERROR = 5
RETURN_PARSE_ERROR = 6
RETURN_SEVERAL_ERRS = 7
RETURN_UNEXPECTED = 8
| 383 | + | |
# MAC codepages (from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python)
# Maps a numeric Mac code page identifier to the name of a Python codec.
# Every value must be a name accepted by codecs.lookup().
MAC_CODEPAGES = {
    10000: 'mac-roman',
    10001: 'shiftjis',          # not found: 'mac-shift-jis',
    10003: 'ascii',             # nothing appropriate found: 'mac-hangul',
    10008: 'gb2312',            # not found: 'mac-gb2312', (was misspelled 'gb2321', an unknown codec)
    10002: 'big5',              # not found: 'mac-big5',
    10005: 'hebrew',            # not found: 'mac-hebrew',
    10004: 'mac-arabic',
    10006: 'mac-greek',
    10081: 'mac-turkish',
    10021: 'thai',              # not found: mac-thai',
    10029: 'maccentraleurope',  # not found: 'mac-east europe',
    10007: 'ascii',             # nothing appropriate found: 'mac-russian',
}
| 399 | + | |
# URL and message to report issues:
URL_OLEVBA_ISSUES = 'https://github.com/decalage2/oletools/issues'
MSG_OLEVBA_ISSUES = 'Please report this issue on %s' % URL_OLEVBA_ISSUES

# Container types:
TYPE_OLE = 'OLE'
TYPE_OpenXML = 'OpenXML'
TYPE_Word2003_XML = 'Word2003_XML'
TYPE_MHTML = 'MHTML'
TYPE_TEXT = 'Text'
TYPE_PPT = 'PPT'

# short tag to display file types in triage mode:
# every tag is three characters followed by a colon, so that they all have
# the same width ('PPT' was previously missing its colon)
TYPE2TAG = {
    TYPE_OLE: 'OLE:',
    TYPE_OpenXML: 'OpX:',
    TYPE_Word2003_XML: 'XML:',
    TYPE_MHTML: 'MHT:',
    TYPE_TEXT: 'TXT:',
    TYPE_PPT: 'PPT:',
}
| 421 | + | |
| 422 | + | |
# MSO files ActiveMime header magic
# (a bytes literal in this Python 3 version)
MSO_ACTIVEMIME_HEADER = b'ActiveMime'

# file name extensions, stored without the leading dot:
MODULE_EXTENSION = "bas"
CLASS_EXTENSION = "cls"
FORM_EXTENSION = "frm"

# Namespaces and tags for Word2003 XML parsing:
# (names are written as '{namespace}localname')
NS_W = '{http://schemas.microsoft.com/office/word/2003/wordml}'
# the tag <w:binData w:name="editdata.mso"> contains the VBA macro code:
TAG_BINDATA = NS_W + 'binData'
ATTR_NAME = NS_W + 'name'

# Keywords to detect auto-executable macros
# Maps a human-readable description to a tuple of macro/event names.
AUTOEXEC_KEYWORDS = {
    # MS Word:
    'Runs when the Word document is opened':
        ('AutoExec', 'AutoOpen', 'Document_Open', 'DocumentOpen'),
    'Runs when the Word document is closed':
        ('AutoExit', 'AutoClose', 'Document_Close', 'DocumentBeforeClose'),
    'Runs when the Word document is modified':
        ('DocumentChange',),
    'Runs when a new Word document is created':
        ('AutoNew', 'Document_New', 'NewDocument'),

    # MS Excel:
    'Runs when the Excel Workbook is opened':
        ('Auto_Open', 'Workbook_Open', 'Workbook_Activate'),
    'Runs when the Excel Workbook is closed':
        ('Auto_Close', 'Workbook_Close'),

    #TODO: full list in MS specs??
}
| 456 | + | |
# Suspicious Keywords that may be used by malware
# See VBA language reference: http://msdn.microsoft.com/en-us/library/office/jj692818%28v=office.15%29.aspx
# Maps a human-readable description to a tuple of keywords; single-keyword
# entries need the trailing comma to remain tuples.
SUSPICIOUS_KEYWORDS = {
    #TODO: use regex to support variable whitespaces
    'May read system environment variables':
        ('Environ',),
    'May open a file':
        ('Open',),
    'May write to a file (if combined with Open)':
    #TODO: regex to find Open+Write on same line
        ('Write', 'Put', 'Output', 'Print #'),
    'May read or write a binary file (if combined with Open)':
    #TODO: regex to find Open+Binary on same line
        ('Binary',),
    'May copy a file':
        ('FileCopy', 'CopyFile'),
    #FileCopy: http://msdn.microsoft.com/en-us/library/office/gg264390%28v=office.15%29.aspx
    #CopyFile: http://msdn.microsoft.com/en-us/library/office/gg264089%28v=office.15%29.aspx
    'May delete a file':
        ('Kill',),
    'May create a text file':
        ('CreateTextFile', 'ADODB.Stream', 'WriteText', 'SaveToFile'),
    #CreateTextFile: http://msdn.microsoft.com/en-us/library/office/gg264617%28v=office.15%29.aspx
    #ADODB.Stream sample: http://pastebin.com/Z4TMyuq6
    'May run an executable file or a system command':
        ('Shell', 'vbNormal', 'vbNormalFocus', 'vbHide', 'vbMinimizedFocus', 'vbMaximizedFocus', 'vbNormalNoFocus',
         'vbMinimizedNoFocus', 'WScript.Shell', 'Run', 'ShellExecute'),
    #Shell: http://msdn.microsoft.com/en-us/library/office/gg278437%28v=office.15%29.aspx
    #WScript.Shell+Run sample: http://pastebin.com/Z4TMyuq6
    'May run PowerShell commands':
    #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
    #also: https://bitbucket.org/decalage/oletools/issues/14/olevba-library-update-ioc
    # ref: https://blog.netspi.com/15-ways-to-bypass-the-powershell-execution-policy/
    # TODO: add support for keywords starting with a non-alpha character, such as "-noexit"
    # TODO: '-command', '-EncodedCommand', '-scriptblock'
        ('PowerShell', 'noexit', 'ExecutionPolicy', 'noprofile', 'command', 'EncodedCommand',
         'invoke-command', 'scriptblock', 'Invoke-Expression', 'AuthorizationManager'),
    'May run an executable file or a system command using PowerShell':
        ('Start-Process',),
    'May hide the application':
        ('Application.Visible', 'ShowWindow', 'SW_HIDE'),
    'May create a directory':
        ('MkDir',),
    'May save the current workbook':
        ('ActiveWorkbook.SaveAs',),
    'May change which directory contains files to open at startup':
    #TODO: confirm the actual effect
        ('Application.AltStartupPath',),
    'May create an OLE object':
        ('CreateObject',),
    'May create an OLE object using PowerShell':
        ('New-Object',),
    'May run an application (if combined with CreateObject)':
        ('Shell.Application',),
    'May enumerate application windows (if combined with Shell.Application object)':
        ('Windows', 'FindWindow'),
    'May run code from a DLL':
    #TODO: regex to find declare+lib on same line
        ('Lib',),
    'May inject code into another process':
        ('CreateThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload
        ),
    'May download files from the Internet':
    #TODO: regex to find urlmon+URLDownloadToFileA on same line
        ('URLDownloadToFileA', 'Msxml2.XMLHTTP', 'Microsoft.XMLHTTP',
         'MSXML2.ServerXMLHTTP', # suggested in issue #13
         'User-Agent', # sample from @ozhermit: http://pastebin.com/MPc3iV6z
        ),
    'May download files from the Internet using PowerShell':
    #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
        ('Net.WebClient', 'DownloadFile', 'DownloadString'),
    'May control another application by simulating user keystrokes':
        ('SendKeys', 'AppActivate'),
    #SendKeys: http://msdn.microsoft.com/en-us/library/office/gg278655%28v=office.15%29.aspx
    'May attempt to obfuscate malicious function calls':
        ('CallByName',),
    #CallByName: http://msdn.microsoft.com/en-us/library/office/gg278760%28v=office.15%29.aspx
    'May attempt to obfuscate specific strings':
    #TODO: regex to find several Chr*, not just one
        ('Chr', 'ChrB', 'ChrW', 'StrReverse', 'Xor'),
    #Chr: http://msdn.microsoft.com/en-us/library/office/gg264465%28v=office.15%29.aspx
    'May read or write registry keys':
    #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
        ('RegOpenKeyExA', 'RegOpenKeyEx', 'RegCloseKey'),
    'May read registry keys':
    #sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
        ('RegQueryValueExA', 'RegQueryValueEx',
         'RegRead',  #with Wscript.Shell
        ),
    'May detect virtualization':
    # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
        (r'SYSTEM\ControlSet001\Services\Disk\Enum', 'VIRTUAL', 'VMWARE', 'VBOX'),
    'May detect Anubis Sandbox':
    # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
    # NOTES: this sample also checks App.EXEName but that seems to be a bug, it works in VB6 but not in VBA
    # ref: http://www.syssec-project.eu/m/page-media/3/disarm-raid11.pdf
        ('GetVolumeInformationA', 'GetVolumeInformation',  # with kernel32.dll
         '1824245000', r'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProductId',
         '76487-337-8429955-22614', 'andy', 'sample', r'C:\exec\exec.exe', 'popupkiller'
        ),
    'May detect Sandboxie':
    # sample: https://malwr.com/analysis/M2NjZWNmMjA0YjVjNGVhYmJlZmFhNWY4NmQxZDllZTY/
    # ref: http://www.cplusplus.com/forum/windows/96874/
        ('SbieDll.dll', 'SandboxieControlWndClass'),
    'May detect Sunbelt Sandbox':
    # ref: http://www.cplusplus.com/forum/windows/96874/
        (r'C:\file.exe',),
    'May detect Norman Sandbox':
    # ref: http://www.cplusplus.com/forum/windows/96874/
        ('currentuser',),
    'May detect CW Sandbox':
    # ref: http://www.cplusplus.com/forum/windows/96874/
        ('Schmidti',),
    'May detect WinJail Sandbox':
    # ref: http://www.cplusplus.com/forum/windows/96874/
        ('Afx:400000:0',),
}
| 574 | + | |
# Regular Expression for a URL:
# http://en.wikipedia.org/wiki/Uniform_resource_locator
# http://www.w3.org/Addressing/URL/uri-spec.html
# The expression is assembled from the smaller fragments below; each fragment
# is a non-capturing group so that it can be concatenated safely.
#TODO: also support username:password@server
#TODO: other protocols (file, gopher, wais, ...?)
SCHEME = r'\b(?:http|ftp)s?'
# see http://en.wikipedia.org/wiki/List_of_Internet_top-level_domains
TLD = r'(?:xn--[a-zA-Z0-9]{4,20}|[a-zA-Z]{2,20})'
DNS_NAME = r'(?:[a-zA-Z0-9\-\.]+\.' + TLD + ')'
#TODO: IPv6 - see https://www.debuggex.com/
# A literal numeric IPv6 address may be given, but must be enclosed in [ ] e.g. [db8:0cec::99:123a]
NUMBER_0_255 = r'(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'
IPv4 = r'(?:' + NUMBER_0_255 + r'\.){3}' + NUMBER_0_255
# IPv4 must come before the DNS name because it is more specific
SERVER = r'(?:' + IPv4 + '|' + DNS_NAME + ')'
PORT = r'(?:\:[0-9]{1,5})?'
SERVER_PORT = SERVER + PORT
URL_PATH = r'(?:/[a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~]*)?'  # [^\.\,\)\(\s"]
URL_RE = SCHEME + r'\://' + SERVER_PORT + URL_PATH
re_url = re.compile(URL_RE)


# Patterns to be extracted (IP addresses, URLs, etc)
# From patterns.py in balbuzard
# Each entry is a (pattern name, compiled regex) pair.
RE_PATTERNS = (
    ('URL', re.compile(URL_RE)),
    ('IPv4 address', re.compile(IPv4)),
    # TODO: add IPv6
    # The trailing word boundary must be a raw string: a plain '\b' is the
    # backspace character (0x08), which made this pattern match only e-mail
    # addresses followed by a backspace.
    ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@' + SERVER + r'\b')),
    # ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')),
    # Executable file name with known extensions (except .com which is present in many URLs, and .application):
    ("Executable file name", re.compile(
        r"(?i)\b\w+\.(EXE|PIF|GADGET|MSI|MSP|MSC|VBS|VBE|VB|JSE|JS|WSF|WSC|WSH|WS|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1XML|PS1|PS2XML|PS2|PSC1|PSC2|SCF|LNK|INF|REG)\b")),
    # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/
    # TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types
    # TODO: add win & unix file paths
    #('Hex string', re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')),
)
| 613 | + | |
# Hexadecimal-encoded strings: at least four pairs of hex digits.
re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')

# Base64-encoded strings.
#re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"')
# The expression below comes from balbuzard and yields fewer false positives.
# BASE64_RE is the bare form without double quotes; it is reused further
# down by quoted_base64_string.
BASE64_RE = r'(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?'
re_base64_string = re.compile('"%s"' % BASE64_RE)
# Common strings (all lowercase) that match the base64 regex although they
# are not base64-encoded data:
BASE64_WHITELIST = {
    'thisdocument',
    'thisworkbook',
    'test',
    'temp',
    'http',
    'open',
    'exit',
}

# Strings encoded with a specific Dridex algorithm
# (see https://github.com/JamesHabben/MalwareStuff)
re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"')
# Any letter outside A-F/a-f proves the string is not a plain hex string:
re_nothex_check = re.compile(r'[G-Zg-z]')

# Printable strings (5 characters or more) found in VBA Forms (bytes pattern):
re_printable_string = re.compile(rb'[\t\r\n\x20-\xFF]{5,}')
| 634 | + | |
| 635 | + | |
| 636 | +# === PARTIAL VBA GRAMMAR ==================================================== | |
| 637 | + | |
| 638 | +# REFERENCES: | |
| 639 | +# - [MS-VBAL]: VBA Language Specification | |
| 640 | +# https://msdn.microsoft.com/en-us/library/dd361851.aspx | |
| 641 | +# - pyparsing: http://pyparsing.wikispaces.com/ | |
| 642 | + | |
| 643 | +# TODO: set whitespaces according to VBA | |
| 644 | +# TODO: merge extended lines before parsing | |
| 645 | + | |
# VBA identifier chars (from MS-VBAL 3.3.5): alphanumeric characters plus
# underscore. Used as the WordStart() character set in the grammar below, so
# that tokens glued to an identifier are not matched on their own.
vba_identifier_chars = alphanums + '_'
| 648 | + | |
class VbaExpressionString(str):
    """
    Marker subclass of str: behaves exactly like str, but lets the caller
    tell apart plain strings from strings that were rebuilt by evaluating
    an obfuscating VBA expression (Chr, StrReverse, etc).

    Convention: every parse action of a VBA expression wraps its result in
    VbaExpressionString, so isinstance(s, VbaExpressionString) is True only
    for strings produced by VBA expressions (see detect_vba_strings).
    """
    # TODO: use Unicode everywhere instead of str
| 660 | + | |
| 661 | + | |
| 662 | +# --- NUMBER TOKENS ---------------------------------------------------------- | |
| 663 | + | |
| 664 | +# 3.3.2 Number Tokens | |
| 665 | +# INTEGER = integer-literal ["%" / "&" / "^"] | |
| 666 | +# integer-literal = decimal-literal / octal-literal / hex-literal | |
| 667 | +# decimal-literal = 1*decimal-digit | |
| 668 | +# octal-literal = "&" [%x004F / %x006F] 1*octal-digit | |
| 669 | +# ; & or &o or &O | |
| 670 | +# hex-literal = "&" (%x0048 / %x0068) 1*hex-digit | |
| 671 | +# ; &h or &H | |
| 672 | +# octal-digit = "0" / "1" / "2" / "3" / "4" / "5" / "6" / "7" | |
| 673 | +# decimal-digit = octal-digit / "8" / "9" | |
| 674 | +# hex-digit = decimal-digit / %x0041-0046 / %x0061-0066 ;A-F / a-f | |
| 675 | + | |
# NOTE: here Combine() is required to avoid spaces between elements
# NOTE: here WordStart is necessary to avoid matching a number preceded by
#       letters or underscore (e.g. "VBT1" or "ABC_34"), when using scanString

# Decimal literal: a run of digits; an optional single type-suffix character
# (%, & or ^) is accepted and dropped. The parse action returns a Python int.
decimal_literal = Combine(WordStart(vba_identifier_chars) + Word(nums)
                          + Suppress(Optional(Word('%&^', exact=1))))
decimal_literal.setParseAction(lambda t: int(t[0]))

# Octal literal: "&" or "&o"/"&O" prefix (suppressed), then octal digits and
# the same optional suffix. The parse action converts from base 8.
octal_literal = Combine(Suppress(Literal('&') + Optional((CaselessLiteral('o')))) + Word(srange('[0-7]'))
                        + Suppress(Optional(Word('%&^', exact=1))))
octal_literal.setParseAction(lambda t: int(t[0], base=8))

# Hex literal: "&h"/"&H" prefix (suppressed), then hex digits and the same
# optional suffix. The parse action converts from base 16.
hex_literal = Combine(Suppress(CaselessLiteral('&h')) + Word(srange('[0-9a-fA-F]'))
                      + Suppress(Optional(Word('%&^', exact=1))))
hex_literal.setParseAction(lambda t: int(t[0], base=16))

# Any integer literal; alternatives are tried in this order.
integer = decimal_literal | octal_literal | hex_literal
| 692 | + | |
| 693 | + | |
| 694 | +# --- QUOTED STRINGS --------------------------------------------------------- | |
| 695 | + | |
| 696 | +# 3.3.4 String Tokens | |
| 697 | +# STRING = double-quote *string-character (double-quote / line-continuation / LINE-END) | |
| 698 | +# double-quote = %x0022 ; " | |
| 699 | +# string-character = NO-LINE-CONTINUATION ((double-quote double-quote) termination-character) | |
| 700 | + | |
# Double-quoted VBA string; a doubled double-quote ("") inside the string is
# the escape for a literal quote. The parse action converts the token to str.
quoted_string = QuotedString('"', escQuote='""')
quoted_string.setParseAction(lambda t: str(t[0]))
| 703 | + | |
| 704 | + | |
| 705 | +#--- VBA Expressions --------------------------------------------------------- | |
| 706 | + | |
| 707 | +# See MS-VBAL 5.6 Expressions | |
| 708 | + | |
# need to pre-declare using Forward() because it is recursive
# VBA string expression and integer expression
# (both placeholders are filled in later with "<<=", once their items exist)
vba_expr_str = Forward()
vba_expr_int = Forward()
| 713 | + | |
| 714 | +# --- CHR -------------------------------------------------------------------- | |
| 715 | + | |
| 716 | +# MS-VBAL 6.1.2.11.1.4 Chr / Chr$ | |
| 717 | +# Function Chr(CharCode As Long) As Variant | |
| 718 | +# Function Chr$(CharCode As Long) As String | |
| 719 | +# Parameter Description | |
| 720 | +# CharCode Long whose value is a code point. | |
| 721 | +# Returns a String data value consisting of a single character containing the character whose code | |
| 722 | +# point is the data value of the argument. | |
| 723 | +# - If the argument is not in the range 0 to 255, Error Number 5 ("Invalid procedure call or | |
| 724 | +# argument") is raised unless the implementation supports a character set with a larger code point | |
| 725 | +# range. | |
| 726 | +# - If the argument value is in the range of 0 to 127, it is interpreted as a 7-bit ASCII code point. | |
| 727 | +# - If the argument value is in the range of 128 to 255, the code point interpretation of the value is | |
| 728 | +# implementation defined. | |
| 729 | +# - Chr$ has the same runtime semantics as Chr, however the declared type of its function result is | |
| 730 | +# String rather than Variant. | |
| 731 | + | |
| 732 | +# 6.1.2.11.1.5 ChrB / ChrB$ | |
| 733 | +# Function ChrB(CharCode As Long) As Variant | |
| 734 | +# Function ChrB$(CharCode As Long) As String | |
| 735 | +# CharCode Long whose value is a code point. | |
| 736 | +# Returns a String data value consisting of a single byte character whose code point value is the | |
| 737 | +# data value of the argument. | |
| 738 | +# - If the argument is not in the range 0 to 255, Error Number 6 ("Overflow") is raised. | |
| 739 | +# - ChrB$ has the same runtime semantics as ChrB however the declared type of its function result | |
| 740 | +# is String rather than Variant. | |
| 741 | +# - Note: the ChrB function is used with byte data contained in a String. Instead of returning a | |
| 742 | +# character, which may be one or two bytes, ChrB always returns a single byte. The ChrW function | |
| 743 | +# returns a String containing the Unicode character except on platforms where Unicode is not | |
| 744 | +# supported, in which case, the behavior is identical to the Chr function. | |
| 745 | + | |
| 746 | +# 6.1.2.11.1.6 ChrW/ ChrW$ | |
| 747 | +# Function ChrW(CharCode As Long) As Variant | |
| 748 | +# Function ChrW$(CharCode As Long) As String | |
| 749 | +# CharCode Long whose value is a code point. | |
| 750 | +# Returns a String data value consisting of a single character containing the character whose code | |
| 751 | +# point is the data value of the argument. | |
| 752 | +# - If the argument is not in the range -32,767 to 65,535 then Error Number 5 ("Invalid procedure | |
| 753 | +# call or argument") is raised. | |
| 754 | +# - If the argument is a negative value it is treated as if it was the value: CharCode + 65,536. | |
| 755 | +# - If the implemented uses 16-bit Unicode code points argument, data value is interpreted as a 16- | |
| 756 | +# bit Unicode code point. | |
| 757 | +# - If the implementation does not support Unicode, ChrW has the same semantics as Chr. | |
| 758 | +# - ChrW$ has the same runtime semantics as ChrW, however the declared type of its function result | |
| 759 | +# is String rather than Variant. | |
| 760 | + | |
# Chr, Chr$, ChrB, ChrW(int) => char
# The function name (Chr, optional B or W, optional $) and both parentheses
# are suppressed, so only the integer expression reaches the parse action.
# WordStart prevents matching "Chr" at the end of a longer identifier.
vba_chr = Suppress(
    Combine(WordStart(vba_identifier_chars) + CaselessLiteral('Chr')
            + Optional(CaselessLiteral('B') | CaselessLiteral('W')) + Optional('$'))
    + '(') + vba_expr_int + Suppress(')')
| 766 | + | |
def vba_chr_tostr(t):
    """
    Parse action for vba_chr: convert the integer argument of Chr/ChrB/ChrW
    into the corresponding one-character string.

    :param t: parse results; t[0] is the integer character code
    :return: VbaExpressionString holding the character, or the text
        'Chr(<code>)' when the code is not a valid code point
    """
    i = t[0]
    try:
        # Python 3: chr() covers the whole Unicode range and returns str.
        # There is no unichr(), and encoding to UTF-8 would produce bytes,
        # which must not be passed to a str subclass.
        return VbaExpressionString(chr(i))
    except (ValueError, OverflowError):
        # negative or too large value: keep the expression visible as text
        log.exception('ERROR: incorrect parameter value for chr(): %r' % i)
        return VbaExpressionString('Chr(%r)' % i)

vba_chr.setParseAction(vba_chr_tostr)
| 780 | + | |
| 781 | + | |
| 782 | +# --- ASC -------------------------------------------------------------------- | |
| 783 | + | |
# Asc(char) => int
#TODO: see MS-VBAL 6.1.2.11.1.1 page 240 => AscB, AscW
# The keyword and parentheses are suppressed; the parse action applies ord()
# to the evaluated string argument.
# NOTE(review): ord() raises TypeError unless the string has exactly one
# character - confirm how multi-character arguments should be handled.
vba_asc = Suppress(CaselessKeyword('Asc') + '(') + vba_expr_str + Suppress(')')
vba_asc.setParseAction(lambda t: ord(t[0]))
| 788 | + | |
| 789 | + | |
| 790 | +# --- VAL -------------------------------------------------------------------- | |
| 791 | + | |
# Val(string) => int
# TODO: make sure the behavior of VBA's val is fully covered
# The parse action strips surrounding whitespace and converts with int(),
# which raises ValueError if the remaining text is not an integer literal.
vba_val = Suppress(CaselessKeyword('Val') + '(') + vba_expr_str + Suppress(')')
vba_val.setParseAction(lambda t: int(t[0].strip()))
| 796 | + | |
| 797 | + | |
| 798 | +# --- StrReverse() -------------------------------------------------------------------- | |
| 799 | + | |
# StrReverse(string) => string
# The parse action returns the evaluated argument reversed, wrapped in
# VbaExpressionString to mark it as the result of a VBA expression.
strReverse = Suppress(CaselessKeyword('StrReverse') + '(') + vba_expr_str + Suppress(')')
strReverse.setParseAction(lambda t: VbaExpressionString(str(t[0])[::-1]))
| 803 | + | |
| 804 | + | |
| 805 | +# --- ENVIRON() -------------------------------------------------------------------- | |
| 806 | + | |
# Environ("name") => just translated to "%name%", that is enough for malware analysis
# (the variable is not looked up; the name is only wrapped in percent signs)
environ = Suppress(CaselessKeyword('Environ') + '(') + vba_expr_str + Suppress(')')
environ.setParseAction(lambda t: VbaExpressionString('%%%s%%' % t[0]))
| 810 | + | |
| 811 | + | |
| 812 | +# --- IDENTIFIER ------------------------------------------------------------- | |
| 813 | + | |
#TODO: see MS-VBAL 3.3.5 page 33
# 3.3.5 Identifier Tokens
# Latin-identifier = first-Latin-identifier-character *subsequent-Latin-identifier-character
# first-Latin-identifier-character = (%x0041-005A / %x0061-007A) ; A-Z / a-z
# subsequent-Latin-identifier-character = first-Latin-identifier-character / DIGIT / %x5F ; underscore
# Identifier: first character from alphas, following characters from
# alphanums or underscore.
latin_identifier = Word(initChars=alphas, bodyChars=alphanums + '_')
| 820 | + | |
| 821 | +# --- HEX FUNCTION ----------------------------------------------------------- | |
| 822 | + | |
# match any custom function name with a hex string as argument:
# TODO: accept vba_expr_str_item as argument, check if it is a hex or base64 string at runtime

# quoted string of at least two hexadecimal numbers of two digits:
quoted_hex_string = Suppress('"') + Combine(Word(hexnums, exact=2) * (2, None)) + Suppress('"')
quoted_hex_string.setParseAction(lambda t: str(t[0]))

# Any identifier followed by a quoted hex string in parentheses; the function
# name and the parentheses are suppressed.
hex_function_call = Suppress(latin_identifier) + Suppress('(') + \
    quoted_hex_string('hex_string') + Suppress(')')
# binascii.a2b_hex returns bytes on Python 3: decode them (latin-1 maps each
# byte to exactly one character) before building the str subclass, otherwise
# the result would be the repr text "b'...'" instead of the decoded string.
hex_function_call.setParseAction(
    lambda t: VbaExpressionString(binascii.a2b_hex(t.hex_string).decode('latin-1')))
| 833 | + | |
| 834 | + | |
| 835 | +# --- BASE64 FUNCTION ----------------------------------------------------------- | |
| 836 | + | |
# match any custom function name with a Base64 string as argument:
# TODO: accept vba_expr_str_item as argument, check if it is a hex or base64 string at runtime

# quoted string matching the Base64 regex (BASE64_RE):
quoted_base64_string = Suppress('"') + Regex(BASE64_RE) + Suppress('"')
quoted_base64_string.setParseAction(lambda t: str(t[0]))

# Any identifier followed by a quoted Base64 string in parentheses; the
# function name and the parentheses are suppressed.
base64_function_call = Suppress(latin_identifier) + Suppress('(') + \
    quoted_base64_string('base64_string') + Suppress(')')
# binascii.a2b_base64 returns bytes on Python 3: decode them (latin-1 maps
# each byte to exactly one character) before building the str subclass,
# otherwise the result would be the repr text "b'...'".
base64_function_call.setParseAction(
    lambda t: VbaExpressionString(binascii.a2b_base64(t.base64_string).decode('latin-1')))
| 847 | + | |
| 848 | + | |
| 849 | +# ---STRING EXPRESSION ------------------------------------------------------- | |
| 850 | + | |
def concat_strings_list(tokens):
    """
    Parse action joining the operands of a VBA string concatenation
    (operators '+' or '&') into a single VbaExpressionString.
    """
    # tokens[0] alternates operands and operators, e.g. [a, '&', b, '&', c];
    # the operands sit at the even positions.
    sequence = tokens[0]
    operands = sequence[::2]
    joined = ''.join(operands)
    return VbaExpressionString(joined)
| 859 | + | |
| 860 | + | |
# Atoms of a string expression; alternatives are tried in this order.
vba_expr_str_item = (vba_chr | strReverse | environ | quoted_string | hex_function_call | base64_function_call)

# Fill in the Forward() declared earlier: string items combined with the
# binary operators "+" and "&", both handled by concat_strings_list.
vba_expr_str <<= infixNotation(vba_expr_str_item,
    [
        ("+", 2, opAssoc.LEFT, concat_strings_list),
        ("&", 2, opAssoc.LEFT, concat_strings_list),
    ])
| 868 | + | |
| 869 | + | |
| 870 | +# --- INTEGER EXPRESSION ------------------------------------------------------- | |
| 871 | + | |
def sum_ints_list(tokens):
    """
    Parse action adding up the operands of a VBA integer expression that
    uses the '+' operator.
    """
    # tokens[0] alternates operands and operators, e.g. [a, '+', b, '+', c];
    # only the even positions hold integers.
    total = 0
    for operand in tokens[0][::2]:
        total += operand
    return total
| 880 | + | |
| 881 | + | |
def subtract_ints_list(tokens):
    """
    parse action to subtract integers in a VBA expression with operator '-'

    :param tokens: parse results; tokens[0] is a sequence such as
        [a, '-', b, '-', c, ...]
    :return: int, a - b - c - ... (evaluated left to right)
    """
    # operands are at the even positions, operators at the odd ones
    integers = tokens[0][::2]
    # plain loop instead of reduce(), which is not a builtin in Python 3
    result = integers[0]
    for value in integers[1:]:
        result = result - value
    return result
| 890 | + | |
| 891 | + | |
def multiply_ints_list(tokens):
    """
    parse action to multiply integers in a VBA expression with operator '*'

    :param tokens: parse results; tokens[0] is a sequence such as
        [a, '*', b, '*', c, ...]
    :return: int, a * b * c * ...
    """
    # operands are at the even positions, operators at the odd ones
    integers = tokens[0][::2]
    # plain loop instead of reduce(), which is not a builtin in Python 3
    result = integers[0]
    for value in integers[1:]:
        result = result * value
    return result
| 900 | + | |
| 901 | + | |
def divide_ints_list(tokens):
    """
    parse action to divide integers in a VBA expression with operator '/'

    :param tokens: parse results; tokens[0] is a sequence such as
        [a, '/', b, '/', c, ...]
    :return: int, a / b / c / ... (evaluated left to right, floor division)
    :raises ZeroDivisionError: if a divisor is zero
    """
    # operands are at the even positions, operators at the odd ones
    integers = tokens[0][::2]
    # plain loop instead of reduce(), which is not a builtin in Python 3.
    # Floor division keeps the result an int, as '/' did on Python 2 ints:
    # true division would return a float, which chr() cannot accept.
    result = integers[0]
    for value in integers[1:]:
        result = result // value
    return result
| 910 | + | |
| 911 | + | |
# Atoms of an integer expression; alternatives are tried in this order.
vba_expr_int_item = (vba_asc | vba_val | integer)

# operators associativity:
# https://en.wikipedia.org/wiki/Operator_associativity

# Fill in the Forward() declared earlier: integer items combined with the
# binary operators below, each with its own parse action.
# NOTE(review): each tuple is a separate precedence level (first = binds
# tightest), so "*" binds tighter than "/". An expression such as 8/2*4 would
# presumably be grouped as 8/(2*4) - confirm, and consider putting "*" and
# "/" (and "+" and "-") on shared levels.
vba_expr_int <<= infixNotation(vba_expr_int_item,
    [
        ("*", 2, opAssoc.LEFT, multiply_ints_list),
        ("/", 2, opAssoc.LEFT, divide_ints_list),
        ("-", 2, opAssoc.LEFT, subtract_ints_list),
        ("+", 2, opAssoc.LEFT, sum_ints_list),
    ])
| 924 | + | |
| 925 | + | |
| 926 | +# see detect_vba_strings for the deobfuscation code using this grammar | |
| 927 | + | |
| 928 | +# === MSO/ActiveMime files parsing =========================================== | |
| 929 | + | |
def is_mso_file(data):
    """
    Tell whether data looks like the content of a MSO/ActiveMime file, as
    produced by Outlook in some cases, or by Word/Excel when a document is
    saved in the MHTML format or the Word 2003 XML format.
    Only the ActiveMime magic at the very beginning of data is checked.

    :param data: bytes string, MSO/ActiveMime file content
    :return: bool, True if the file is MSO, False otherwise
    """
    magic = MSO_ACTIVEMIME_HEADER
    has_magic = data.startswith(magic)
    return has_magic
| 940 | + | |
| 941 | + | |
# regex to find zlib block headers, starting with byte 0x78 = 'x'
# (bytes pattern: it is applied to the raw bytes of a MSO file, and a str
# pattern cannot be used on a bytes-like object in Python 3)
re_zlib_header = re.compile(b'x')
| 944 | + | |
| 945 | + | |
def mso_file_extract(data):
    """
    Return the uncompressed payload stored in a MSO/ActiveMime file, as
    produced by Outlook in some cases, or by Word/Excel when a document is
    saved in the MHTML format or the Word 2003 XML format.

    :param data: bytes string, MSO/ActiveMime file content
    :return: bytes string, extracted data (uncompressed)

    raise a MsoExtractionError if the data cannot be extracted
    """
    # the data must start with the ActiveMime magic:
    assert is_mso_file(data)

    # The header should hold the offset of the compressed data: according to
    # tests, an unsigned 16 bits little endian integer at 0x1E, plus 46.
    try:
        header_offset = struct.unpack_from('<H', data, offset=0x1E)[0] + 46
    except struct.error as exc:
        log.info('Unable to parse MSO/ActiveMime file header (%s)' % exc)
        log.debug('Trace:', exc_info=True)
        raise MsoExtractionError('Unable to parse MSO/ActiveMime file header')
    else:
        log.debug('Parsing MSO file: data offset = 0x%X' % header_offset)

    # Candidate offsets, the one read from the header first. In all the
    # samples seen so far, Word always uses 0x32 and Excel 0x22A; they are
    # kept as fallbacks.
    candidates = [header_offset, 0x32, 0x22A]
    for start in candidates:
        log.debug('Attempting zlib decompression from MSO file offset 0x%X' % start)
        try:
            return zlib.decompress(data[start:])
        except zlib.error as exc:
            log.info('zlib decompression failed for offset %s (%s)'
                     % (start, exc))
            log.debug('Trace:', exc_info=True)

    # No candidate worked: brute-force every position where a potential
    # zlib-compressed block starts (byte 0x78).
    log.debug('Looking for potential zlib-compressed blocks in MSO file')
    for match in re_zlib_header.finditer(data):
        start = match.start()
        log.debug('Attempting zlib decompression from MSO file offset 0x%X' % start)
        try:
            return zlib.decompress(data[start:])
        except zlib.error as exc:
            log.info('zlib decompression failed (%s)' % exc)
            log.debug('Trace:', exc_info=True)

    raise MsoExtractionError('Unable to decompress data from a MSO/ActiveMime file')
| 999 | + | |
| 1000 | + | |
| 1001 | +#--- FUNCTIONS ---------------------------------------------------------------- | |
| 1002 | + | |
| 1003 | +# set of printable characters, for is_printable | |
| 1004 | +_PRINTABLE_SET = set(string.printable) | |
| 1005 | + | |
| 1006 | +def is_printable(s): | |
| 1007 | + """ | |
| 1008 | + returns True if string s only contains printable ASCII characters | |
| 1009 | + (i.e. contained in string.printable) | |
| 1010 | + This is similar to Python 3's str.isprintable, for Python 2.x. | |
| 1011 | + :param s: str | |
| 1012 | + :return: bool | |
| 1013 | + """ | |
| 1014 | + # inspired from http://stackoverflow.com/questions/3636928/test-if-a-python-string-is-printable | |
| 1015 | + # check if the set of chars from s is contained into the set of printable chars: | |
| 1016 | + return set(s).issubset(_PRINTABLE_SET) | |
| 1017 | + | |
| 1018 | + | |
def copytoken_help(decompressed_current, decompressed_chunk_start):
    """
    Derive the bit masks needed to decode a CopyToken, following
    MS-OVBA 2.4.1.3.19.1 CopyToken Help.

    decompressed_current: number of decompressed bytes so far, i.e. len(decompressed_container)
    decompressed_chunk_start: offset of the current chunk in the decompressed container
    return length_mask, offset_mask, bit_count, maximum_length
    """
    # number of bytes already decompressed in the current chunk
    chunk_bytes = decompressed_current - decompressed_chunk_start
    # bits used by the offset: ceil(log2(chunk_bytes)), never fewer than 4
    bit_count = max(4, int(math.ceil(math.log(chunk_bytes, 2))))
    # the remaining low bits of the 16-bit token hold the length
    length_mask = 0xFFFF >> bit_count
    return length_mask, ~length_mask, bit_count, length_mask + 3
| 1034 | + | |
| 1035 | + | |
def decompress_stream(compressed_container):
    """
    Decompress a stream according to MS-OVBA section 2.4.1

    :param compressed_container: bytes, data compressed according to the
        MS-OVBA 2.4.1.3.6 Compression algorithm
    :return: the decompressed container as a bytes string
    :raises ValueError: if the signature byte or a chunk header is invalid
    """
    # 2.4.1.2 State Variables

    # The following state is maintained for the CompressedContainer (section 2.4.1.1.1):
    # CompressedRecordEnd: The location of the byte after the last byte in the CompressedContainer (section 2.4.1.1.1).
    # CompressedCurrent: The location of the next byte in the CompressedContainer (section 2.4.1.1.1) to be read by
    #                    decompression or to be written by compression.

    # The following state is maintained for the current CompressedChunk (section 2.4.1.1.4):
    # CompressedChunkStart: The location of the first byte of the CompressedChunk (section 2.4.1.1.4) within the
    #                       CompressedContainer (section 2.4.1.1.1).

    # The following state is maintained for a DecompressedBuffer (section 2.4.1.1.2):
    # DecompressedCurrent: The location of the next byte in the DecompressedBuffer (section 2.4.1.1.2) to be written by
    #                      decompression or to be read by compression.
    # DecompressedBufferEnd: The location of the byte after the last byte in the DecompressedBuffer (section 2.4.1.1.2).

    # The following state is maintained for the current DecompressedChunk (section 2.4.1.1.3):
    # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the
    #                         DecompressedBuffer (section 2.4.1.1.2).

    # result: accumulated in a mutable buffer (appending to an immutable
    # bytes object copies it each time), converted to bytes on return
    decompressed_container = bytearray()
    compressed_current = 0

    # indexing bytes yields an int in Python 3
    sig_byte = compressed_container[compressed_current]
    if sig_byte != 0x01:
        raise ValueError('invalid signature byte {0:02X}'.format(sig_byte))

    compressed_current += 1

    #NOTE: the definition of CompressedRecordEnd is ambiguous. Here we assume that
    # CompressedRecordEnd = len(compressed_container)
    while compressed_current < len(compressed_container):
        # 2.4.1.1.5
        compressed_chunk_start = compressed_current
        # chunk header = first 16 bits
        compressed_chunk_header = \
            struct.unpack("<H", compressed_container[compressed_chunk_start:compressed_chunk_start + 2])[0]
        # chunk size = 12 first bits of header + 3
        chunk_size = (compressed_chunk_header & 0x0FFF) + 3
        # chunk signature = 3 next bits - should always be 0b011
        chunk_signature = (compressed_chunk_header >> 12) & 0x07
        if chunk_signature != 0b011:
            raise ValueError('Invalid CompressedChunkSignature in VBA compressed stream')
        # chunk flag = next bit - 1 == compressed, 0 == uncompressed
        chunk_flag = (compressed_chunk_header >> 15) & 0x01
        log.debug("chunk size = {0}, compressed flag = {1}".format(chunk_size, chunk_flag))

        #MS-OVBA 2.4.1.3.12: the maximum size of a chunk including its header is 4098 bytes (header 2 + data 4096)
        # The minimum size is 3 bytes
        # NOTE: there seems to be a typo in MS-OVBA, the check should be with 4098, not 4095 (which is the max value
        # in chunk header before adding 3.
        # Also the first test is not useful since a 12 bits value cannot be larger than 4095.
        if chunk_flag == 1 and chunk_size > 4098:
            raise ValueError('CompressedChunkSize > 4098 but CompressedChunkFlag == 1')
        if chunk_flag == 0 and chunk_size != 4098:
            raise ValueError('CompressedChunkSize != 4098 but CompressedChunkFlag == 0')

        # check if chunk_size goes beyond the compressed data, instead of silently cutting it:
        #TODO: raise an exception?
        if compressed_chunk_start + chunk_size > len(compressed_container):
            log.warning('Chunk size is larger than remaining compressed data')
        compressed_end = min([len(compressed_container), compressed_chunk_start + chunk_size])
        # read after chunk header:
        compressed_current = compressed_chunk_start + 2

        if chunk_flag == 0:
            # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk
            # uncompressed chunk: read the next 4096 bytes as-is
            #TODO: check if there are at least 4096 bytes left
            # (the slice is already a bytes object: wrapping it in bytes([...])
            # raises TypeError)
            decompressed_container.extend(
                compressed_container[compressed_current:compressed_current + 4096])
            compressed_current += 4096
        else:
            # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk
            # compressed chunk
            decompressed_chunk_start = len(decompressed_container)
            while compressed_current < compressed_end:
                # MS-OVBA 2.4.1.3.4 Decompressing a TokenSequence
                # log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end))
                # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or
                # copy tokens (reference to a previous literal token)
                flag_byte = compressed_container[compressed_current]
                compressed_current += 1
                for bit_index in range(0, 8):
                    # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end))
                    if compressed_current >= compressed_end:
                        break
                    # MS-OVBA 2.4.1.3.5 Decompressing a Token
                    # MS-OVBA 2.4.1.3.17 Extract FlagBit
                    flag_bit = (flag_byte >> bit_index) & 1
                    #log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit))
                    if flag_bit == 0:  # LiteralToken
                        # copy one byte directly to output
                        decompressed_container.append(compressed_container[compressed_current])
                        compressed_current += 1
                    else:  # CopyToken
                        # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken
                        copy_token = \
                            struct.unpack("<H", compressed_container[compressed_current:compressed_current + 2])[0]
                        #TODO: check this
                        length_mask, offset_mask, bit_count, _ = copytoken_help(
                            len(decompressed_container), decompressed_chunk_start)
                        length = (copy_token & length_mask) + 3
                        temp1 = copy_token & offset_mask
                        temp2 = 16 - bit_count
                        offset = (temp1 >> temp2) + 1
                        #log.debug('offset=%d length=%d' % (offset, length))
                        copy_source = len(decompressed_container) - offset
                        # byte by byte on purpose: source and destination may
                        # overlap, so bytes appended here can be read again
                        for index in range(copy_source, copy_source + length):
                            decompressed_container.append(decompressed_container[index])
                        compressed_current += 2
    return bytes(decompressed_container)
| 1154 | + | |
| 1155 | + | |
def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
    """
    Extract VBA macros from an OleFileIO object.
    Internal function, do not call directly.

    This is a generator, yielding (stream path, VBA filename, VBA source code)
    for each VBA code stream. The source code is yielded as returned by
    decompress_stream, i.e. it is not decoded with the project codepage here.

    :param ole: OLE container object; only its openstream() method is used
    :param vba_root: path to the VBA root storage, containing the VBA storage
        and the PROJECT stream. It is concatenated as-is with 'VBA/<name>', so
        it presumably is either empty or ends with '/' (confirm with callers).
    :param project_path: path to the PROJECT stream
    :param dir_path: path to the compressed dir stream
    :param relaxed: If True, unexpected values in the dir stream are only
        logged and modules that cannot be opened or parsed are skipped;
        if False, an error is raised in these cases
    :raises UnexpectedDataError: if a record of the dir stream holds an
        unexpected value (always for an invalid REFERENCENAME reserved field
        or an unknown reference record id, otherwise only if not relaxed)
    :raises SubstreamOpenError: if the code stream of a module cannot be
        opened under any of its candidate names (only if not relaxed)
    """
    # Open the PROJECT stream:
    project = ole.openstream(project_path)
    log.debug('relaxed is %s' % relaxed)

    # sample content of the PROJECT stream:

    ##    ID="{5312AC8A-349D-4950-BDD0-49BE3C4DD0F0}"
    ##    Document=ThisDocument/&H00000000
    ##    Module=NewMacros
    ##    Name="Project"
    ##    HelpContextID="0"
    ##    VersionCompatible32="393222000"
    ##    CMG="F1F301E705E705E705E705"
    ##    DPB="8F8D7FE3831F2020202020"
    ##    GC="2D2FDD81E51EE61EE6E1"
    ##
    ##    [Host Extender Info]
    ##    &H00000001={3832D640-CF90-11CF-8E43-00A0C911005A};VBE;&H00000000
    ##    &H00000002={000209F2-0000-0000-C000-000000000046};Word8.0;&H00000000
    ##
    ##    [Workspace]
    ##    ThisDocument=22, 29, 339, 477, Z
    ##    NewMacros=-4, 42, 832, 510, C

    # maps lowercase module name -> file extension to use for that module
    code_modules = {}

    for line in project:
        line = line.strip().decode('utf-8', 'ignore')
        if '=' in line:
            # split line at the 1st equal sign:
            name, value = line.split('=', 1)
            # looking for code modules
            # add the code module as a key in the dictionary
            # the value will be the extension needed later
            # The value is converted to lowercase, to allow case-insensitive matching (issue #3)
            value = value.lower()
            if name == 'Document':
                # split value at the 1st slash, keep 1st part:
                value = value.split('/', 1)[0]
                code_modules[value] = CLASS_EXTENSION
            elif name == 'Module':
                code_modules[value] = MODULE_EXTENSION
            elif name == 'Class':
                code_modules[value] = CLASS_EXTENSION
            elif name == 'BaseClass':
                code_modules[value] = FORM_EXTENSION

    # read data from dir stream (compressed)
    dir_compressed = ole.openstream(dir_path).read()

    def check_value(name, expected, value):
        """Log (relaxed mode) or raise (strict mode) if value != expected."""
        if expected != value:
            if relaxed:
                log.error("invalid value for {0} expected {1:04X} got {2:04X}"
                          .format(name, expected, value))
            else:
                raise UnexpectedDataError(dir_path, name, expected, value)

    dir_stream = BytesIO(decompress_stream(dir_compressed))

    def read_word():
        """Read an unsigned 16-bit little-endian integer from the dir stream."""
        return struct.unpack("<H", dir_stream.read(2))[0]

    def read_dword():
        """Read an unsigned 32-bit little-endian integer from the dir stream."""
        return struct.unpack("<L", dir_stream.read(4))[0]

    # NOTE: fields that are not needed are still read, because the records are
    # stored back to back and every byte has to be consumed in order.

    # PROJECTSYSKIND Record
    check_value('PROJECTSYSKIND_Id', 0x0001, read_word())
    check_value('PROJECTSYSKIND_Size', 0x0004, read_dword())
    projectsyskind_syskind = read_dword()
    if projectsyskind_syskind == 0x00:
        log.debug("16-bit Windows")
    elif projectsyskind_syskind == 0x01:
        log.debug("32-bit Windows")
    elif projectsyskind_syskind == 0x02:
        log.debug("Macintosh")
    elif projectsyskind_syskind == 0x03:
        log.debug("64-bit Windows")
    else:
        log.error("invalid PROJECTSYSKIND_SysKind {0:04X}".format(projectsyskind_syskind))

    # PROJECTLCID Record
    check_value('PROJECTLCID_Id', 0x0002, read_word())
    check_value('PROJECTLCID_Size', 0x0004, read_dword())
    check_value('PROJECTLCID_Lcid', 0x409, read_dword())

    # PROJECTLCIDINVOKE Record
    check_value('PROJECTLCIDINVOKE_Id', 0x0014, read_word())
    check_value('PROJECTLCIDINVOKE_Size', 0x0004, read_dword())
    check_value('PROJECTLCIDINVOKE_LcidInvoke', 0x409, read_dword())

    # PROJECTCODEPAGE Record
    check_value('PROJECTCODEPAGE_Id', 0x0003, read_word())
    check_value('PROJECTCODEPAGE_Size', 0x0002, read_dword())
    projectcodepage_codepage = read_word()

    # PROJECTNAME Record
    check_value('PROJECTNAME_Id', 0x0004, read_word())
    projectname_sizeof_projectname = read_dword()
    if projectname_sizeof_projectname < 1 or projectname_sizeof_projectname > 128:
        log.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(projectname_sizeof_projectname))
    dir_stream.read(projectname_sizeof_projectname)  # ProjectName (not used)

    # PROJECTDOCSTRING Record
    check_value('PROJECTDOCSTRING_Id', 0x0005, read_word())
    projectdocstring_sizeof_docstring = read_dword()
    if projectdocstring_sizeof_docstring > 2000:
        log.error(
            "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(projectdocstring_sizeof_docstring))
    dir_stream.read(projectdocstring_sizeof_docstring)  # DocString (not used)
    check_value('PROJECTDOCSTRING_Reserved', 0x0040, read_word())
    projectdocstring_sizeof_docstring_unicode = read_dword()
    if projectdocstring_sizeof_docstring_unicode % 2 != 0:
        log.error("PROJECTDOCSTRING_SizeOfDocStringUnicode is not even")
    dir_stream.read(projectdocstring_sizeof_docstring_unicode)  # DocStringUnicode (not used)

    # PROJECTHELPFILEPATH Record - MS-OVBA 2.3.4.2.1.7
    check_value('PROJECTHELPFILEPATH_Id', 0x0006, read_word())
    projecthelpfilepath_sizeof_helpfile1 = read_dword()
    if projecthelpfilepath_sizeof_helpfile1 > 260:
        log.error(
            "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(projecthelpfilepath_sizeof_helpfile1))
    projecthelpfilepath_helpfile1 = dir_stream.read(projecthelpfilepath_sizeof_helpfile1)
    check_value('PROJECTHELPFILEPATH_Reserved', 0x003D, read_word())
    projecthelpfilepath_sizeof_helpfile2 = read_dword()
    if projecthelpfilepath_sizeof_helpfile2 != projecthelpfilepath_sizeof_helpfile1:
        log.error("PROJECTHELPFILEPATH_SizeOfHelpFile1 does not equal PROJECTHELPFILEPATH_SizeOfHelpFile2")
    projecthelpfilepath_helpfile2 = dir_stream.read(projecthelpfilepath_sizeof_helpfile2)
    if projecthelpfilepath_helpfile2 != projecthelpfilepath_helpfile1:
        log.error("PROJECTHELPFILEPATH_HelpFile1 does not equal PROJECTHELPFILEPATH_HelpFile2")

    # PROJECTHELPCONTEXT Record
    check_value('PROJECTHELPCONTEXT_Id', 0x0007, read_word())
    check_value('PROJECTHELPCONTEXT_Size', 0x0004, read_dword())
    read_dword()  # HelpContext (not used)

    # PROJECTLIBFLAGS Record
    check_value('PROJECTLIBFLAGS_Id', 0x0008, read_word())
    check_value('PROJECTLIBFLAGS_Size', 0x0004, read_dword())
    check_value('PROJECTLIBFLAGS_ProjectLibFlags', 0x0000, read_dword())

    # PROJECTVERSION Record
    check_value('PROJECTVERSION_Id', 0x0009, read_word())
    check_value('PROJECTVERSION_Reserved', 0x0004, read_dword())
    read_dword()  # VersionMajor (not used)
    read_word()   # VersionMinor (not used)

    # PROJECTCONSTANTS Record
    check_value('PROJECTCONSTANTS_Id', 0x000C, read_word())
    projectconstants_sizeof_constants = read_dword()
    if projectconstants_sizeof_constants > 1015:
        log.error(
            "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(projectconstants_sizeof_constants))
    dir_stream.read(projectconstants_sizeof_constants)  # Constants (not used)
    check_value('PROJECTCONSTANTS_Reserved', 0x003C, read_word())
    projectconstants_sizeof_constants_unicode = read_dword()
    if projectconstants_sizeof_constants_unicode % 2 != 0:
        log.error("PROJECTCONSTANTS_SizeOfConstantsUnicode is not even")
    dir_stream.read(projectconstants_sizeof_constants_unicode)  # ConstantsUnicode (not used)

    # array of REFERENCE records, terminated by the PROJECTMODULES id (0x000F)
    check = None
    while True:
        check = read_word()
        log.debug("reference type = {0:04X}".format(check))
        if check == 0x000F:
            break

        if check == 0x0016:
            # REFERENCENAME
            reference_sizeof_name = read_dword()
            dir_stream.read(reference_sizeof_name)  # Name (not used)
            reference_reserved = read_word()
            if reference_reserved not in (0x003E, 0x000D):
                # report the value actually read from the record
                raise UnexpectedDataError(dir_path, 'REFERENCE_Reserved',
                                          (0x003E, 0x000D), reference_reserved)
            reference_sizeof_name_unicode = read_dword()
            dir_stream.read(reference_sizeof_name_unicode)  # NameUnicode (not used)
            continue

        if check == 0x0033:
            # REFERENCEORIGINAL (followed by REFERENCECONTROL)
            referenceoriginal_sizeof_libidoriginal = read_dword()
            dir_stream.read(referenceoriginal_sizeof_libidoriginal)  # LibidOriginal (not used)
            continue

        if check == 0x002F:
            # REFERENCECONTROL
            read_dword()  # SizeTwiddled (ignored)
            referencecontrol_sizeof_libidtwiddled = read_dword()
            dir_stream.read(referencecontrol_sizeof_libidtwiddled)  # LibidTwiddled (not used)
            check_value('REFERENCECONTROL_Reserved1', 0x0000, read_dword())
            check_value('REFERENCECONTROL_Reserved2', 0x0000, read_word())
            # optional field: either a name record (id 0x0016) or directly Reserved3
            check2 = read_word()
            if check2 == 0x0016:
                referencecontrol_namerecordextended_sizeof_name = read_dword()
                dir_stream.read(referencecontrol_namerecordextended_sizeof_name)  # Name (not used)
                check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E, read_word())
                referencecontrol_namerecordextended_sizeof_name_unicode = read_dword()
                dir_stream.read(referencecontrol_namerecordextended_sizeof_name_unicode)  # NameUnicode (not used)
                referencecontrol_reserved3 = read_word()
            else:
                referencecontrol_reserved3 = check2

            check_value('REFERENCECONTROL_Reserved3', 0x0030, referencecontrol_reserved3)
            read_dword()  # SizeExtended (not used)
            referencecontrol_sizeof_libidextended = read_dword()
            dir_stream.read(referencecontrol_sizeof_libidextended)  # LibidExtended (not used)
            read_dword()         # Reserved4 (not used)
            read_word()          # Reserved5 (not used)
            dir_stream.read(16)  # OriginalTypeLib (not used)
            read_dword()         # Cookie (not used)
            continue

        if check == 0x000D:
            # REFERENCEREGISTERED
            read_dword()  # Size (not used)
            referenceregistered_sizeof_libid = read_dword()
            dir_stream.read(referenceregistered_sizeof_libid)  # Libid (not used)
            check_value('REFERENCEREGISTERED_Reserved1', 0x0000, read_dword())
            check_value('REFERENCEREGISTERED_Reserved2', 0x0000, read_word())
            continue

        if check == 0x000E:
            # REFERENCEPROJECT
            read_dword()  # Size (not used)
            referenceproject_sizeof_libidabsolute = read_dword()
            dir_stream.read(referenceproject_sizeof_libidabsolute)  # LibidAbsolute (not used)
            referenceproject_sizeof_libidrelative = read_dword()
            dir_stream.read(referenceproject_sizeof_libidrelative)  # LibidRelative (not used)
            read_dword()  # MajorVersion (not used)
            read_word()   # MinorVersion (not used)
            continue

        # Unknown record id: the rest of the stream cannot be parsed reliably.
        # Raise instead of terminating the whole interpreter from library code.
        log.error('invalid or unknown check Id {0:04X}'.format(check))
        raise UnexpectedDataError(dir_path, 'reference type',
                                  (0x000F, 0x0016, 0x0033, 0x002F, 0x000D, 0x000E),
                                  check)

    # PROJECTMODULES Record (its id was already read by the loop above)
    check_value('PROJECTMODULES_Id', 0x000F, check)
    check_value('PROJECTMODULES_Size', 0x0002, read_dword())
    projectmodules_count = read_word()
    check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, read_word())
    check_value('PROJECTMODULES_ProjectCookieRecord_Size', 0x0002, read_dword())
    read_word()  # ProjectCookieRecord cookie (not used)

    # codec used to decode the codepage-encoded (non-unicode) names
    log.debug('Project CodePage = %d' % projectcodepage_codepage)
    if projectcodepage_codepage in MAC_CODEPAGES:
        vba_codec = MAC_CODEPAGES[projectcodepage_codepage]
    else:
        vba_codec = 'cp%d' % projectcodepage_codepage

    log.debug("parsing {0} modules".format(projectmodules_count))
    for projectmodule_index in range(0, projectmodules_count):
        try:
            # Reset the values coming from optional sections, so that a module
            # lacking one of them neither fails with a NameError nor silently
            # reuses the stream name / text offset of the previous module.
            modulename_unicode_modulename_unicode = ''
            modulestreamname_streamname = b''
            modulestreamname_streamname_unicode = ''
            moduleoffset_textoffset = 0

            check_value('MODULENAME_Id', 0x0019, read_word())
            modulename_sizeof_modulename = read_dword()
            modulename_modulename = dir_stream.read(modulename_sizeof_modulename)
            # account for optional sections
            section_id = read_word()
            if section_id == 0x0047:
                # MODULENAMEUNICODE
                modulename_unicode_sizeof_modulename_unicode = read_dword()
                # just guessing that this is the same encoding as used in OleFileIO
                modulename_unicode_modulename_unicode = dir_stream.read(
                    modulename_unicode_sizeof_modulename_unicode).decode('UTF-16LE', 'replace')
                section_id = read_word()
            if section_id == 0x001A:
                # MODULESTREAMNAME
                modulestreamname_sizeof_streamname = read_dword()
                modulestreamname_streamname = dir_stream.read(modulestreamname_sizeof_streamname)
                check_value('MODULESTREAMNAME_Reserved', 0x0032, read_word())
                modulestreamname_sizeof_streamname_unicode = read_dword()
                # just guessing that this is the same encoding as used in OleFileIO
                modulestreamname_streamname_unicode = dir_stream.read(
                    modulestreamname_sizeof_streamname_unicode).decode('UTF-16LE', 'replace')
                section_id = read_word()
            if section_id == 0x001C:
                # MODULEDOCSTRING
                moduledocstring_sizeof_docstring = read_dword()
                dir_stream.read(moduledocstring_sizeof_docstring)  # DocString (not used)
                check_value('MODULEDOCSTRING_Reserved', 0x0048, read_word())
                moduledocstring_sizeof_docstring_unicode = read_dword()
                dir_stream.read(moduledocstring_sizeof_docstring_unicode)  # DocStringUnicode (not used)
                section_id = read_word()
            if section_id == 0x0031:
                # MODULEOFFSET
                check_value('MODULEOFFSET_Size', 0x0004, read_dword())
                moduleoffset_textoffset = read_dword()
                section_id = read_word()
            if section_id == 0x001E:
                # MODULEHELPCONTEXT
                check_value('MODULEHELPCONTEXT_Size', 0x0004, read_dword())
                read_dword()  # HelpContext (not used)
                section_id = read_word()
            if section_id == 0x002C:
                # MODULECOOKIE
                check_value('MODULECOOKIE_Size', 0x0002, read_dword())
                read_word()  # Cookie (not used)
                section_id = read_word()
            if section_id == 0x0021 or section_id == 0x0022:
                # MODULETYPE
                read_dword()  # Reserved (not used)
                section_id = read_word()
            if section_id == 0x0025:
                # MODULEREADONLY
                check_value('MODULEREADONLY_Reserved', 0x0000, read_dword())
                section_id = read_word()
            if section_id == 0x0028:
                # MODULEPRIVATE
                check_value('MODULEPRIVATE_Reserved', 0x0000, read_dword())
                section_id = read_word()
            if section_id == 0x002B:  # TERMINATOR
                check_value('MODULE_Reserved', 0x0000, read_dword())
                section_id = None
            if section_id is not None:
                log.warning('unknown or invalid module section id {0:04X}'.format(section_id))

            try:
                streamname_unicode = modulestreamname_streamname.decode(vba_codec)
            except UnicodeError:
                # log the raw bytes: the decoded name does not exist at this point
                log.debug('failed to decode stream name {0!r} with codec {1}'
                          .format(modulestreamname_streamname, vba_codec))
                streamname_unicode = modulestreamname_streamname.decode(vba_codec, errors='replace')
            # The module name is read as bytes; decode it so that it can be
            # matched against the (text) keys of code_modules and used to
            # build the file name.
            modulename_text = modulename_modulename.decode(vba_codec, 'replace')

            log.debug("ModuleName = {0}".format(modulename_text))
            log.debug("ModuleNameUnicode = {0}".format(modulename_unicode_modulename_unicode))
            log.debug("StreamName = {0!r}".format(modulestreamname_streamname))
            log.debug("StreamName.decode('%s') = %s" % (vba_codec, streamname_unicode))
            log.debug("StreamNameUnicode = {0}".format(modulestreamname_streamname_unicode))
            log.debug("TextOffset = {0}".format(moduleoffset_textoffset))

            code_data = None
            try_names = streamname_unicode, \
                        modulename_unicode_modulename_unicode, \
                        modulestreamname_streamname_unicode
            for stream_name in try_names:
                # TODO: if olefile._find were less private, could replace this
                #        try-except with calls to it
                try:
                    code_path = vba_root + u'VBA/' + stream_name
                    log.debug('opening VBA code stream %s' % code_path)
                    code_data = ole.openstream(code_path).read()
                    break
                except IOError as ioe:
                    log.debug('failed to open stream VBA/%r (%r), try other name'
                              % (stream_name, ioe))

            if code_data is None:
                log.info("Could not open stream %d of %d ('VBA/' + one of %r)!"
                         % (projectmodule_index, projectmodules_count,
                            '/'.join("'" + stream_name + "'"
                                     for stream_name in try_names)))
                if relaxed:
                    continue   # ... with next submodule
                raise SubstreamOpenError('[BASE]', 'VBA/' +
                                         modulename_unicode_modulename_unicode)

            log.debug("length of code_data = {0}".format(len(code_data)))
            log.debug("offset of code_data = {0}".format(moduleoffset_textoffset))
            # the compressed source code starts at TextOffset within the stream
            code_data = code_data[moduleoffset_textoffset:]
            if code_data:
                code_data = decompress_stream(code_data)
                # case-insensitive search in the code_modules dict to find the file extension:
                filext = code_modules.get(modulename_text.lower(), 'bin')
                filename = '{0}.{1}'.format(modulename_text, filext)
                #TODO: also yield the codepage so that callers can decode it properly
                yield (code_path, filename, code_data)
                log.debug('extracted file {0}'.format(filename))
            else:
                log.warning("module stream {0} has code data length 0".format(streamname_unicode))
        except (UnexpectedDataError, SubstreamOpenError):
            raise
        except Exception:
            log.info('Error parsing module {0} of {1} in _extract_vba:'
                     .format(projectmodule_index, projectmodules_count),
                     exc_info=True)
            if not relaxed:
                raise
| 1650 | + | |
| 1651 | + | |
def vba_collapse_long_lines(vba_code):
    """
    Join VBA statements that were split over several lines with the line
    continuation marker (a space followed by an underscore at the end of a
    line). Each marker, together with the line ending that follows it, is
    replaced by a single space.

    :param vba_code: str, VBA module code
    :return: str, VBA module code with long lines collapsed
    """
    # TODO: use a regex instead, to allow whitespaces after the underscore?
    # The replacements are applied one after the other, CRLF first so that a
    # Windows line ending is removed as a whole.
    collapsed = vba_code
    for line_ending in ('\r\n', '\r', '\n'):
        collapsed = collapsed.replace(' _' + line_ending, ' ')
    return collapsed
| 1665 | + | |
| 1666 | + | |
def filter_vba(vba_code):
    """
    Strip the leading run of "Attribute VB_" lines from VBA source code.
    These lines are automatically added by MS Office and not displayed in the
    VBA Editor. This should only be used when displaying source code for
    human analysis.

    Note: an attribute line containing a colon is kept (and ends the run),
    because a colon could be used to hide malicious instructions on that line.

    :param vba_code: str, VBA source code
    :return: str, filtered VBA source code, with lines joined by a newline
    """
    lines = vba_code.splitlines()
    # index of the first line to keep; stays at len(lines) if every line is
    # a plain attribute line
    first_kept = len(lines)
    for position, text in enumerate(lines):
        is_plain_attribute = text.startswith("Attribute VB_") and ':' not in text
        if not is_plain_attribute:
            first_kept = position
            break
    #TODO: also remove empty lines?
    return '\n'.join(lines[first_kept:])
| 1689 | + | |
| 1690 | + | |
def detect_autoexec(vba_code, obfuscation=None):
    """
    Look for keywords of macros that run automatically when triggered by
    specific actions (e.g. when a document is opened or closed).

    Every keyword listed in AUTOEXEC_KEYWORDS is searched case-insensitively
    and as a whole word (regex word boundaries on both sides).

    :param vba_code: str, VBA source code
    :param obfuscation: None or str, name of obfuscation to be added to description
    :return: list of str tuples (keyword, description)
    """
    #TODO: merge code with detect_suspicious
    #TODO: if keyword is already a compiled regex, use it as-is
    suffix = ' (obfuscation: %s)' % obfuscation if obfuscation else ''
    return [
        (keyword, description + suffix)
        for description, keywords in AUTOEXEC_KEYWORDS.items()
        for keyword in keywords
        # search using regex to detect word boundaries:
        if re.search(r'(?i)\b' + keyword + r'\b', vba_code)
    ]
| 1716 | + | |
| 1717 | + | |
def detect_suspicious(vba_code, obfuscation=None):
    """
    Look for suspicious keywords corresponding to potential malware
    behaviour.

    Every keyword listed in SUSPICIOUS_KEYWORDS is searched case-insensitively
    and as a whole word (regex word boundaries on both sides).

    :param vba_code: str, VBA source code
    :param obfuscation: None or str, name of obfuscation to be added to description
    :return: list of str tuples (keyword, description)
    """
    suffix = ''
    if obfuscation:
        suffix = ' (obfuscation: %s)' % obfuscation
    matches = []
    for description, keywords in SUSPICIOUS_KEYWORDS.items():
        # search using regex to detect word boundaries:
        matches.extend(
            (keyword, description + suffix)
            for keyword in keywords
            if re.search(r'(?i)\b' + keyword + r'\b', vba_code)
        )
    return matches
| 1740 | + | |
| 1741 | + | |
def detect_patterns(vba_code, obfuscation=None):
    """
    Look for specific patterns such as IP addresses, URLs, e-mail addresses,
    executable file names, etc.

    Each (pattern type, compiled regex) pair of RE_PATTERNS is applied to the
    code. A given matched value is reported only once, under the first
    pattern type that produced it.

    :param vba_code: str, VBA source code
    :param obfuscation: None or str, name of obfuscation to be added to the pattern type
    :return: list of str tuples (pattern type, value)
    """
    suffix = ' (obfuscation: %s)' % obfuscation if obfuscation else ''
    hits = []
    seen = set()
    for pattern_type, pattern_re in RE_PATTERNS:
        for value in (match.group() for match in pattern_re.finditer(vba_code)):
            if value in seen:
                continue
            hits.append((pattern_type + suffix, value))
            seen.add(value)
    return hits
| 1762 | + | |
| 1763 | + | |
def detect_hex_strings(vba_code):
    """
    Collect the hex-encoded strings found in the VBA code.

    Candidates are the matches of re_hex_string; each distinct candidate is
    decoded once with binascii.unhexlify.

    :param vba_code: str, VBA source code
    :return: list of tuples (encoded string, decoded string)
    """
    pairs = []
    seen = set()
    for hex_text in (match.group() for match in re_hex_string.finditer(vba_code)):
        if hex_text in seen:
            continue
        pairs.append((hex_text, binascii.unhexlify(hex_text)))
        seen.add(hex_text)
    return pairs
| 1780 | + | |
| 1781 | + | |
def detect_base64_strings(vba_code):
    """
    Collect the base64-encoded strings found in the VBA code.

    Candidates are the matches of re_base64_string with the surrounding
    double quotes removed. A candidate is ignored when it looks like a plain
    hex string, when it was already reported, when its lowercase form is in
    BASE64_WHITELIST, or when it cannot be decoded.

    :param vba_code: str, VBA source code
    :return: list of tuples (encoded string, decoded string)
    """
    #TODO: avoid matching simple hex strings as base64?
    pairs = []
    seen = set()
    for match in re_base64_string.finditer(vba_code):
        # extract the base64 string without quotes:
        candidate = match.group().strip('"')
        # check it is not just a hex string:
        if re_nothex_check.search(candidate) is None:
            continue
        # only keep new values and not in the whitelist:
        if candidate in seen or candidate.lower() in BASE64_WHITELIST:
            continue
        try:
            decoded = base64.b64decode(candidate)
        except (TypeError, ValueError) as exc:
            # if an exception occurs, it is likely not a base64-encoded string
            log.debug('Failed to base64-decode (%s)' % exc)
        else:
            pairs.append((candidate, decoded))
            seen.add(candidate)
    return pairs
| 1808 | + | |
| 1809 | + | |
def detect_dridex_strings(vba_code):
    """
    Collect strings obfuscated with a specific algorithm found in Dridex samples.

    Candidates are the matches of re_dridex_string with their first and last
    character (the quotes) removed. Plain hex strings, duplicates and
    candidates that the Dridex decoder rejects are ignored.

    :param vba_code: str, VBA source code
    :return: list of str tuples (encoded string, decoded string)
    """
    # imported here so that the decoder is only loaded when actually needed
    from thirdparty.DridexUrlDecoder.DridexUrlDecoder import DridexUrlDecode

    pairs = []
    seen = set()
    for match in re_dridex_string.finditer(vba_code):
        candidate = match.group()[1:-1]
        # check it is not just a hex string:
        if re_nothex_check.search(candidate) is None:
            continue
        if candidate in seen:
            continue
        try:
            decoded = DridexUrlDecode(candidate)
        except Exception as exc:
            # if an exception occurs, it is likely not a dridex-encoded string
            log.debug('Failed to Dridex-decode (%s)' % exc)
        else:
            pairs.append((candidate, decoded))
            seen.add(candidate)
    return pairs
| 1835 | + | |
| 1836 | + | |
def detect_vba_strings(vba_code):
    """
    Collect strings obfuscated with VBA expressions using keywords such as
    Chr, Asc, Val, StrReverse, etc.

    The code is scanned with the vba_expr_str grammar. Only results that are
    VbaExpressionString instances (i.e. real expressions, not simple string
    literals) are kept, each distinct source expression once, and only when
    the decoded value differs from the source text.

    :param vba_code: str, VBA source code
    :return: list of str tuples (encoded string, decoded string)
    """
    # TODO: handle exceptions
    pairs = []
    seen = set()
    # IMPORTANT: to extract the actual VBA expressions found in the code,
    # we must expand tabs to have the same string as pyparsing.
    # Otherwise, start and end offsets are incorrect.
    expanded = vba_code.expandtabs()
    for tokens, start, end in vba_expr_str.scanString(expanded):
        decoded = tokens[0]
        if not isinstance(decoded, VbaExpressionString):
            # a simple string, not a VBA expression
            continue
        encoded = expanded[start:end]
        # avoid duplicates and simple strings:
        if encoded not in seen and decoded != encoded:
            pairs.append((encoded, decoded))
            seen.add(encoded)
    return pairs
| 1870 | + | |
| 1871 | + | |
def json2ascii(json_obj, encoding='utf8', errors='replace'):
    """
    Make an object safe to pass to json.dumps by decoding every bytes string.

    Works recursively through dicts, lists and tuples:

    - None, bool, int, float and str are returned unchanged;
    - bytes are decoded to str using `encoding` and `errors`;
    - dict values and list items are converted in place and the same
      container object is returned (dict keys are left untouched);
    - a tuple is returned as a new tuple holding the converted items;
    - any other type is logged at debug level and returned as is.

    :param json_obj: object to be sanitized
    :param encoding: str, codec used to decode bytes strings
    :param errors: str, error handler passed to bytes.decode
    :return: the sanitized object
    """
    if json_obj is None or isinstance(json_obj, (bool, int, float, str)):
        return json_obj
    if isinstance(json_obj, bytes):
        decoded = json_obj.decode(encoding, errors)
        log.debug('json2ascii: encode unicode: {0}'.format(decoded))
        return decoded
    if isinstance(json_obj, dict):
        # only values are replaced, so the dict size does not change while
        # iterating over its keys
        for key in json_obj:
            json_obj[key] = json2ascii(json_obj[key], encoding, errors)
        return json_obj
    if isinstance(json_obj, list):
        # assign by index: rebinding the loop variable would silently discard
        # the converted value
        for index, item in enumerate(json_obj):
            json_obj[index] = json2ascii(item, encoding, errors)
        return json_obj
    if isinstance(json_obj, tuple):
        # tuples are immutable, a converted copy has to be returned instead
        return tuple(json2ascii(item, encoding, errors) for item in json_obj)
    log.debug('unexpected type in json2ascii: {0} -- leave as is'
              .format(type(json_obj)))
    return json_obj
| 1907 | + | |
| 1908 | + | |
# set to True by print_json once the opening '[' of the top-level list is printed
_have_printed_json_start = False

def print_json(json_dict=None, _json_is_last=False, **json_parts):
    """ line-wise print of json.dumps(json2ascii(..)) as one entry of a json list

    can use in two ways:
    (1) print_json(some_dict)
    (2) print_json(key1=value1, key2=value2, ...)

    The first call prints the opening bracket of the top-level list. Every
    entry is followed by a comma, except the one printed with
    _json_is_last=True, which is followed by the closing bracket instead.

    :param json_dict: None or dict, the entry to print (usage 1)
    :param bool _json_is_last: set to True only for very last entry to complete
                               the top-level json-list
    :param json_parts: key=value pairs forming the entry to print (usage 2)
    :raises ValueError: if both json_dict and json_parts are given, or if
                        json_dict is neither None nor a dict
    """
    global _have_printed_json_start

    if json_dict and json_parts:
        raise ValueError('Invalid json argument: want either single dict or '
                         'key=value parts but got both)')
    if json_dict is not None and not isinstance(json_dict, dict):
        raise ValueError('Invalid json argument: want either single dict or '
                         'key=value parts but got {0} instead of dict)'
                         .format(type(json_dict)))
    entry = json_parts if json_parts else json_dict

    if not _have_printed_json_start:
        print('[')
        _have_printed_json_start = True

    rendered = json.dumps(json2ascii(entry), check_circular=False,
                          indent=4, ensure_ascii=False)
    body = rendered.splitlines()
    closing = body.pop()
    for line in body:
        print(' {0}'.format(line))
    if not _json_is_last:
        print(' {0},'.format(closing))   # print last line with comma
        return
    print(' {0}'.format(closing))   # print last line without comma
    print(']')
| 1946 | + | |
| 1947 | + | |
class VBA_Scanner(object):
    """
    Scanner for the source code of one VBA module: finds obfuscated strings,
    suspicious keywords, IOCs, auto-executable macros, etc.
    """

    def __init__(self, vba_code):
        """
        VBA_Scanner constructor

        :param vba_code: str, VBA source code to be analyzed
        """
        # join long lines ending with " _":
        self.code = vba_collapse_long_lines(vba_code)
        # decoded payloads appended by scan(), one per line, so that keywords
        # and IOCs hidden behind an encoding can be searched as well:
        self.code_hex = b''
        self.code_hex_rev = b''
        self.code_rev_hex = b''
        self.code_base64 = b''
        self.code_dridex = ''
        self.code_vba = ''
        # None before scanning, then True/False
        self.strReverse = None
        # results = None before scanning, then a list of tuples after scanning
        self.results = None
        self.autoexec_keywords = None
        self.suspicious_keywords = None
        self.iocs = None
        self.hex_strings = None
        self.base64_strings = None
        self.dridex_strings = None
        self.vba_strings = None


    def scan(self, include_decoded_strings=False, deobfuscate=False):
        """
        Analyze the provided VBA code to detect suspicious keywords,
        auto-executable macros, IOC patterns, obfuscation patterns
        such as hex-encoded strings.

        :param include_decoded_strings: bool, if True, all encoded strings will be included with their decoded content.
        :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
        :return: list of tuples (type, keyword, description)
            (type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String',
            'Dridex string' or 'VBA string')
        """
        # First, detect and extract hex-encoded strings:
        self.hex_strings = detect_hex_strings(self.code)
        # detect if the code contains StrReverse:
        self.strReverse = 'strreverse' in self.code.lower()
        # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords:
        for hex_text, raw in self.hex_strings:
            self.code_hex += b'\n' + raw
            # if the code contains "StrReverse", also append the hex strings in reverse order:
            if self.strReverse:
                # StrReverse after hex decoding:
                self.code_hex_rev += b'\n' + raw[::-1]
                # StrReverse before hex decoding:
                self.code_rev_hex += b'\n' + binascii.unhexlify(hex_text[::-1])
                #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
        #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
        # Detect Base64-encoded strings
        self.base64_strings = detect_base64_strings(self.code)
        for _, raw in self.base64_strings:
            self.code_base64 += b'\n' + raw
        # Detect Dridex-encoded strings
        self.dridex_strings = detect_dridex_strings(self.code)
        for _, text in self.dridex_strings:
            self.code_dridex += '\n' + text
        # Detect obfuscated strings in VBA expressions
        self.vba_strings = detect_vba_strings(self.code) if deobfuscate else []
        for _, text in self.vba_strings:
            self.code_vba += '\n' + text

        self.autoexec_keywords = []
        self.suspicious_keywords = []
        self.iocs = []

        # the original code first, then every decoded variant of it:
        variants = (
            (self.code, None),
            (self.code_hex, 'Hex'),
            (self.code_hex_rev, 'Hex+StrReverse'),
            (self.code_rev_hex, 'StrReverse+Hex'),
            (self.code_base64, 'Base64'),
            (self.code_dridex, 'Dridex'),
            (self.code_vba, 'VBA expression'),
        )
        for text, obfuscation in variants:
            # the detectors work on text: undecodable bytes are replaced
            if isinstance(text, bytes):
                text = text.decode('utf-8', 'replace')
            self.autoexec_keywords += detect_autoexec(text, obfuscation)
            self.suspicious_keywords += detect_suspicious(text, obfuscation)
            self.iocs += detect_patterns(text, obfuscation)

        # If encoded strings were discovered, add an item to suspicious keywords:
        encoding_notices = (
            (self.hex_strings, 'Hex Strings',
             'Hex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'),
            (self.base64_strings, 'Base64 Strings',
             'Base64-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'),
            (self.dridex_strings, 'Dridex Strings',
             'Dridex-encoded strings were detected, may be used to obfuscate strings (option --decode to see all)'),
            (self.vba_strings, 'VBA obfuscated Strings',
             'VBA string expressions were detected, may be used to obfuscate strings (option --decode to see all)'),
        )
        for detected, keyword, description in encoding_notices:
            if detected:
                self.suspicious_keywords.append((keyword, description))

        results = []
        # report each keyword only once per category (first description wins)
        for category, pairs in (('AutoExec', self.autoexec_keywords),
                                ('Suspicious', self.suspicious_keywords)):
            reported = set()
            for keyword, description in pairs:
                if keyword not in reported:
                    results.append((category, keyword, description))
                    reported.add(keyword)
        reported = set()
        for pattern_type, value in self.iocs:
            if value not in reported:
                results.append(('IOC', value, pattern_type))
                reported.add(value)

        # include decoded strings only if they are printable or if --decode option:
        for category, pairs in (('Hex String', self.hex_strings),
                                ('Base64 String', self.base64_strings),
                                ('Dridex string', self.dridex_strings),
                                ('VBA string', self.vba_strings)):
            for encoded, decoded in pairs:
                if include_decoded_strings or is_printable(decoded):
                    results.append((category, decoded, encoded))
        self.results = results
        return results

    def scan_summary(self):
        """
        Return the number of items found in each category, scanning the code
        first (with default options) if it has not been scanned yet.

        :return: tuple with the number of items found for each category:
            (autoexec, suspicious, IOCs, hex, base64, dridex, vba)
        """
        # avoid scanning the same code twice:
        if self.results is None:
            self.scan()
        categories = (self.autoexec_keywords, self.suspicious_keywords,
                      self.iocs, self.hex_strings, self.base64_strings,
                      self.dridex_strings, self.vba_strings)
        return tuple(len(items) for items in categories)
| 2103 | + | |
| 2104 | + | |
def scan_vba(vba_code, include_decoded_strings, deobfuscate=False):
    """
    Scan VBA code for suspicious keywords, auto-executable macros, IOC
    patterns and obfuscation patterns such as hex-encoded strings.
    (shortcut for VBA_Scanner(vba_code).scan())

    :param vba_code: str, VBA source code to be analyzed
    :param include_decoded_strings: bool, if True all encoded strings will be included with their decoded content.
    :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
    :return: list of tuples (type, keyword, description), see VBA_Scanner.scan
    """
    scanner = VBA_Scanner(vba_code)
    return scanner.scan(include_decoded_strings, deobfuscate)
| 2119 | + | |
| 2120 | + | |
| 2121 | +#=== CLASSES ================================================================= | |
| 2122 | + | |
| 2123 | +class VBA_Parser(object): | |
| 2124 | + """ | |
| 2125 | + Class to parse MS Office files, to detect VBA macros and extract VBA source code | |
| 2126 | + Supported file formats: | |
| 2127 | + - Word 97-2003 (.doc, .dot) | |
| 2128 | + - Word 2007+ (.docm, .dotm) | |
| 2129 | + - Word 2003 XML (.xml) | |
| 2130 | + - Word MHT - Single File Web Page / MHTML (.mht) | |
| 2131 | + - Excel 97-2003 (.xls) | |
| 2132 | + - Excel 2007+ (.xlsm, .xlsb) | |
| 2133 | + - PowerPoint 97-2003 (.ppt) | |
| 2134 | + - PowerPoint 2007+ (.pptm, .ppsm) | |
| 2135 | + """ | |
| 2136 | + | |
| 2137 | + def __init__(self, filename, data=None, container=None, relaxed=False): | |
| 2138 | + """ | |
| 2139 | + Constructor for VBA_Parser | |
| 2140 | + | |
| 2141 | + :param filename: filename or path of file to parse, or file-like object | |
| 2142 | + | |
| 2143 | + :param data: None or bytes str, if None the file will be read from disk (or from the file-like object). | |
| 2144 | + If data is provided as a bytes string, it will be parsed as the content of the file in memory, | |
| 2145 | + and not read from disk. Note: files must be read in binary mode, i.e. open(f, 'rb'). | |
| 2146 | + | |
| 2147 | + :param container: str, path and filename of container if the file is within | |
| 2148 | + a zip archive, None otherwise. | |
| 2149 | + | |
| 2150 | + :param relaxed: if True, treat mal-formed documents and missing streams more like MS office: | |
| 2151 | + do nothing; if False (default), raise errors in these cases | |
| 2152 | + | |
| 2153 | + raises a FileOpenError if all attemps to interpret the data header failed | |
| 2154 | + """ | |
| 2155 | + #TODO: filename should only be a string, data should be used for the file-like object | |
| 2156 | + #TODO: filename should be mandatory, optional data is a string or file-like object | |
| 2157 | + #TODO: also support olefile and zipfile as input | |
| 2158 | + if data is None: | |
| 2159 | + # open file from disk: | |
| 2160 | + _file = filename | |
| 2161 | + else: | |
| 2162 | + # file already read in memory, make it a file-like object for zipfile: | |
| 2163 | + _file = BytesIO(data) | |
| 2164 | + #self.file = _file | |
| 2165 | + self.ole_file = None | |
| 2166 | + self.ole_subfiles = [] | |
| 2167 | + self.filename = filename | |
| 2168 | + self.container = container | |
| 2169 | + self.relaxed = relaxed | |
| 2170 | + self.type = None | |
| 2171 | + self.vba_projects = None | |
| 2172 | + self.vba_forms = None | |
| 2173 | + self.contains_macros = None # will be set to True or False by detect_macros | |
| 2174 | + self.vba_code_all_modules = None # to store the source code of all modules | |
| 2175 | + # list of tuples for each module: (subfilename, stream_path, vba_filename, vba_code) | |
| 2176 | + self.modules = None | |
| 2177 | + # Analysis results: list of tuples (type, keyword, description) - See VBA_Scanner | |
| 2178 | + self.analysis_results = None | |
| 2179 | + # statistics for the scan summary and flags | |
| 2180 | + self.nb_macros = 0 | |
| 2181 | + self.nb_autoexec = 0 | |
| 2182 | + self.nb_suspicious = 0 | |
| 2183 | + self.nb_iocs = 0 | |
| 2184 | + self.nb_hexstrings = 0 | |
| 2185 | + self.nb_base64strings = 0 | |
| 2186 | + self.nb_dridexstrings = 0 | |
| 2187 | + self.nb_vbastrings = 0 | |
| 2188 | + | |
| 2189 | + # if filename is None: | |
| 2190 | + # if isinstance(_file, basestring): | |
| 2191 | + # if len(_file) < olefile.MINIMAL_OLEFILE_SIZE: | |
| 2192 | + # self.filename = _file | |
| 2193 | + # else: | |
| 2194 | + # self.filename = '<file in bytes string>' | |
| 2195 | + # else: | |
| 2196 | + # self.filename = '<file-like object>' | |
| 2197 | + if olefile.isOleFile(_file): | |
| 2198 | + # This looks like an OLE file | |
| 2199 | + self.open_ole(_file) | |
| 2200 | + | |
| 2201 | + # if this worked, try whether it is a ppt file (special ole file) | |
| 2202 | + self.open_ppt() | |
| 2203 | + if self.type is None and zipfile.is_zipfile(_file): | |
| 2204 | + # Zip file, which may be an OpenXML document | |
| 2205 | + self.open_openxml(_file) | |
| 2206 | + if self.type is None: | |
| 2207 | + # read file from disk, check if it is a Word 2003 XML file (WordProcessingML), Excel 2003 XML, | |
| 2208 | + # or a plain text file containing VBA code | |
| 2209 | + if data is None: | |
| 2210 | + data = open(filename, 'rb').read() | |
| 2211 | + # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace | |
| 2212 | + if b'http://schemas.microsoft.com/office/word/2003/wordml' in data: | |
| 2213 | + self.open_word2003xml(data) | |
| 2214 | + # store a lowercase version for the next tests: | |
| 2215 | + data_lowercase = data.lower() | |
| 2216 | + # check if it is a MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"): | |
| 2217 | + # According to my tests, these files usually start with "MIME-Version: 1.0" on the 1st line | |
| 2218 | + # BUT Word accepts a blank line or other MIME headers inserted before, | |
| 2219 | + # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored. | |
| 2220 | + # And the line is case insensitive. | |
| 2221 | + # so we'll just check the presence of mime, version and multipart anywhere: | |
| 2222 | + if self.type is None and b'mime' in data_lowercase and b'version' in data_lowercase \ | |
| 2223 | + and b'multipart' in data_lowercase: | |
| 2224 | + self.open_mht(data) | |
| 2225 | + #TODO: handle exceptions | |
| 2226 | + #TODO: Excel 2003 XML | |
| 2227 | + # Check if this is a plain text VBA or VBScript file: | |
| 2228 | + # To avoid scanning binary files, we simply check for some control chars: | |
| 2229 | + if self.type is None and b'\x00' not in data: | |
| 2230 | + self.open_text(data) | |
| 2231 | + if self.type is None: | |
| 2232 | + # At this stage, could not match a known format: | |
| 2233 | + msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename | |
| 2234 | + log.info(msg) | |
| 2235 | + raise FileOpenError(msg) | |
| 2236 | + | |
| 2237 | + def open_ole(self, _file): | |
| 2238 | + """ | |
| 2239 | + Open an OLE file | |
| 2240 | + :param _file: filename or file contents in a file object | |
| 2241 | + :return: nothing | |
| 2242 | + """ | |
| 2243 | + log.info('Opening OLE file %s' % self.filename) | |
| 2244 | + try: | |
| 2245 | + # Open and parse the OLE file, using unicode for path names: | |
| 2246 | + self.ole_file = olefile.OleFileIO(_file, path_encoding=None) | |
| 2247 | + # set type only if parsing succeeds | |
| 2248 | + self.type = TYPE_OLE | |
| 2249 | + except (IOError, TypeError, ValueError) as exc: | |
| 2250 | + # TODO: handle OLE parsing exceptions | |
| 2251 | + log.info('Failed OLE parsing for file %r (%s)' % (self.filename, exc)) | |
| 2252 | + log.debug('Trace:', exc_info=True) | |
| 2253 | + | |
| 2254 | + | |
| 2255 | + def open_openxml(self, _file): | |
| 2256 | + """ | |
| 2257 | + Open an OpenXML file | |
| 2258 | + :param _file: filename or file contents in a file object | |
| 2259 | + :return: nothing | |
| 2260 | + """ | |
| 2261 | + # This looks like a zip file, need to look for vbaProject.bin inside | |
| 2262 | + # It can be any OLE file inside the archive | |
| 2263 | + #...because vbaProject.bin can be renamed: | |
| 2264 | + # see http://www.decalage.info/files/JCV07_Lagadec_OpenDocument_OpenXML_v4_decalage.pdf#page=18 | |
| 2265 | + log.info('Opening ZIP/OpenXML file %s' % self.filename) | |
| 2266 | + try: | |
| 2267 | + z = zipfile.ZipFile(_file) | |
| 2268 | + #TODO: check if this is actually an OpenXML file | |
| 2269 | + #TODO: if the zip file is encrypted, suggest to use the -z option, or try '-z infected' automatically | |
| 2270 | + # check each file within the zip if it is an OLE file, by reading its magic: | |
| 2271 | + for subfile in z.namelist(): | |
| 2272 | + magic = z.open(subfile).read(len(olefile.MAGIC)) | |
| 2273 | + if magic == olefile.MAGIC: | |
| 2274 | + log.debug('Opening OLE file %s within zip' % subfile) | |
| 2275 | + ole_data = z.open(subfile).read() | |
| 2276 | + try: | |
| 2277 | + self.ole_subfiles.append( | |
| 2278 | + VBA_Parser(filename=subfile, data=ole_data, | |
| 2279 | + relaxed=self.relaxed)) | |
| 2280 | + except OlevbaBaseException as exc: | |
| 2281 | + if self.relaxed: | |
| 2282 | + log.info('%s is not a valid OLE file (%s)' % (subfile, exc)) | |
| 2283 | + log.debug('Trace:', exc_info=True) | |
| 2284 | + continue | |
| 2285 | + else: | |
| 2286 | + raise SubstreamOpenError(self.filename, subfile, | |
| 2287 | + exc) | |
| 2288 | + z.close() | |
| 2289 | + # set type only if parsing succeeds | |
| 2290 | + self.type = TYPE_OpenXML | |
| 2291 | + except OlevbaBaseException as exc: | |
| 2292 | + if self.relaxed: | |
| 2293 | + log.info('Error {0} caught in Zip/OpenXML parsing for file {1}' | |
| 2294 | + .format(exc, self.filename)) | |
| 2295 | + log.debug('Trace:', exc_info=True) | |
| 2296 | + else: | |
| 2297 | + raise | |
| 2298 | + except (RuntimeError, zipfile.BadZipfile, zipfile.LargeZipFile, IOError) as exc: | |
| 2299 | + # TODO: handle parsing exceptions | |
| 2300 | + log.info('Failed Zip/OpenXML parsing for file %r (%s)' | |
| 2301 | + % (self.filename, exc)) | |
| 2302 | + log.debug('Trace:', exc_info=True) | |
| 2303 | + | |
def open_word2003xml(self, data):
    """
    Open a Word 2003 XML file and register the VBA projects embedded in it.

    Every binData element found in the document is Base64-decoded. When the
    decoded payload is an ActiveMime/MSO container, the OLE data extracted
    from it is wrapped in a new VBA_Parser appended to self.ole_subfiles.
    self.type is set to TYPE_Word2003_XML only when the whole document has
    been processed without an unhandled error.

    :param data: file contents in a string or bytes
    :return: nothing
    :raises OlevbaBaseException: olevba errors are re-raised when
        self.relaxed is false (this presumably includes the
        SubstreamOpenError built for a failing subfile - to be confirmed
        against the exception hierarchy)
    """
    log.info('Opening Word 2003 XML file %s' % self.filename)
    try:
        # parse the XML content
        # TODO: handle XML parsing exceptions
        et = ET.fromstring(data)
        # find all the binData elements.
        # Element.iter() is used instead of getiterator(): the latter is
        # deprecated and was removed from xml.etree.ElementTree in Python 3.9
        for bindata in et.iter(TAG_BINDATA):
            # the binData content is an OLE container for the VBA project, compressed
            # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
            # get the filename:
            fname = bindata.get(ATTR_NAME, 'noname.mso')
            # decode the base64 activemime
            mso_data = binascii.a2b_base64(bindata.text)
            if is_mso_file(mso_data):
                # decompress the zlib data stored in the MSO file, which is the OLE container:
                # TODO: handle different offsets => separate function
                try:
                    ole_data = mso_file_extract(mso_data)
                    self.ole_subfiles.append(
                        VBA_Parser(filename=fname, data=ole_data,
                                   relaxed=self.relaxed))
                except OlevbaBaseException as exc:
                    if self.relaxed:
                        log.info('Error parsing subfile {0}: {1}'
                                 .format(fname, exc))
                        log.debug('Trace:', exc_info=True)
                    else:
                        raise SubstreamOpenError(self.filename, fname, exc)
            else:
                log.info('%s is not a valid MSO file' % fname)
        # set type only if parsing succeeds
        self.type = TYPE_Word2003_XML
    except OlevbaBaseException as exc:
        if self.relaxed:
            log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc))
            log.debug('Trace:', exc_info=True)
        else:
            raise
    except Exception as exc:
        # TODO: differentiate exceptions for each parsing stage
        # (but ET is different libs, no good exception description in API)
        # found: XMLSyntaxError
        log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc))
        log.debug('Trace:', exc_info=True)
| 2354 | + | |
def open_mht(self, data):
    """
    Open a MHTML (MIME HTML) file and register the VBA projects it embeds.

    The text is parsed as a MIME message. Every part whose decoded payload
    is an ActiveMime/MSO container is decompressed and wrapped in a new
    VBA_Parser appended to self.ole_subfiles. self.type is set to TYPE_MHTML
    only when all the parts were walked without an unhandled error.

    :param data: file contents in a string or bytes
    :return: nothing
    """
    log.info('Opening MHTML file %s' % self.filename)
    try:
        # the MIME parser below works on text: decode raw bytes first
        if isinstance(data, bytes):
            data = data.decode('utf8', 'replace')
        # drop leading whitespace or newlines
        # (workaround for issue in email package)
        mime_text = data.lstrip('\r\n\t ')
        # drop any junk located before the MIME headers
        # (issue #31 fix by Greg C - gdigreg)
        # TODO: improve keywords to avoid false positives
        mime_pos = mime_text.find('MIME')
        content_pos = mime_text.find('Content')
        if -1 < mime_pos <= content_pos:
            # "MIME" is present and located before "Content"
            mime_text = mime_text[mime_pos:]
        elif content_pos > -1:
            # "Content" is present, "MIME" is either missing or after it
            # TODO: can it work without "MIME" at all?
            mime_text = mime_text[content_pos:]
        # TODO: quick and dirty fix: insert a standard line with MIME-Version header?
        message = email.message_from_string(mime_text)
        # inspect every part of the message:
        for part in message.walk():
            content_type = part.get_content_type()  # always returns a value
            fname = part.get_filename(None)  # returns None if it fails
            # TODO: get content-location if no filename
            log.debug('MHTML part: filename=%r, content-type=%r' % (fname, content_type))
            payload = part.get_payload(decode=True)
            # VBA macros are stored in a binary file named "editdata.mso".
            # Its content is an OLE container for the VBA project, compressed
            # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
            # Only payloads starting with the ActiveMime header are of interest:
            looks_like_mso = isinstance(payload, (str, bytes)) and is_mso_file(payload)
            if not looks_like_mso:
                # not a MSO container: only trace what was found
                log.debug('type(part_data) = %s' % type(payload))
                try:
                    log.debug('part_data[0:20] = %r' % payload[0:20])
                except TypeError:
                    log.debug('part_data has no __getitem__')
                continue

            log.debug('Found ActiveMime header, decompressing MSO container')
            try:
                ole_data = mso_file_extract(payload)
                # TODO: check if it is actually an OLE file
                # TODO: get the MSO filename from content_location?
                subfile = VBA_Parser(filename=fname, data=ole_data,
                                     relaxed=self.relaxed)
                self.ole_subfiles.append(subfile)
            except OlevbaBaseException as exc:
                if not self.relaxed:
                    raise SubstreamOpenError(self.filename, fname, exc)
                log.info('%s does not contain a valid OLE file (%s)'
                         % (fname, exc))
                log.debug('Trace:', exc_info=True)
                # TODO: bug here - need to split in smaller functions/classes?
        # set type only if parsing succeeds
        self.type = TYPE_MHTML
    except OlevbaBaseException:
        # olevba errors are passed to the caller untouched
        raise
    except Exception:
        log.info('Failed MIME parsing for file %r - %s'
                 % (self.filename, MSG_OLEVBA_ISSUES))
        log.debug('Trace:', exc_info=True)
| 2427 | + | |
def open_ppt(self):
    """
    Try to interpret self.ole_file as PowerPoint 97-2003 using PptParser.

    Although self.ole_file is a valid olefile.OleFileIO, on success it is
    closed and replaced by None, and self.ole_subfiles receives one
    VBA_Parser per VBA data blob found by PptParser. That makes most of the
    other methods treat this file like an OpenXML file and only look at
    ole_subfiles (except find_vba_* which explicitly check self.type).

    Any exception raised while parsing is only logged at debug level: it is
    taken as "this is not a PPT file".
    """
    log.info('Check whether OLE file is PPT')
    ppt_parser.enable_logging()
    try:
        parser = ppt_parser.PptParser(self.ole_file, fast_fail=True)
        for vba_blob in parser.iter_vba_data():
            subfile = VBA_Parser(None, vba_blob, container='PptParser')
            self.ole_subfiles.append(subfile)
        log.info('File is PPT')
        self.ole_file.close()  # just in case
        # required to make other methods look at ole_subfiles:
        self.ole_file = None
        self.type = TYPE_PPT
    except Exception as exc:
        if self.container != 'PptParser':
            log.debug("File appears not to be a ppt file (%s)" % exc)
        else:
            # this is a subfile of a ppt --> to be expected that is no ppt
            log.debug('PPT subfile is not a PPT file')
| 2456 | + | |
| 2457 | + | |
def open_text(self, data):
    """
    Open a text file containing VBA or VBScript source code.

    The whole content is stored as-is in self.vba_code_all_modules (bytes
    are first decoded as UTF-8, replacing undecodable sequences), and the
    file is flagged as containing macros.

    :param data: file contents in a string or bytes
    :return: nothing
    """
    log.info('Opening text file %s' % self.filename)
    # the source code is kept as text, not bytes:
    source = data.decode('utf8', 'replace') if isinstance(data, bytes) else data
    self.vba_code_all_modules = source
    self.contains_macros = True
    # set type only if parsing succeeds
    self.type = TYPE_TEXT
| 2472 | + | |
| 2473 | + | |
def find_vba_projects(self):
    """
    Finds all the VBA projects stored in an OLE file.

    Return None if the file is not OLE but OpenXML.
    Return a list of tuples (vba_root, project_path, dir_path) for each VBA project.
    vba_root is the path of the root OLE storage containing the VBA project,
    including a trailing slash unless it is the root of the OLE file.
    project_path is the path of the OLE stream named "PROJECT" within the VBA project.
    dir_path is the path of the OLE stream named "VBA/dir" within the VBA project.

    If this function returns an empty list for one of the supported formats
    (i.e. Word, Excel, Powerpoint), then the file does not contain VBA macros.

    The result is cached in self.vba_projects and returned directly by
    later calls.

    :return: None if OpenXML file, list of tuples (vba_root, project_path, dir_path)
    for each VBA project found if OLE file
    """
    log.debug('VBA_Parser.find_vba_projects')

    # if the file is not OLE but OpenXML, return None:
    if self.ole_file is None and self.type != TYPE_PPT:
        return None

    # if this method has already been called, return previous result:
    if self.vba_projects is not None:
        return self.vba_projects

    # if this is a ppt file (PowerPoint 97-2003):
    # self.ole_file is None but the ole_subfiles do contain vba_projects
    # (like for OpenXML files).
    if self.type == TYPE_PPT:
        # TODO: so far, this function is never called for PPT files, but
        # if that happens, the information is lost which ole file contains
        # which storage!
        log.warning('Returned info is not complete for PPT types!')
        self.vba_projects = []
        for subfile in self.ole_subfiles:
            # a subfile which is not an OLE file returns None (see the
            # first test above): skip it instead of failing in extend()
            sub_projects = subfile.find_vba_projects()
            if sub_projects:
                self.vba_projects.extend(sub_projects)
        return self.vba_projects

    # Find the VBA project root (different in MS Word, Excel, etc):
    # - Word 97-2003: Macros
    # - Excel 97-2003: _VBA_PROJECT_CUR
    # - PowerPoint 97-2003: PptParser has identified ole_subfiles
    # - Word 2007+: word/vbaProject.bin in zip archive, then the VBA project is the root of vbaProject.bin.
    # - Excel 2007+: xl/vbaProject.bin in zip archive, then same as Word
    # - PowerPoint 2007+: ppt/vbaProject.bin in zip archive, then same as Word
    # - Visio 2007: not supported yet (different file structure)

    # According to MS-OVBA section 2.2.1:
    # - the VBA project root storage MUST contain a VBA storage and a PROJECT stream
    # - The root/VBA storage MUST contain a _VBA_PROJECT stream and a dir stream
    # - all names are case-insensitive

    def check_vba_stream(ole, vba_root, stream_path):
        # return the full path of the stream if it exists under vba_root
        # and is a stream (not a storage), False otherwise
        full_path = vba_root + stream_path
        if ole.exists(full_path) and ole.get_type(full_path) == olefile.STGTY_STREAM:
            log.debug('Found %s stream: %s' % (stream_path, full_path))
            return full_path
        else:
            log.debug('Missing %s stream, this is not a valid VBA project structure' % stream_path)
            return False

    # start with an empty list:
    self.vba_projects = []
    # Look for any storage containing those storage/streams:
    ole = self.ole_file
    for storage in ole.listdir(streams=False, storages=True):
        log.debug('Checking storage %r' % storage)
        # Look for a storage ending with "VBA":
        if storage[-1].upper() == 'VBA':
            log.debug('Found VBA storage: %s' % ('/'.join(storage)))
            vba_root = '/'.join(storage[:-1])
            # Add a trailing slash to vba_root, unless it is the root of the OLE file:
            # (used later to append all the child streams/storages)
            if vba_root != '':
                vba_root += '/'
            log.debug('Checking vba_root="%s"' % vba_root)

            # Check if the VBA root storage also contains a PROJECT stream:
            project_path = check_vba_stream(ole, vba_root, 'PROJECT')
            if not project_path:
                continue
            # Check if the VBA root storage also contains a VBA/_VBA_PROJECT stream:
            vba_project_path = check_vba_stream(ole, vba_root, 'VBA/_VBA_PROJECT')
            if not vba_project_path:
                continue
            # Check if the VBA root storage also contains a VBA/dir stream:
            dir_path = check_vba_stream(ole, vba_root, 'VBA/dir')
            if not dir_path:
                continue
            # Now we are pretty sure it is a VBA project structure
            log.debug('VBA root storage: "%s"' % vba_root)
            # append the results to the list as a tuple for later use:
            self.vba_projects.append((vba_root, project_path, dir_path))
    return self.vba_projects
| 2567 | + | |
def detect_vba_macros(self):
    """
    Detect the potential presence of VBA macros in the file, by checking
    if it contains VBA projects. Both OLE and OpenXML files are supported.

    Important: for now, results are accurate only for Word, Excel and PowerPoint

    Note: this method does NOT attempt to check the actual presence or validity
    of VBA macro source code, so there might be false positives.
    It may also detect VBA macros in files embedded within the main file,
    for example an Excel workbook with macros embedded into a Word
    document without macros may be detected, without distinction.

    The result is cached in self.contains_macros.

    :return: bool, True if at least one VBA project has been found, False otherwise
    :raises SubstreamOpenError: if a stream cannot be read (IOError) and
        self.relaxed is false
    """
    #TODO: return None or raise exception if format not supported
    #TODO: return the number of VBA projects found instead of True/False?
    # if this method was already called, return the previous result:
    if self.contains_macros is not None:
        return self.contains_macros
    # if OpenXML/PPT, check all the OLE subfiles:
    if self.ole_file is None:
        for ole_subfile in self.ole_subfiles:
            if ole_subfile.detect_vba_macros():
                self.contains_macros = True
                return True
        # otherwise, no macro found:
        self.contains_macros = False
        return False
    # otherwise it's an OLE file, find VBA projects:
    vba_projects = self.find_vba_projects()
    self.contains_macros = len(vba_projects) > 0
    # Also look for VBA code in any stream including orphans
    # (happens in some malformed files)
    ole = self.ole_file
    for sid in range(len(ole.direntries)):
        # check if id is already done above:
        log.debug('Checking DirEntry #%d' % sid)
        d = ole.direntries[sid]
        if d is None:
            # this direntry is not part of the tree: either unused or an orphan
            d = ole._load_direntry(sid)
            log.debug('This DirEntry is an orphan or unused')
        if d.entry_type == olefile.STGTY_STREAM:
            # read data
            log.debug('Reading data from stream %r - size: %d bytes' % (d.name, d.size))
            try:
                data = ole._open(d.isectStart, d.size).read()
                log.debug('Read %d bytes' % len(data))
                if len(data) > 200:
                    log.debug('%r...[much more data]...%r' % (data[:100], data[-50:]))
                else:
                    log.debug(repr(data))
                if 'Attribut' in data.decode('utf-8', 'ignore'):
                    log.debug('Found VBA compressed code')
                    self.contains_macros = True
            except IOError as exc:
                if self.relaxed:
                    log.info('Error when reading OLE Stream %r' % d.name)
                    # exc_info is the keyword understood by the logging
                    # API; an unknown keyword raises a TypeError as soon
                    # as debug logging is enabled
                    log.debug('Trace:', exc_info=True)
                else:
                    raise SubstreamOpenError(self.filename, d.name, exc)
    return self.contains_macros
| 2634 | + | |
def extract_macros(self):
    """
    Extract and decompress source code for each VBA macro found in the file

    Iterator: yields (filename, stream_path, vba_filename, vba_code) for each VBA macro found
    If the file is OLE, filename is the path of the file.
    If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
    within the zip archive, e.g. word/vbaProject.bin.
    If the file is PPT, result is as for OpenXML but filename is useless

    For OLE files, streams that are not part of a VBA project (including
    orphans) are also searched for compressed VBA code. For those, both
    stream_path and vba_filename are the name of the directory entry.
    """
    log.debug('extract_macros:')
    if self.ole_file is None:
        # This may be either an OpenXML/PPT or a text file:
        if self.type == TYPE_TEXT:
            # This is a text file, yield the full code:
            yield (self.filename, '', self.filename, self.vba_code_all_modules)
        else:
            # OpenXML/PPT: recursively yield results from each OLE subfile:
            for ole_subfile in self.ole_subfiles:
                for results in ole_subfile.extract_macros():
                    yield results
    else:
        # This is an OLE file:
        self.find_vba_projects()
        # set of stream ids
        vba_stream_ids = set()
        for vba_root, project_path, dir_path in self.vba_projects:
            # extract all VBA macros from that VBA root storage:
            for stream_path, vba_filename, vba_code in \
                    _extract_vba(self.ole_file, vba_root, project_path,
                                 dir_path, self.relaxed):
                # store direntry ids in a set:
                vba_stream_ids.add(self.ole_file._find(stream_path))
                yield (self.filename, stream_path, vba_filename, vba_code)
        # Also look for VBA code in any stream including orphans
        # (happens in some malformed files)
        ole = self.ole_file
        for sid in range(len(ole.direntries)):
            # check if id is already done above:
            log.debug('Checking DirEntry #%d' % sid)
            if sid in vba_stream_ids:
                log.debug('Already extracted')
                continue
            d = ole.direntries[sid]
            if d is None:
                # this direntry is not part of the tree: either unused or an orphan
                d = ole._load_direntry(sid)
                log.debug('This DirEntry is an orphan or unused')
            if d.entry_type == olefile.STGTY_STREAM:
                # read data
                log.debug('Reading data from stream %r' % d.name)
                data = ole._open(d.isectStart, d.size).read()
                for match in re.finditer(rb'\x00Attribut[^e]', data, flags=re.IGNORECASE):
                    # the compressed data is assumed to start 3 bytes
                    # before the match
                    start = match.start() - 3
                    if start < 0:
                        # fewer than 3 bytes before the match: a negative
                        # index would slice from the END of the stream
                        # and feed unrelated bytes to the decompressor
                        log.debug('Ignoring match at index %d: too close to the start of stream %r'
                                  % (match.start(), d.name))
                        continue
                    log.debug('Found VBA compressed code at index %X' % start)
                    compressed_code = data[start:]
                    try:
                        vba_code = decompress_stream(compressed_code)
                        yield (self.filename, d.name, d.name, vba_code)
                    except Exception as exc:
                        # display the exception with full stack trace for debugging
                        log.debug('Error processing stream %r in file %r (%s)' % (d.name, self.filename, exc))
                        log.debug('Traceback:', exc_info=True)
                        # do not raise the error, as it is unlikely to be a compressed macro stream
| 2699 | + | |
def extract_all_macros(self):
    """
    Run extract_macros() once and keep its results.

    The tuples (filename, stream_path, vba_filename, vba_code) yielded by
    extract_macros() are stored as a list in self.modules, and the number
    of entries in self.nb_macros. When self.modules is already filled, the
    stored list is returned without extracting again.
    See extract_macros for details.

    :return: list of tuples (filename, stream_path, vba_filename, vba_code)
    """
    if self.modules is None:
        # the list is attached to self before iterating, so that results
        # are stored as they are produced
        collected = self.modules = []
        for entry in self.extract_macros():
            subfilename, stream_path, vba_filename, vba_code = entry
            collected.append((subfilename, stream_path, vba_filename, vba_code))
    self.nb_macros = len(self.modules)
    return self.modules
| 2713 | + | |
| 2714 | + | |
| 2715 | + | |
def analyze_macros(self, show_decoded_strings=False, deobfuscate=False):
    """
    Extract the source code of all VBA macros found in the file and scan it.

    The code of all modules and the printable strings found in VBA forms
    are merged into self.vba_code_all_modules, which is scanned at once
    with VBA_Scanner. The scan results are cached in self.analysis_results
    and the nb_* counters are updated from the scan summary.

    :param show_decoded_strings: bool, passed to VBA_Scanner.scan
    :param deobfuscate: bool, passed to VBA_Scanner.scan
    :return: the scan results, or the current value of
        self.analysis_results when no macro is detected
    """
    # nothing to do without macros, or if the analysis was already done:
    if not self.detect_vba_macros() or self.analysis_results is not None:
        return self.analysis_results

    # merge the source code from all modules, unless already available:
    if self.vba_code_all_modules is None:
        self.vba_code_all_modules = ''
        for (_fname, _stream, _vba_name, module_code) in self.extract_all_macros():
            #TODO: filter code? (each module)
            self.vba_code_all_modules += module_code.decode('utf-8', 'ignore') + '\n'
        for (_fname, _stream, form_text) in self.extract_form_strings():
            self.vba_code_all_modules += form_text.decode('utf-8', 'ignore') + '\n'

    # Analyze the whole code at once:
    scanner = VBA_Scanner(self.vba_code_all_modules)
    self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate)
    (n_autoexec, n_suspicious, n_iocs, n_hex,
     n_base64, n_dridex, n_vba) = scanner.scan_summary()
    # add the summary to the counters of this parser:
    self.nb_autoexec += n_autoexec
    self.nb_suspicious += n_suspicious
    self.nb_iocs += n_iocs
    self.nb_hexstrings += n_hex
    self.nb_base64strings += n_base64
    self.nb_dridexstrings += n_dridex
    self.nb_vbastrings += n_vba

    return self.analysis_results
| 2746 | + | |
| 2747 | + | |
def reveal(self):
    """
    Return the merged VBA source code in which each obfuscated expression
    reported as a 'VBA string' by analyze_macros() is replaced by its
    decoded value, written as a VBA string literal.

    :return: the content of self.vba_code_all_modules after replacement;
        it is returned unchanged when the analysis gave no result
    """
    # we only want printable strings:
    # (analyze_macros returns None when no macro is detected: use an empty
    # list then, instead of failing in sorted())
    analysis = self.analyze_macros(show_decoded_strings=False) or []
    # to avoid replacing short strings contained into longer strings, we sort the analysis results
    # based on the length of the encoded string, in reverse order:
    analysis = sorted(analysis, key=lambda type_decoded_encoded: len(type_decoded_encoded[2]), reverse=True)
    # normally now self.vba_code_all_modules contains source code from all modules
    deobf_code = self.vba_code_all_modules
    for kw_type, decoded, encoded in analysis:
        if kw_type == 'VBA string':
            # need to add double quotes around the decoded strings
            # after escaping double-quotes as double-double-quotes for VBA:
            decoded = decoded.replace('"', '""')
            deobf_code = deobf_code.replace(encoded, '"%s"' % decoded)
    return deobf_code
    #TODO: run the analysis several times if hex or base64 strings are revealed
| 2765 | + | |
| 2766 | + | |
def find_vba_forms(self):
    """
    Finds all the VBA forms stored in an OLE file.

    Return None if the file is not OLE but OpenXML.
    Return a list with one entry for each VBA form found. Each entry is the
    path of the storage containing the form, as a list of names (the form
    returned by the listdir method of the OLE file). That storage contains
    the two streams named "f" and "o".

    If this function returns an empty list for one of the supported formats
    (i.e. Word, Excel, Powerpoint), then the file does not contain VBA forms.

    The result is also stored in self.vba_forms.

    :return: None if OpenXML file, list of storage paths (one for each VBA
        form found) if OLE file
    """
    log.debug('VBA_Parser.find_vba_forms')

    # if the file is not OLE but OpenXML, return None:
    if self.ole_file is None and self.type != TYPE_PPT:
        return None

    # According to MS-OFORMS section 2.1.2 Control Streams:
    # - A parent control, that is, a control that can contain embedded controls,
    # MUST be persisted as a storage that contains multiple streams.
    # - All parent controls MUST contain a FormControl. The FormControl
    # properties are persisted to a stream (1) as specified in section 2.1.1.2.
    # The name of this stream (1) MUST be "f".
    # - Embedded controls that cannot themselves contain other embedded
    # controls are persisted sequentially as FormEmbeddedActiveXControls
    # to a stream (1) contained in the same storage as the parent control.
    # The name of this stream (1) MUST be "o".
    # - all names are case-insensitive

    if self.type == TYPE_PPT:
        # TODO: so far, this function is never called for PPT files, but
        # if that happens, the information is lost which ole file contains
        # which storage!
        # ole_subfiles contains VBA_Parser objects (see open_ppt), not OLE
        # files: the OLE file to inspect is the ole_file attribute of each
        # subfile, when it has one.
        ole_files = [subfile.ole_file for subfile in self.ole_subfiles
                     if subfile.ole_file is not None]
        log.warning('Returned info is not complete for PPT types!')
    else:
        ole_files = [self.ole_file, ]

    # start with an empty list:
    self.vba_forms = []

    # Loop over ole streams
    for ole in ole_files:
        # Look for any storage containing those storage/streams:
        for storage in ole.listdir(streams=False, storages=True):
            log.debug('Checking storage %r' % storage)
            # Look for two streams named 'o' and 'f':
            o_stream = storage + ['o']
            f_stream = storage + ['f']
            log.debug('Checking if streams %r and %r exist' % (f_stream, o_stream))
            if ole.exists(o_stream) and ole.get_type(o_stream) == olefile.STGTY_STREAM \
                    and ole.exists(f_stream) and ole.get_type(f_stream) == olefile.STGTY_STREAM:
                form_path = '/'.join(storage)
                log.debug('Found VBA Form: %r' % form_path)
                self.vba_forms.append(storage)
    return self.vba_forms
| 2833 | + | |
def extract_form_strings(self):
    """
    Extract printable strings from each VBA Form found in the file

    Iterator: yields (filename, stream_path, form_string) for each printable
    string found in the "o" stream of a VBA form.
    If the file is OLE, filename is self.filename and stream_path is the
    path of the "o" stream, joined with slashes.
    If the file is OpenXML or PPT, the results of each OLE subfile are
    yielded in turn.
    If the file is a text file, nothing is yielded.
    """
    ole = self.ole_file
    if ole is None:
        # This may be either an OpenXML/PPT or a text file.
        # A text file gives no results; for OpenXML/PPT, recursively
        # yield results from each OLE subfile:
        if self.type != TYPE_TEXT:
            for ole_subfile in self.ole_subfiles:
                for item in ole_subfile.extract_form_strings():
                    yield item
        return

    # This is an OLE file:
    self.find_vba_forms()
    for form_storage in self.vba_forms:
        stream_path = form_storage + ['o']
        stream_name = '/'.join(stream_path)
        log.debug('Opening form object stream %r' % stream_name)
        form_data = ole.openstream(stream_path).read()
        # Extract printable strings from the form object stream "o":
        for found in re_printable_string.finditer(form_data):
            text = found.group()
            log.debug('Printable string found in form: %r' % text)
            yield (self.filename, stream_name, text)
| 2866 | + | |
| 2867 | + | |
def close(self):
    """
    Close all the open files. This method must be called after usage, if
    the application is opening many files.

    When the parser holds an OLE file, only that file is closed. Otherwise
    close() is called on each subfile in self.ole_subfiles, if any.
    """
    if self.ole_file is not None:
        self.ole_file.close()
        return
    # no main OLE file: the data is held by the subfiles, if there are any
    if self.ole_subfiles is None:
        return
    for subfile in self.ole_subfiles:
        subfile.close()
| 2879 | + | |
| 2880 | + | |
| 2881 | + | |
| 2882 | +class VBA_Parser_CLI(VBA_Parser): | |
| 2883 | + """ | |
| 2884 | + VBA parser and analyzer, adding methods for the command line interface | |
| 2885 | + of olevba. (see VBA_Parser) | |
| 2886 | + """ | |
| 2887 | + | |
def __init__(self, *args, **kwargs):
    """
    Build a VBA_Parser_CLI.

    All positional and keyword arguments are handed over unchanged to the
    constructor of the parent class --> see doc there
    """
    super().__init__(*args, **kwargs)
| 2894 | + | |
| 2895 | + | |
def print_analysis(self, show_decoded_strings=False, deobfuscate=False):
    """
    Analyze the VBA code of the file, and print the results in a table

    :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
    :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
    :return: None
    """
    # print a waiting message only if the output is not redirected to a file:
    if sys.stdout.isatty():
        # no newline after the carriage return, so that the next output
        # overwrites the waiting message instead of following it
        print('Analysis...\r', end='')
        sys.stdout.flush()
    results = self.analyze_macros(show_decoded_strings, deobfuscate)
    if results:
        t = prettytable.PrettyTable(('Type', 'Keyword', 'Description'))
        t.align = 'l'
        t.max_width['Type'] = 10
        t.max_width['Keyword'] = 20
        t.max_width['Description'] = 39
        for kw_type, keyword, description in results:
            # handle non printable strings:
            if not is_printable(keyword):
                keyword = repr(keyword)
            if not is_printable(description):
                description = repr(description)
            t.add_row((kw_type, keyword, description))
        print(t)
    else:
        print('No suspicious keyword or IOC found.')
| 2926 | + | |
def print_analysis_json(self, show_decoded_strings=False, deobfuscate=False):
    """
    Analyze the VBA code of the file, and return the results in a form
    suitable for json output

    :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
    :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)

    :return: list of dict, one for each analysis result, with the keys
        'type', 'keyword' and 'description'; empty list if the analysis
        gave no result
    """
    # print a waiting message only if the output is not redirected to a file:
    if sys.stdout.isatty():
        # no newline after the carriage return, so that the next output
        # overwrites the waiting message instead of following it
        print('Analysis...\r', end='')
        sys.stdout.flush()
    # analyze_macros returns None when no macro is detected:
    results = self.analyze_macros(show_decoded_strings, deobfuscate) or []
    return [dict(type=kw_type, keyword=keyword, description=description)
            for kw_type, keyword, description in results]
| 2943 | + | |
def process_file(self, show_decoded_strings=False,
                 display_code=True, hide_attributes=True,
                 vba_code_only=False, show_deobfuscated_code=False,
                 deobfuscate=False):
    """
    Process the file opened by this parser and print a detailed report
    (macro source code, form strings, analysis) to standard output.

    :param show_decoded_strings: bool, forwarded to print_analysis()
    :param display_code: bool, if False VBA source code is not displayed (default True)
    :param hide_attributes: bool, if True the VBA code is passed through filter_vba()
        before being displayed, to hide the attribute lines (default)
    :param vba_code_only: bool, if True the analysis is skipped; this forces
        display_code to True
    :param show_deobfuscated_code: bool, if True the result of reveal() is printed
    :param deobfuscate: bool, forwarded to print_analysis()
    :raises ProcessingError: wraps any exception that is not an
        OlevbaBaseException; OlevbaBaseException instances are re-raised unchanged
    """
    #TODO: replace print by writing to a provided output file (sys.stdout by default)
    # fix conflicting parameters: showing "code only" implies showing code
    if vba_code_only and not display_code:
        display_code = True
    if self.container:
        display_filename = '%s in %s' % (self.filename, self.container)
    else:
        display_filename = self.filename
    print('=' * 79)
    print('FILE:', display_filename)
    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        print('Type: %s' % self.type)
        if self.detect_vba_macros():
            for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
                # Always work on text: decoding used to happen only when
                # hide_attributes was set, so with hide_attributes=False
                # bytes were printed as a b'...' repr and the empty-macro
                # check below could never match.
                if isinstance(vba_code, bytes):
                    vba_code = vba_code.decode('utf-8', 'replace')
                if hide_attributes:
                    # hide attribute lines:
                    vba_code_filtered = filter_vba(vba_code)
                else:
                    vba_code_filtered = vba_code
                print('-' * 79)
                print('VBA MACRO %s ' % vba_filename)
                print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)))
                if display_code:
                    print('- ' * 39)
                    # detect empty macros:
                    if vba_code_filtered.strip() == '':
                        print('(empty macro)')
                    else:
                        print(vba_code_filtered)
            for (subfilename, stream_path, form_string) in self.extract_form_strings():
                print('-' * 79)
                print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path))
                print('- ' * 39)
                # tolerate form strings that are already text:
                if isinstance(form_string, bytes):
                    form_string = form_string.decode('utf-8', 'ignore')
                print(form_string)
            if not vba_code_only:
                # analyse the code from all modules at once:
                self.print_analysis(show_decoded_strings, deobfuscate)
            if show_deobfuscated_code:
                print('MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n')
                print(self.reveal())
        else:
            print('No VBA macros found.')
    except OlevbaBaseException:
        # errors of this module already carry their own meaning: pass them on
        raise
    except Exception as exc:
        # display the exception with full stack trace for debugging
        log.info('Error processing file %s (%s)' % (self.filename, exc))
        log.debug('Traceback:', exc_info=True)
        raise ProcessingError(self.filename, exc)
    print('')
| 3014 | + | |
| 3015 | + | |
def process_file_json(self, show_decoded_strings=False,
                      display_code=True, hide_attributes=True,
                      vba_code_only=False, show_deobfuscated_code=False,
                      deobfuscate=False):
    """
    Process the file opened by this parser and return the results as a
    dict meant to be serialized to JSON.

    This is the counterpart of process_file(): every "show" or "print"
    there is translated here as "add to the result dict".

    :param show_decoded_strings: bool, forwarded to print_analysis_json()
    :param display_code: bool, if False the 'code' entry of each macro is None (default True)
    :param hide_attributes: bool, if True the VBA code is passed through filter_vba()
        to hide the attribute lines (default)
    :param vba_code_only: bool, if True the analysis is skipped ('analysis' stays None);
        this forces display_code to True
    :param show_deobfuscated_code: bool, if True 'code_deobfuscated' is set to reveal()
    :param deobfuscate: bool, forwarded to print_analysis_json() and stored
        under 'do_deobfuscate'
    :return: dict with the keys 'container', 'file', 'json_conversion_successful',
        'analysis', 'code_deobfuscated', 'do_deobfuscate', 'type' and 'macros'
    :raises ProcessingError: wraps any exception raised during processing
    """
    #TODO: fix conflicting parameters (?)

    if vba_code_only and not display_code:
        display_code = True

    result = {}

    if self.container:
        result['container'] = self.container
    else:
        result['container'] = None
    result['file'] = self.filename
    result['json_conversion_successful'] = False
    result['analysis'] = None
    result['code_deobfuscated'] = None
    result['do_deobfuscate'] = deobfuscate

    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        result['type'] = self.type
        macros = []
        if self.detect_vba_macros():
            for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
                curr_macro = {}
                # Always store text: bytes cannot be serialized to JSON,
                # and source code that is already text must not be decoded
                # again. Previously bytes were only decoded when
                # hide_attributes was set, and text input raised an error.
                if isinstance(vba_code, bytes):
                    vba_code = vba_code.decode('utf-8', 'replace')
                if hide_attributes:
                    # hide attribute lines:
                    vba_code_filtered = filter_vba(vba_code)
                else:
                    vba_code_filtered = vba_code

                curr_macro['vba_filename'] = vba_filename
                curr_macro['subfilename'] = subfilename
                curr_macro['ole_stream'] = stream_path
                if display_code:
                    curr_macro['code'] = vba_code_filtered.strip()
                else:
                    curr_macro['code'] = None
                macros.append(curr_macro)
            if not vba_code_only:
                # analyse the code from all modules at once:
                result['analysis'] = self.print_analysis_json(show_decoded_strings,
                                                              deobfuscate)
            if show_deobfuscated_code:
                result['code_deobfuscated'] = self.reveal()
        result['macros'] = macros
        result['json_conversion_successful'] = True
    except Exception as exc:
        # display the exception with full stack trace for debugging
        log.info('Error processing file %s (%s)' % (self.filename, exc))
        log.debug('Traceback:', exc_info=True)
        raise ProcessingError(self.filename, exc)

    return result
| 3087 | + | |
| 3088 | + | |
def process_file_triage(self, show_decoded_strings=False, deobfuscate=False):
    """
    Process the file in triage mode: print a single summary line made of
    the file type tag, one letter per indicator found and the file name.

    :param show_decoded_strings: bool, forwarded to analyze_macros()
    :param deobfuscate: bool, forwarded to analyze_macros()
    :raises ProcessingError: wraps any exception raised during processing
    """
    #TODO: replace print by writing to a provided output file (sys.stdout by default)
    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        has_macros = self.detect_vba_macros()
        if has_macros:
            # the waiting message only makes sense on an interactive terminal:
            if sys.stdout.isatty():
                print('Analysis...\r')
                sys.stdout.flush()
            self.analyze_macros(show_decoded_strings=show_decoded_strings,
                                deobfuscate=deobfuscate)
        type_tag = TYPE2TAG[self.type]
        # (indicator value, letter displayed when the indicator is set);
        # the order of this table is the order of the letters in the output
        indicators = (
            (self.contains_macros, 'M'),
            (self.nb_autoexec, 'A'),
            (self.nb_suspicious, 'S'),
            (self.nb_iocs, 'I'),
            (self.nb_hexstrings, 'H'),
            (self.nb_base64strings, 'B'),
            (self.nb_dridexstrings, 'D'),
            (self.nb_vbastrings, 'V'),
        )
        letters = ''.join(letter if found else '-'
                          for found, letter in indicators)
        summary = '%-12s %s' % (type_tag + letters, self.filename)
        print(summary)
    except Exception as exc:
        # display the exception with full stack trace for debugging only
        log.debug('Error processing file %s (%s)' % (self.filename, exc),
                  exc_info=True)
        raise ProcessingError(self.filename, exc)
| 3133 | + | |
| 3134 | + | |
| 3135 | + # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'), | |
| 3136 | + # header=False, border=False) | |
| 3137 | + # t.align = 'l' | |
| 3138 | + # t.max_width['filename'] = 30 | |
| 3139 | + # t.max_width['type'] = 10 | |
| 3140 | + # t.max_width['macros'] = 6 | |
| 3141 | + # t.max_width['autoexec'] = 6 | |
| 3142 | + # t.max_width['suspicious'] = 6 | |
| 3143 | + # t.max_width['ioc'] = 6 | |
| 3144 | + # t.max_width['hexstrings'] = 6 | |
| 3145 | + # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings)) | |
| 3146 | + # print t | |
| 3147 | + | |
| 3148 | + | |
| 3149 | +#=== MAIN ===================================================================== | |
| 3150 | + | |
def _build_option_parser(default_log_level):
    """
    Build the optparse parser describing the olevba command line.

    :param default_log_level: str, default value of the -l/--loglevel option
    :return: optparse.OptionParser with all the olevba options registered
    """
    usage = 'usage: %prog [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r", action="store_true", dest="recursive",
                      help='find files recursively in subdirectories.')
    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
                      help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
                      help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
    # output mode; could make this even simpler with add_option(type='choice') but that would make
    # cmd line interface incompatible...
    modes = optparse.OptionGroup(parser, title='Output mode (mutually exclusive)')
    modes.add_option("-t", '--triage', action="store_const", dest="output_mode",
                     const='triage', default='unspecified',
                     help='triage mode, display results as a summary table (default for multiple files)')
    modes.add_option("-d", '--detailed', action="store_const", dest="output_mode",
                     const='detailed', default='unspecified',
                     help='detailed mode, display full results (default for single file)')
    modes.add_option("-j", '--json', action="store_const", dest="output_mode",
                     const='json', default='unspecified',
                     help='json mode, detailed in json format (never default)')
    parser.add_option_group(modes)
    parser.add_option("-a", '--analysis', action="store_false", dest="display_code", default=True,
                      help='display only analysis results, not the macro source code')
    parser.add_option("-c", '--code', action="store_true", dest="vba_code_only", default=False,
                      help='display only VBA source code, do not analyze it')
    parser.add_option("--decode", action="store_true", dest="show_decoded_strings",
                      help='display all the obfuscated strings with their decoded content (Hex, Base64, StrReverse, Dridex, VBA).')
    parser.add_option("--attr", action="store_false", dest="hide_attributes", default=True,
                      help='display the attribute lines at the beginning of VBA source code')
    parser.add_option("--reveal", action="store_true", dest="show_deobfuscated_code",
                      help='display the macro source code after replacing all the obfuscated strings by their decoded content.')
    parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=default_log_level,
                      help="logging level debug/info/warning/error/critical (default=%default)")
    parser.add_option('--deobf', dest="deobfuscate", action="store_true", default=False,
                      help="Attempt to deobfuscate VBA expressions (slow)")
    parser.add_option('--relaxed', dest="relaxed", action="store_true", default=False,
                      help="Do not raise errors if opening of substream fails")
    return parser


def main():
    """
    Main function, called when olevba is run from the command line.

    Parses the command line, processes every file found by xglob in the
    selected output mode (triage, detailed or json) and terminates the
    process with sys.exit(): RETURN_OK when nothing went wrong, the code
    of the error when exactly one error occurred, RETURN_SEVERAL_ERRS
    when several occurred, RETURN_UNEXPECTED for an unhandled exception.
    """
    DEFAULT_LOG_LEVEL = "warning" # Default log level
    LOG_LEVELS = {
        'debug':    logging.DEBUG,
        'info':     logging.INFO,
        'warning':  logging.WARNING,
        'error':    logging.ERROR,
        'critical': logging.CRITICAL
    }

    def escalate(current, new_code):
        # the first error keeps its specific code, any further error
        # turns the result into the generic "several errors" code
        return new_code if current == RETURN_OK else RETURN_SEVERAL_ERRS

    parser = _build_option_parser(DEFAULT_LOG_LEVEL)
    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if len(args) == 0:
        print(__doc__)
        parser.print_help()
        sys.exit(RETURN_WRONG_ARGS)

    # reject unknown log levels with a usage error before any output is
    # produced, instead of crashing later with a KeyError traceback:
    if options.loglevel not in LOG_LEVELS:
        parser.error('invalid log level %r (expected debug, info, warning, '
                     'error or critical)' % options.loglevel)

    # provide info about tool and its version
    if options.output_mode == 'json':
        # prints opening [
        print_json(script_name='olevba', version=__version__,
                   url='http://decalage.info/python/oletools',
                   type='MetaInformation')
    else:
        print('olevba %s - http://decalage.info/python/oletools' % __version__)

    logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
    # enable logging in the modules:
    log.setLevel(logging.NOTSET)

    # with the option --reveal, make sure --deobf is also enabled:
    if options.show_deobfuscated_code and not options.deobfuscate:
        log.info('set --deobf because --reveal was set')
        options.deobfuscate = True
    if options.output_mode == 'triage' and options.show_deobfuscated_code:
        log.info('ignoring option --reveal in triage output mode')

    # Column headers (do not know how many files there will be yet, so if no output_mode
    # was specified, we will print triage for first file --> need these headers)
    if options.output_mode in ('triage', 'unspecified'):
        print('%-12s %-65s' % ('Flags', 'Filename'))
        print('%-12s %-65s' % ('-' * 11, '-' * 65))

    previous_container = None
    count = 0
    container = filename = data = None
    vba_parser = None
    return_code = RETURN_OK
    try:
        for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
                                                          zip_password=options.zip_password, zip_fname=options.zip_fname):
            # ignore directory names stored in zip files:
            if container and filename.endswith('/'):
                continue

            # handle errors from xglob
            if isinstance(data, Exception):
                if isinstance(data, PathNotFoundException):
                    if options.output_mode in ('triage', 'unspecified'):
                        print('%-12s %s - File not found' % ('?', filename))
                    elif options.output_mode != 'json':
                        log.error('Given path %r does not exist!' % filename)
                    return_code = escalate(return_code, RETURN_FILE_NOT_FOUND)
                else:
                    if options.output_mode in ('triage', 'unspecified'):
                        print('%-12s %s - Failed to read from zip file %s' % ('?', filename, container))
                    elif options.output_mode != 'json':
                        log.error('Exception opening/reading %r from zip file %r: %s'
                                  % (filename, container, data))
                    return_code = escalate(return_code, RETURN_XGLOB_ERR)
                if options.output_mode == 'json':
                    print_json(file=filename, type='error',
                               error=type(data).__name__, message=str(data))
                continue

            try:
                # Open the file
                vba_parser = VBA_Parser_CLI(filename, data=data, container=container,
                                            relaxed=options.relaxed)

                if options.output_mode == 'detailed':
                    # fully detailed output
                    vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
                                            display_code=options.display_code,
                                            hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
                                            show_deobfuscated_code=options.show_deobfuscated_code,
                                            deobfuscate=options.deobfuscate)
                elif options.output_mode in ('triage', 'unspecified'):
                    # print container name when it changes:
                    if container != previous_container:
                        if container is not None:
                            print('\nFiles in %s:' % container)
                        previous_container = container
                    # summarized output for triage:
                    vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
                                                   deobfuscate=options.deobfuscate)
                elif options.output_mode == 'json':
                    print_json(
                        vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
                                                     display_code=options.display_code,
                                                     hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
                                                     show_deobfuscated_code=options.show_deobfuscated_code,
                                                     deobfuscate=options.deobfuscate))
                else:  # (should be impossible)
                    raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
                count += 1

            except (SubstreamOpenError, UnexpectedDataError) as exc:
                if options.output_mode in ('triage', 'unspecified'):
                    print('%-12s %s - Error opening substream or unexpected '
                          'content' % ('?', filename))
                elif options.output_mode == 'json':
                    print_json(file=filename, type='error',
                               error=type(exc).__name__, message=str(exc))
                else:
                    log.exception('Error opening substream or unexpected '
                                  'content in %s' % filename)
                return_code = escalate(return_code, RETURN_OPEN_ERROR)
            except FileOpenError as exc:
                if options.output_mode in ('triage', 'unspecified'):
                    print('%-12s %s - File format not supported' % ('?', filename))
                elif options.output_mode == 'json':
                    print_json(file=filename, type='error',
                               error=type(exc).__name__, message=str(exc))
                else:
                    log.exception('Failed to open %s -- probably not supported!' % filename)
                return_code = escalate(return_code, RETURN_OPEN_ERROR)
            except ProcessingError as exc:
                if options.output_mode in ('triage', 'unspecified'):
                    print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc))
                elif options.output_mode == 'json':
                    print_json(file=filename, type='error',
                               error=type(exc).__name__,
                               message=str(exc.orig_exc))
                else:
                    log.exception('Error processing file %s (%s)!'
                                  % (filename, exc.orig_exc))
                return_code = escalate(return_code, RETURN_PARSE_ERROR)
            finally:
                if vba_parser is not None:
                    vba_parser.close()

        if options.output_mode == 'triage':
            print('\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, '
                  'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, '
                  'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n')

        if count == 1 and options.output_mode == 'unspecified':
            # if options -t, -d and -j were not specified and it's a single file, print details:
            # NOTE(review): vba_parser has already been closed by the finally
            # clause above; this presumably relies on results cached during
            # the triage pass -- confirm.
            vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
                                    display_code=options.display_code,
                                    hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
                                    show_deobfuscated_code=options.show_deobfuscated_code,
                                    deobfuscate=options.deobfuscate)

        if options.output_mode == 'json':
            # print last json entry (a last one without a comma) and closing ]
            print_json(type='MetaInformation', return_code=return_code,
                       n_processed=count, _json_is_last=True)

    except Exception as exc:
        # some unexpected error, maybe some of the types caught in except clauses
        # above were not sufficient. This is very bad, so log complete trace at exception level
        # and do not care about output mode
        log.exception('Unhandled exception in main: %s' % exc, exc_info=True)
        return_code = RETURN_UNEXPECTED    # even if there were others before -- this is more important
        # TODO: print msg with URL to report issues (except in JSON mode)

    # done. exit
    log.debug('will exit now with code %s' % return_code)
    sys.exit(return_code)
| 3377 | + | |
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
| 3380 | + | |
| 3381 | +# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness | ... | ... |