Commit e6148632893ae77754319a20284481033d2bbd8b
1 parent
7c2a7d81
olevba: improved VBA_Parser, refactored the main CLI functions
Showing
2 changed files
with
125 additions
and
56 deletions
oletools/olevba.py
| ... | ... | @@ -145,6 +145,7 @@ https://github.com/unixfreak0037/officeparser |
| 145 | 145 | # 2015-07-09 v0.33 PL: - removed usage of sys.stderr which causes issues |
| 146 | 146 | # 2015-07-12 PL: - added Hex function decoding to VBA Parser |
| 147 | 147 | # 2015-07-13 PL: - added Base64 function decoding to VBA Parser |
| 148 | +# 2015-09-06 PL: - improved VBA_Parser, refactored the main functions | |
| 148 | 149 | |
| 149 | 150 | __version__ = '0.33' |
| 150 | 151 | |
| ... | ... | @@ -1468,6 +1469,16 @@ class VBA_Scanner(object): |
| 1468 | 1469 | self.code_base64 = '' |
| 1469 | 1470 | self.code_dridex = '' |
| 1470 | 1471 | self.code_vba = '' |
| 1472 | + self.strReverse = None | |
| 1473 | + # results = None before scanning, then a list of tuples after scanning | |
| 1474 | + self.results = None | |
| 1475 | + self.autoexec_keywords = None | |
| 1476 | + self.suspicious_keywords = None | |
| 1477 | + self.iocs = None | |
| 1478 | + self.hex_strings = None | |
| 1479 | + self.base64_strings = None | |
| 1480 | + self.dridex_strings = None | |
| 1481 | + self.vba_strings = None | |
| 1471 | 1482 | |
| 1472 | 1483 | |
| 1473 | 1484 | def scan(self, include_decoded_strings=False): |
| ... | ... | @@ -1558,6 +1569,7 @@ class VBA_Scanner(object): |
| 1558 | 1569 | for encoded, decoded in self.vba_strings: |
| 1559 | 1570 | if include_decoded_strings or is_printable(decoded): |
| 1560 | 1571 | results.append(('VBA string', decoded, encoded)) |
| 1572 | + self.results = results | |
| 1561 | 1573 | return results |
| 1562 | 1574 | |
| 1563 | 1575 | def scan_summary(self): |
| ... | ... | @@ -1569,7 +1581,9 @@ class VBA_Scanner(object): |
| 1569 | 1581 | :return: tuple with the number of items found for each category: |
| 1570 | 1582 | (autoexec, suspicious, IOCs, hex, base64, dridex, vba) |
| 1571 | 1583 | """ |
| 1572 | - self.scan() | |
| 1584 | + # avoid scanning the same code twice: | |
| 1585 | + if self.results is None: | |
| 1586 | + self.scan() | |
| 1573 | 1587 | return (len(self.autoexec_keywords), len(self.suspicious_keywords), |
| 1574 | 1588 | len(self.iocs), len(self.hex_strings), len(self.base64_strings), |
| 1575 | 1589 | len(self.dridex_strings), len(self.vba_strings)) |
| ... | ... | @@ -1630,6 +1644,22 @@ class VBA_Parser(object): |
| 1630 | 1644 | self.filename = filename |
| 1631 | 1645 | self.type = None |
| 1632 | 1646 | self.vba_projects = None |
| 1647 | + self.contains_macros = None # will be set to True or False by detect_macros | |
| 1648 | + self.vba_code_all_modules = None # to store the source code of all modules | |
| 1649 | + # list of tuples for each module: (subfilename, stream_path, vba_filename, vba_code) | |
| 1650 | + self.modules = None | |
| 1651 | + # Analysis results: list of tuples (type, keyword, description) - See VBA_Scanner | |
| 1652 | + self.analysis_results = None | |
| 1653 | + # statistics for the scan summary and flags | |
| 1654 | + self.nb_macros = 0 | |
| 1655 | + self.nb_autoexec = 0 | |
| 1656 | + self.nb_suspicious = 0 | |
| 1657 | + self.nb_iocs = 0 | |
| 1658 | + self.nb_hexstrings = 0 | |
| 1659 | + self.nb_base64strings = 0 | |
| 1660 | + self.nb_dridexstrings = 0 | |
| 1661 | + self.nb_vbastrings = 0 | |
| 1662 | + | |
| 1633 | 1663 | # if filename is None: |
| 1634 | 1664 | # if isinstance(_file, basestring): |
| 1635 | 1665 | # if len(_file) < olefile.MINIMAL_OLEFILE_SIZE: |
| ... | ... | @@ -1857,19 +1887,25 @@ class VBA_Parser(object): |
| 1857 | 1887 | """ |
| 1858 | 1888 | #TODO: return None or raise exception if format not supported like PPT 97-2003 |
| 1859 | 1889 | #TODO: return the number of VBA projects found instead of True/False? |
| 1890 | + # if this method was already called, return the previous result: | |
| 1891 | + if self.contains_macros is not None: | |
| 1892 | + return self.contains_macros | |
| 1860 | 1893 | # if OpenXML, check all the OLE subfiles: |
| 1861 | 1894 | if self.ole_file is None: |
| 1862 | 1895 | for ole_subfile in self.ole_subfiles: |
| 1863 | 1896 | if ole_subfile.detect_vba_macros(): |
| 1897 | + self.contains_macros = True | |
| 1864 | 1898 | return True |
| 1899 | + # otherwise, no macro found: | |
| 1900 | + self.contains_macros = False | |
| 1865 | 1901 | return False |
| 1866 | 1902 | # otherwise it's an OLE file, find VBA projects: |
| 1867 | 1903 | vba_projects = self.find_vba_projects() |
| 1868 | 1904 | if len(vba_projects) == 0: |
| 1869 | - return False | |
| 1905 | + self.contains_macros = False | |
| 1870 | 1906 | else: |
| 1871 | - return True | |
| 1872 | - | |
| 1907 | + self.contains_macros = True | |
| 1908 | + return self.contains_macros | |
| 1873 | 1909 | |
| 1874 | 1910 | def extract_macros(self): |
| 1875 | 1911 | """ |
| ... | ... | @@ -1893,6 +1929,52 @@ class VBA_Parser(object): |
| 1893 | 1929 | yield (self.filename, stream_path, vba_filename, vba_code) |
| 1894 | 1930 | |
| 1895 | 1931 | |
| 1932 | + def extract_all_macros(self): | |
| 1933 | + """ | |
| 1934 | + Extract and decompress source code for each VBA macro found in the file | |
| 1935 | + by calling extract_macros(), store the results as a list of tuples | |
| 1936 | + (filename, stream_path, vba_filename, vba_code) in self.modules. | |
| 1937 | + See extract_macros for details. | |
| 1938 | + """ | |
| 1939 | + if self.modules is None: | |
| 1940 | + self.modules = [] | |
| 1941 | + for (subfilename, stream_path, vba_filename, vba_code) in self.extract_macros(): | |
| 1942 | + self.modules.append((subfilename, stream_path, vba_filename, vba_code)) | |
| 1943 | + self.nb_macros = len(self.modules) | |
| 1944 | + return self.modules | |
| 1945 | + | |
| 1946 | + | |
| 1947 | + | |
| 1948 | + def analyze_macros(self, show_decoded_strings=False): | |
| 1949 | + """ | |
| 1950 | + runs extract_all_macros and analyzes the source code of all VBA macros | |
| 1951 | + found in the file. | |
| 1952 | + """ | |
| 1953 | + if self.detect_vba_macros(): | |
| 1954 | + # variable to merge source code from all modules: | |
| 1955 | + if self.vba_code_all_modules is None: | |
| 1956 | + self.vba_code_all_modules = '' | |
| 1957 | + for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros(): | |
| 1958 | + #TODO: filter code? (each module) | |
| 1959 | + self.vba_code_all_modules += vba_code + '\n' | |
| 1960 | + # Analyze the whole code at once: | |
| 1961 | + scanner = VBA_Scanner(self.vba_code_all_modules) | |
| 1962 | + self.analysis_results = scanner.scan(show_decoded_strings) | |
| 1963 | + autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = scanner.scan_summary() | |
| 1964 | + self.nb_autoexec += autoexec | |
| 1965 | + self.nb_suspicious += suspicious | |
| 1966 | + self.nb_iocs += iocs | |
| 1967 | + self.nb_hexstrings += hexstrings | |
| 1968 | + self.nb_base64strings += base64strings | |
| 1969 | + self.nb_dridexstrings += dridex | |
| 1970 | + self.nb_vbastrings += vbastrings | |
| 1971 | + | |
| 1972 | + return self.analysis_results | |
| 1973 | + | |
| 1974 | + | |
| 1975 | + | |
| 1976 | + | |
| 1977 | + | |
| 1896 | 1978 | def close(self): |
| 1897 | 1979 | """ |
| 1898 | 1980 | Close all the open files. This method must be called after usage, if |
| ... | ... | @@ -1905,7 +1987,7 @@ class VBA_Parser(object): |
| 1905 | 1987 | self.ole_file.close() |
| 1906 | 1988 | |
| 1907 | 1989 | |
| 1908 | -def print_analysis(vba_code, show_decoded_strings=False): | |
| 1990 | +def print_analysis(vba_parser, show_decoded_strings=False): | |
| 1909 | 1991 | """ |
| 1910 | 1992 | Analyze the VBA macros of the provided VBA_Parser object, and print the results in a table |
| 1911 | 1993 | |
| ... | ... | @@ -1916,7 +1998,8 @@ def print_analysis(vba_code, show_decoded_strings=False): |
| 1916 | 1998 | # print a waiting message only if the output is not redirected to a file: |
| 1917 | 1999 | if sys.stdout.isatty(): |
| 1918 | 2000 | print 'Analysis...\r', |
| 1919 | - results = scan_vba(vba_code, show_decoded_strings) | |
| 2001 | + sys.stdout.flush() | |
| 2002 | + results = vba_parser.analyze_macros(show_decoded_strings) | |
| 1920 | 2003 | if results: |
| 1921 | 2004 | t = prettytable.PrettyTable(('Type', 'Keyword', 'Description')) |
| 1922 | 2005 | t.align = 'l' |
| ... | ... | @@ -1967,9 +2050,7 @@ def process_file(container, filename, data, show_decoded_strings=False, |
| 1967 | 2050 | print 'Type:', vba.type |
| 1968 | 2051 | if vba.detect_vba_macros(): |
| 1969 | 2052 | #print 'Contains VBA Macros:' |
| 1970 | - # variable to merge source code from all modules: | |
| 1971 | - vba_code_all_modules = '' | |
| 1972 | - for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): | |
| 2053 | + for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_all_macros(): | |
| 1973 | 2054 | if hide_attributes: |
| 1974 | 2055 | # hide attribute lines: |
| 1975 | 2056 | vba_code_filtered = filter_vba(vba_code) |
| ... | ... | @@ -1986,15 +2067,15 @@ def process_file(container, filename, data, show_decoded_strings=False, |
| 1986 | 2067 | else: |
| 1987 | 2068 | print vba_code_filtered |
| 1988 | 2069 | if not global_analysis and not vba_code_only: |
| 2070 | + #TODO: remove this option | |
| 2071 | + raise NotImplementedError | |
| 1989 | 2072 | print '- ' * 39 |
| 1990 | 2073 | print 'ANALYSIS:' |
| 1991 | 2074 | # analyse each module's code, filtered to avoid false positives: |
| 1992 | - print_analysis(vba_code_filtered, show_decoded_strings) | |
| 1993 | - else: | |
| 1994 | - vba_code_all_modules += vba_code_filtered + '\n' | |
| 2075 | + print_analysis(vba, show_decoded_strings) | |
| 1995 | 2076 | if global_analysis and not vba_code_only: |
| 1996 | 2077 | # analyse the code from all modules at once: |
| 1997 | - print_analysis(vba_code_all_modules, show_decoded_strings) | |
| 2078 | + print_analysis(vba, show_decoded_strings) | |
| 1998 | 2079 | else: |
| 1999 | 2080 | print 'No VBA macros found.' |
| 2000 | 2081 | except: #TypeError: |
| ... | ... | @@ -2005,6 +2086,13 @@ def process_file(container, filename, data, show_decoded_strings=False, |
| 2005 | 2086 | traceback.print_exc() |
| 2006 | 2087 | print '' |
| 2007 | 2088 | |
| 2089 | +# short tag to display file types in triage mode: | |
| 2090 | +TYPE2TAG = { | |
| 2091 | + TYPE_OLE: 'OLE:', | |
| 2092 | + TYPE_OpenXML: 'OpX:', | |
| 2093 | + TYPE_Word2003_XML: 'XML:', | |
| 2094 | + TYPE_MHTML: 'MHT:', | |
| 2095 | +} | |
| 2008 | 2096 | |
| 2009 | 2097 | def process_file_triage(container, filename, data): |
| 2010 | 2098 | """ |
| ... | ... | @@ -2016,56 +2104,30 @@ def process_file_triage(container, filename, data): |
| 2016 | 2104 | :param data: bytes, content of the file if it is in a container, None if it is a file on disk. |
| 2017 | 2105 | """ |
| 2018 | 2106 | #TODO: replace print by writing to a provided output file (sys.stdout by default) |
| 2019 | - nb_macros = 0 | |
| 2020 | - nb_autoexec = 0 | |
| 2021 | - nb_suspicious = 0 | |
| 2022 | - nb_iocs = 0 | |
| 2023 | - nb_hexstrings = 0 | |
| 2024 | - nb_base64strings = 0 | |
| 2025 | - nb_dridexstrings = 0 | |
| 2026 | - nb_vbastrings = 0 | |
| 2027 | 2107 | # ftype = 'Other' |
| 2028 | 2108 | message = '' |
| 2029 | 2109 | try: |
| 2030 | 2110 | #TODO: handle olefile errors, when an OLE file is malformed |
| 2031 | 2111 | vba = VBA_Parser(filename, data) |
| 2032 | 2112 | if vba.detect_vba_macros(): |
| 2033 | - for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): | |
| 2034 | - nb_macros += 1 | |
| 2035 | - if vba_code.strip() != '': | |
| 2036 | - # print a waiting message only if the output is not redirected to a file: | |
| 2037 | - if sys.stdout.isatty(): | |
| 2038 | - print 'Analysis...\r', | |
| 2039 | - # analyse the whole code, filtered to avoid false positives: | |
| 2040 | - scanner = VBA_Scanner(filter_vba(vba_code)) | |
| 2041 | - autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = scanner.scan_summary() | |
| 2042 | - nb_autoexec += autoexec | |
| 2043 | - nb_suspicious += suspicious | |
| 2044 | - nb_iocs += iocs | |
| 2045 | - nb_hexstrings += hexstrings | |
| 2046 | - nb_base64strings += base64strings | |
| 2047 | - nb_dridexstrings += dridex | |
| 2048 | - nb_vbastrings += vbastrings | |
| 2049 | - if vba.type == TYPE_OLE: | |
| 2050 | - flags = 'OLE:' | |
| 2051 | - elif vba.type == TYPE_OpenXML: | |
| 2052 | - flags = 'OpX:' | |
| 2053 | - elif vba.type == TYPE_Word2003_XML: | |
| 2054 | - flags = 'XML:' | |
| 2055 | - elif vba.type == TYPE_MHTML: | |
| 2056 | - flags = 'MHT:' | |
| 2113 | + # print a waiting message only if the output is not redirected to a file: | |
| 2114 | + if sys.stdout.isatty(): | |
| 2115 | + print 'Analysis...\r', | |
| 2116 | + sys.stdout.flush() | |
| 2117 | + vba.analyze_macros() | |
| 2118 | + flags = TYPE2TAG[vba.type] | |
| 2057 | 2119 | macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-' |
| 2058 | - if nb_macros: macros = 'M' | |
| 2059 | - if nb_autoexec: autoexec = 'A' | |
| 2060 | - if nb_suspicious: suspicious = 'S' | |
| 2061 | - if nb_iocs: iocs = 'I' | |
| 2062 | - if nb_hexstrings: hexstrings = 'H' | |
| 2063 | - if nb_base64strings: base64obf = 'B' | |
| 2064 | - if nb_dridexstrings: dridex = 'D' | |
| 2065 | - if nb_vbastrings: vba_obf = 'V' | |
| 2120 | + if vba.nb_macros: macros = 'M' | |
| 2121 | + if vba.nb_autoexec: autoexec = 'A' | |
| 2122 | + if vba.nb_suspicious: suspicious = 'S' | |
| 2123 | + if vba.nb_iocs: iocs = 'I' | |
| 2124 | + if vba.nb_hexstrings: hexstrings = 'H' | |
| 2125 | + if vba.nb_base64strings: base64obf = 'B' | |
| 2126 | + if vba.nb_dridexstrings: dridex = 'D' | |
| 2127 | + if vba.nb_vbastrings: vba_obf = 'V' | |
| 2066 | 2128 | flags += '%s%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings, |
| 2067 | 2129 | base64obf, dridex, vba_obf) |
| 2068 | - | |
| 2130 | + # old table display: | |
| 2069 | 2131 | # macros = autoexec = suspicious = iocs = hexstrings = 'no' |
| 2070 | 2132 | # if nb_macros: macros = 'YES:%d' % nb_macros |
| 2071 | 2133 | # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec |
| ... | ... | @@ -2123,7 +2185,7 @@ def main(): |
| 2123 | 2185 | parser.add_option("-r", action="store_true", dest="recursive", |
| 2124 | 2186 | help='find files recursively in subdirectories.') |
| 2125 | 2187 | parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, |
| 2126 | - help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') | |
| 2188 | + help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)') | |
| 2127 | 2189 | parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', |
| 2128 | 2190 | help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') |
| 2129 | 2191 | parser.add_option("-t", '--triage', action="store_true", dest="triage_mode", |
| ... | ... | @@ -2162,17 +2224,23 @@ def main(): |
| 2162 | 2224 | logging.disable(logging.CRITICAL) |
| 2163 | 2225 | |
| 2164 | 2226 | if options.input: |
| 2227 | + #TODO: remove this option | |
| 2228 | + raise NotImplementedError | |
| 2165 | 2229 | # input file provided with VBA source code to be analyzed directly: |
| 2166 | 2230 | print 'Analysis of VBA source code from %s:' % options.input |
| 2167 | 2231 | vba_code = open(options.input).read() |
| 2168 | 2232 | print_analysis(vba_code, show_decoded_strings=options.show_decoded_strings) |
| 2169 | 2233 | sys.exit() |
| 2170 | 2234 | |
| 2235 | + # Old display with number of items detected: | |
| 2171 | 2236 | # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr') |
| 2172 | 2237 | # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7) |
| 2238 | + | |
| 2239 | + # Column headers (except if detailed mode) | |
| 2173 | 2240 | if not options.detailed_mode or options.triage_mode: |
| 2174 | 2241 | print '%-12s %-65s' % ('Flags', 'Filename') |
| 2175 | 2242 | print '%-12s %-65s' % ('-' * 11, '-' * 65) |
| 2243 | + | |
| 2176 | 2244 | previous_container = None |
| 2177 | 2245 | count = 0 |
| 2178 | 2246 | container = filename = data = None |
| ... | ... | @@ -2203,6 +2271,7 @@ def main(): |
| 2203 | 2271 | if count == 1 and not options.triage_mode and not options.detailed_mode: |
| 2204 | 2272 | # if options -t and -d were not specified and it's a single file, print details: |
| 2205 | 2273 | #TODO: avoid doing the analysis twice by storing results |
| 2274 | + #TODO: all the cli functions should be methods of a class VBA_Parser_CLI | |
| 2206 | 2275 | process_file(container, filename, data, show_decoded_strings=options.show_decoded_strings, |
| 2207 | 2276 | display_code=options.display_code, global_analysis=options.global_analysis, |
| 2208 | 2277 | hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only) | ... | ... |
setup.py
100755 → 100644
| ... | ... | @@ -37,7 +37,7 @@ import sys, os, fnmatch |
| 37 | 37 | #--- METADATA ----------------------------------------------------------------- |
| 38 | 38 | |
| 39 | 39 | name = "oletools" |
| 40 | -version = '0.12' | |
| 40 | +version = '0.13' | |
| 41 | 41 | desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" |
| 42 | 42 | long_desc = open('oletools/README.rst').read() |
| 43 | 43 | author ="Philippe Lagadec" | ... | ... |