Commit e6148632893ae77754319a20284481033d2bbd8b

Authored by Philippe Lagadec
1 parent 7c2a7d81

olevba: improved VBA_Parser, refactored the main CLI functions

Showing 2 changed files with 125 additions and 56 deletions
oletools/olevba.py
@@ -145,6 +145,7 @@ https://github.com/unixfreak0037/officeparser @@ -145,6 +145,7 @@ https://github.com/unixfreak0037/officeparser
145 # 2015-07-09 v0.33 PL: - removed usage of sys.stderr which causes issues 145 # 2015-07-09 v0.33 PL: - removed usage of sys.stderr which causes issues
146 # 2015-07-12 PL: - added Hex function decoding to VBA Parser 146 # 2015-07-12 PL: - added Hex function decoding to VBA Parser
147 # 2015-07-13 PL: - added Base64 function decoding to VBA Parser 147 # 2015-07-13 PL: - added Base64 function decoding to VBA Parser
  148 +# 2015-09-06 PL: - improved VBA_Parser, refactored the main functions
148 149
149 __version__ = '0.33' 150 __version__ = '0.33'
150 151
@@ -1468,6 +1469,16 @@ class VBA_Scanner(object): @@ -1468,6 +1469,16 @@ class VBA_Scanner(object):
1468 self.code_base64 = '' 1469 self.code_base64 = ''
1469 self.code_dridex = '' 1470 self.code_dridex = ''
1470 self.code_vba = '' 1471 self.code_vba = ''
  1472 + self.strReverse = None
  1473 + # results = None before scanning, then a list of tuples after scanning
  1474 + self.results = None
  1475 + self.autoexec_keywords = None
  1476 + self.suspicious_keywords = None
  1477 + self.iocs = None
  1478 + self.hex_strings = None
  1479 + self.base64_strings = None
  1480 + self.dridex_strings = None
  1481 + self.vba_strings = None
1471 1482
1472 1483
1473 def scan(self, include_decoded_strings=False): 1484 def scan(self, include_decoded_strings=False):
@@ -1558,6 +1569,7 @@ class VBA_Scanner(object): @@ -1558,6 +1569,7 @@ class VBA_Scanner(object):
1558 for encoded, decoded in self.vba_strings: 1569 for encoded, decoded in self.vba_strings:
1559 if include_decoded_strings or is_printable(decoded): 1570 if include_decoded_strings or is_printable(decoded):
1560 results.append(('VBA string', decoded, encoded)) 1571 results.append(('VBA string', decoded, encoded))
  1572 + self.results = results
1561 return results 1573 return results
1562 1574
1563 def scan_summary(self): 1575 def scan_summary(self):
@@ -1569,7 +1581,9 @@ class VBA_Scanner(object): @@ -1569,7 +1581,9 @@ class VBA_Scanner(object):
1569 :return: tuple with the number of items found for each category: 1581 :return: tuple with the number of items found for each category:
1570 (autoexec, suspicious, IOCs, hex, base64, dridex, vba) 1582 (autoexec, suspicious, IOCs, hex, base64, dridex, vba)
1571 """ 1583 """
1572 - self.scan() 1584 + # avoid scanning the same code twice:
  1585 + if self.results is None:
  1586 + self.scan()
1573 return (len(self.autoexec_keywords), len(self.suspicious_keywords), 1587 return (len(self.autoexec_keywords), len(self.suspicious_keywords),
1574 len(self.iocs), len(self.hex_strings), len(self.base64_strings), 1588 len(self.iocs), len(self.hex_strings), len(self.base64_strings),
1575 len(self.dridex_strings), len(self.vba_strings)) 1589 len(self.dridex_strings), len(self.vba_strings))
@@ -1630,6 +1644,22 @@ class VBA_Parser(object): @@ -1630,6 +1644,22 @@ class VBA_Parser(object):
1630 self.filename = filename 1644 self.filename = filename
1631 self.type = None 1645 self.type = None
1632 self.vba_projects = None 1646 self.vba_projects = None
  1647 + self.contains_macros = None # will be set to True or False by detect_macros
  1648 + self.vba_code_all_modules = None # to store the source code of all modules
  1649 + # list of tuples for each module: (subfilename, stream_path, vba_filename, vba_code)
  1650 + self.modules = None
  1651 + # Analysis results: list of tuples (type, keyword, description) - See VBA_Scanner
  1652 + self.analysis_results = None
  1653 + # statistics for the scan summary and flags
  1654 + self.nb_macros = 0
  1655 + self.nb_autoexec = 0
  1656 + self.nb_suspicious = 0
  1657 + self.nb_iocs = 0
  1658 + self.nb_hexstrings = 0
  1659 + self.nb_base64strings = 0
  1660 + self.nb_dridexstrings = 0
  1661 + self.nb_vbastrings = 0
  1662 +
1633 # if filename is None: 1663 # if filename is None:
1634 # if isinstance(_file, basestring): 1664 # if isinstance(_file, basestring):
1635 # if len(_file) < olefile.MINIMAL_OLEFILE_SIZE: 1665 # if len(_file) < olefile.MINIMAL_OLEFILE_SIZE:
@@ -1857,19 +1887,25 @@ class VBA_Parser(object): @@ -1857,19 +1887,25 @@ class VBA_Parser(object):
1857 """ 1887 """
1858 #TODO: return None or raise exception if format not supported like PPT 97-2003 1888 #TODO: return None or raise exception if format not supported like PPT 97-2003
1859 #TODO: return the number of VBA projects found instead of True/False? 1889 #TODO: return the number of VBA projects found instead of True/False?
  1890 + # if this method was already called, return the previous result:
  1891 + if self.contains_macros is not None:
  1892 + return self.contains_macros
1860 # if OpenXML, check all the OLE subfiles: 1893 # if OpenXML, check all the OLE subfiles:
1861 if self.ole_file is None: 1894 if self.ole_file is None:
1862 for ole_subfile in self.ole_subfiles: 1895 for ole_subfile in self.ole_subfiles:
1863 if ole_subfile.detect_vba_macros(): 1896 if ole_subfile.detect_vba_macros():
  1897 + self.contains_macros = True
1864 return True 1898 return True
  1899 + # otherwise, no macro found:
  1900 + self.contains_macros = False
1865 return False 1901 return False
1866 # otherwise it's an OLE file, find VBA projects: 1902 # otherwise it's an OLE file, find VBA projects:
1867 vba_projects = self.find_vba_projects() 1903 vba_projects = self.find_vba_projects()
1868 if len(vba_projects) == 0: 1904 if len(vba_projects) == 0:
1869 - return False 1905 + self.contains_macros = False
1870 else: 1906 else:
1871 - return True  
1872 - 1907 + self.contains_macros = True
  1908 + return self.contains_macros
1873 1909
1874 def extract_macros(self): 1910 def extract_macros(self):
1875 """ 1911 """
@@ -1893,6 +1929,52 @@ class VBA_Parser(object): @@ -1893,6 +1929,52 @@ class VBA_Parser(object):
1893 yield (self.filename, stream_path, vba_filename, vba_code) 1929 yield (self.filename, stream_path, vba_filename, vba_code)
1894 1930
1895 1931
  1932 + def extract_all_macros(self):
  1933 + """
  1934 + Extract and decompress source code for each VBA macro found in the file
  1935 + by calling extract_macros(), store the results as a list of tuples
  1936 + (filename, stream_path, vba_filename, vba_code) in self.modules.
  1937 + See extract_macros for details.
  1938 + """
  1939 + if self.modules is None:
  1940 + self.modules = []
  1941 + for (subfilename, stream_path, vba_filename, vba_code) in self.extract_macros():
  1942 + self.modules.append((subfilename, stream_path, vba_filename, vba_code))
  1943 + self.nb_macros = len(self.modules)
  1944 + return self.modules
  1945 +
  1946 +
  1947 +
  1948 + def analyze_macros(self, show_decoded_strings=False):
  1949 + """
  1949 + runs extract_all_macros and analyzes the source code of all VBA macros
  1951 + found in the file.
  1952 + """
  1953 + if self.detect_vba_macros():
  1954 + # variable to merge source code from all modules:
  1955 + if self.vba_code_all_modules is None:
  1956 + self.vba_code_all_modules = ''
  1957 + for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
  1958 + #TODO: filter code? (each module)
  1959 + self.vba_code_all_modules += vba_code + '\n'
  1960 + # Analyze the whole code at once:
  1961 + scanner = VBA_Scanner(self.vba_code_all_modules)
  1962 + self.analysis_results = scanner.scan(show_decoded_strings)
  1963 + autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = scanner.scan_summary()
  1964 + self.nb_autoexec += autoexec
  1965 + self.nb_suspicious += suspicious
  1966 + self.nb_iocs += iocs
  1967 + self.nb_hexstrings += hexstrings
  1968 + self.nb_base64strings += base64strings
  1969 + self.nb_dridexstrings += dridex
  1970 + self.nb_vbastrings += vbastrings
  1971 +
  1972 + return self.analysis_results
  1973 +
  1974 +
  1975 +
  1976 +
  1977 +
1896 def close(self): 1978 def close(self):
1897 """ 1979 """
1898 Close all the open files. This method must be called after usage, if 1980 Close all the open files. This method must be called after usage, if
@@ -1905,7 +1987,7 @@ class VBA_Parser(object): @@ -1905,7 +1987,7 @@ class VBA_Parser(object):
1905 self.ole_file.close() 1987 self.ole_file.close()
1906 1988
1907 1989
1908 -def print_analysis(vba_code, show_decoded_strings=False): 1990 +def print_analysis(vba_parser, show_decoded_strings=False):
1909 """ 1991 """
1910 Analyze the provided VBA code, and print the results in a table 1992 Analyze the provided VBA code, and print the results in a table
1911 1993
@@ -1916,7 +1998,8 @@ def print_analysis(vba_code, show_decoded_strings=False): @@ -1916,7 +1998,8 @@ def print_analysis(vba_code, show_decoded_strings=False):
1916 # print a waiting message only if the output is not redirected to a file: 1998 # print a waiting message only if the output is not redirected to a file:
1917 if sys.stdout.isatty(): 1999 if sys.stdout.isatty():
1918 print 'Analysis...\r', 2000 print 'Analysis...\r',
1919 - results = scan_vba(vba_code, show_decoded_strings) 2001 + sys.stdout.flush()
  2002 + results = vba_parser.analyze_macros(show_decoded_strings)
1920 if results: 2003 if results:
1921 t = prettytable.PrettyTable(('Type', 'Keyword', 'Description')) 2004 t = prettytable.PrettyTable(('Type', 'Keyword', 'Description'))
1922 t.align = 'l' 2005 t.align = 'l'
@@ -1967,9 +2050,7 @@ def process_file(container, filename, data, show_decoded_strings=False, @@ -1967,9 +2050,7 @@ def process_file(container, filename, data, show_decoded_strings=False,
1967 print 'Type:', vba.type 2050 print 'Type:', vba.type
1968 if vba.detect_vba_macros(): 2051 if vba.detect_vba_macros():
1969 #print 'Contains VBA Macros:' 2052 #print 'Contains VBA Macros:'
1970 - # variable to merge source code from all modules:  
1971 - vba_code_all_modules = ''  
1972 - for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): 2053 + for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_all_macros():
1973 if hide_attributes: 2054 if hide_attributes:
1974 # hide attribute lines: 2055 # hide attribute lines:
1975 vba_code_filtered = filter_vba(vba_code) 2056 vba_code_filtered = filter_vba(vba_code)
@@ -1986,15 +2067,15 @@ def process_file(container, filename, data, show_decoded_strings=False, @@ -1986,15 +2067,15 @@ def process_file(container, filename, data, show_decoded_strings=False,
1986 else: 2067 else:
1987 print vba_code_filtered 2068 print vba_code_filtered
1988 if not global_analysis and not vba_code_only: 2069 if not global_analysis and not vba_code_only:
  2070 + #TODO: remove this option
  2071 + raise NotImplementedError
1989 print '- ' * 39 2072 print '- ' * 39
1990 print 'ANALYSIS:' 2073 print 'ANALYSIS:'
1991 # analyse each module's code, filtered to avoid false positives: 2074 # analyse each module's code, filtered to avoid false positives:
1992 - print_analysis(vba_code_filtered, show_decoded_strings)  
1993 - else:  
1994 - vba_code_all_modules += vba_code_filtered + '\n' 2075 + print_analysis(vba, show_decoded_strings)
1995 if global_analysis and not vba_code_only: 2076 if global_analysis and not vba_code_only:
1996 # analyse the code from all modules at once: 2077 # analyse the code from all modules at once:
1997 - print_analysis(vba_code_all_modules, show_decoded_strings) 2078 + print_analysis(vba, show_decoded_strings)
1998 else: 2079 else:
1999 print 'No VBA macros found.' 2080 print 'No VBA macros found.'
2000 except: #TypeError: 2081 except: #TypeError:
@@ -2005,6 +2086,13 @@ def process_file(container, filename, data, show_decoded_strings=False, @@ -2005,6 +2086,13 @@ def process_file(container, filename, data, show_decoded_strings=False,
2005 traceback.print_exc() 2086 traceback.print_exc()
2006 print '' 2087 print ''
2007 2088
  2089 +# short tag to display file types in triage mode:
  2090 +TYPE2TAG = {
  2091 + TYPE_OLE: 'OLE:',
  2092 + TYPE_OpenXML: 'OpX:',
  2093 + TYPE_Word2003_XML: 'XML:',
  2094 + TYPE_MHTML: 'MHT:',
  2095 +}
2008 2096
2009 def process_file_triage(container, filename, data): 2097 def process_file_triage(container, filename, data):
2010 """ 2098 """
@@ -2016,56 +2104,30 @@ def process_file_triage(container, filename, data): @@ -2016,56 +2104,30 @@ def process_file_triage(container, filename, data):
2016 :param data: bytes, content of the file if it is in a container, None if it is a file on disk. 2104 :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
2017 """ 2105 """
2018 #TODO: replace print by writing to a provided output file (sys.stdout by default) 2106 #TODO: replace print by writing to a provided output file (sys.stdout by default)
2019 - nb_macros = 0  
2020 - nb_autoexec = 0  
2021 - nb_suspicious = 0  
2022 - nb_iocs = 0  
2023 - nb_hexstrings = 0  
2024 - nb_base64strings = 0  
2025 - nb_dridexstrings = 0  
2026 - nb_vbastrings = 0  
2027 # ftype = 'Other' 2107 # ftype = 'Other'
2028 message = '' 2108 message = ''
2029 try: 2109 try:
2030 #TODO: handle olefile errors, when an OLE file is malformed 2110 #TODO: handle olefile errors, when an OLE file is malformed
2031 vba = VBA_Parser(filename, data) 2111 vba = VBA_Parser(filename, data)
2032 if vba.detect_vba_macros(): 2112 if vba.detect_vba_macros():
2033 - for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():  
2034 - nb_macros += 1  
2035 - if vba_code.strip() != '':  
2036 - # print a waiting message only if the output is not redirected to a file:  
2037 - if sys.stdout.isatty():  
2038 - print 'Analysis...\r',  
2039 - # analyse the whole code, filtered to avoid false positives:  
2040 - scanner = VBA_Scanner(filter_vba(vba_code))  
2041 - autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = scanner.scan_summary()  
2042 - nb_autoexec += autoexec  
2043 - nb_suspicious += suspicious  
2044 - nb_iocs += iocs  
2045 - nb_hexstrings += hexstrings  
2046 - nb_base64strings += base64strings  
2047 - nb_dridexstrings += dridex  
2048 - nb_vbastrings += vbastrings  
2049 - if vba.type == TYPE_OLE:  
2050 - flags = 'OLE:'  
2051 - elif vba.type == TYPE_OpenXML:  
2052 - flags = 'OpX:'  
2053 - elif vba.type == TYPE_Word2003_XML:  
2054 - flags = 'XML:'  
2055 - elif vba.type == TYPE_MHTML:  
2056 - flags = 'MHT:' 2113 + # print a waiting message only if the output is not redirected to a file:
  2114 + if sys.stdout.isatty():
  2115 + print 'Analysis...\r',
  2116 + sys.stdout.flush()
  2117 + vba.analyze_macros()
  2118 + flags = TYPE2TAG[vba.type]
2057 macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-' 2119 macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-'
2058 - if nb_macros: macros = 'M'  
2059 - if nb_autoexec: autoexec = 'A'  
2060 - if nb_suspicious: suspicious = 'S'  
2061 - if nb_iocs: iocs = 'I'  
2062 - if nb_hexstrings: hexstrings = 'H'  
2063 - if nb_base64strings: base64obf = 'B'  
2064 - if nb_dridexstrings: dridex = 'D'  
2065 - if nb_vbastrings: vba_obf = 'V' 2120 + if vba.nb_macros: macros = 'M'
  2121 + if vba.nb_autoexec: autoexec = 'A'
  2122 + if vba.nb_suspicious: suspicious = 'S'
  2123 + if vba.nb_iocs: iocs = 'I'
  2124 + if vba.nb_hexstrings: hexstrings = 'H'
  2125 + if vba.nb_base64strings: base64obf = 'B'
  2126 + if vba.nb_dridexstrings: dridex = 'D'
  2127 + if vba.nb_vbastrings: vba_obf = 'V'
2066 flags += '%s%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings, 2128 flags += '%s%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings,
2067 base64obf, dridex, vba_obf) 2129 base64obf, dridex, vba_obf)
2068 - 2130 + # old table display:
2069 # macros = autoexec = suspicious = iocs = hexstrings = 'no' 2131 # macros = autoexec = suspicious = iocs = hexstrings = 'no'
2070 # if nb_macros: macros = 'YES:%d' % nb_macros 2132 # if nb_macros: macros = 'YES:%d' % nb_macros
2071 # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec 2133 # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec
@@ -2123,7 +2185,7 @@ def main(): @@ -2123,7 +2185,7 @@ def main():
2123 parser.add_option("-r", action="store_true", dest="recursive", 2185 parser.add_option("-r", action="store_true", dest="recursive",
2124 help='find files recursively in subdirectories.') 2186 help='find files recursively in subdirectories.')
2125 parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, 2187 parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
2126 - help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') 2188 + help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
2127 parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', 2189 parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
2128 help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') 2190 help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
2129 parser.add_option("-t", '--triage', action="store_true", dest="triage_mode", 2191 parser.add_option("-t", '--triage', action="store_true", dest="triage_mode",
@@ -2162,17 +2224,23 @@ def main(): @@ -2162,17 +2224,23 @@ def main():
2162 logging.disable(logging.CRITICAL) 2224 logging.disable(logging.CRITICAL)
2163 2225
2164 if options.input: 2226 if options.input:
  2227 + #TODO: remove this option
  2228 + raise NotImplementedError
2165 # input file provided with VBA source code to be analyzed directly: 2229 # input file provided with VBA source code to be analyzed directly:
2166 print 'Analysis of VBA source code from %s:' % options.input 2230 print 'Analysis of VBA source code from %s:' % options.input
2167 vba_code = open(options.input).read() 2231 vba_code = open(options.input).read()
2168 print_analysis(vba_code, show_decoded_strings=options.show_decoded_strings) 2232 print_analysis(vba_code, show_decoded_strings=options.show_decoded_strings)
2169 sys.exit() 2233 sys.exit()
2170 2234
  2235 + # Old display with number of items detected:
2171 # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr') 2236 # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr')
2172 # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7) 2237 # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7)
  2238 +
  2239 + # Column headers (except if detailed mode)
2173 if not options.detailed_mode or options.triage_mode: 2240 if not options.detailed_mode or options.triage_mode:
2174 print '%-12s %-65s' % ('Flags', 'Filename') 2241 print '%-12s %-65s' % ('Flags', 'Filename')
2175 print '%-12s %-65s' % ('-' * 11, '-' * 65) 2242 print '%-12s %-65s' % ('-' * 11, '-' * 65)
  2243 +
2176 previous_container = None 2244 previous_container = None
2177 count = 0 2245 count = 0
2178 container = filename = data = None 2246 container = filename = data = None
@@ -2203,6 +2271,7 @@ def main(): @@ -2203,6 +2271,7 @@ def main():
2203 if count == 1 and not options.triage_mode and not options.detailed_mode: 2271 if count == 1 and not options.triage_mode and not options.detailed_mode:
2204 # if options -t and -d were not specified and it's a single file, print details: 2272 # if options -t and -d were not specified and it's a single file, print details:
2205 #TODO: avoid doing the analysis twice by storing results 2273 #TODO: avoid doing the analysis twice by storing results
  2274 + #TODO: all the cli functions should be methods of a class VBA_Parser_CLI
2206 process_file(container, filename, data, show_decoded_strings=options.show_decoded_strings, 2275 process_file(container, filename, data, show_decoded_strings=options.show_decoded_strings,
2207 display_code=options.display_code, global_analysis=options.global_analysis, 2276 display_code=options.display_code, global_analysis=options.global_analysis,
2208 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only) 2277 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only)
setup.py 100755 → 100644
@@ -37,7 +37,7 @@ import sys, os, fnmatch @@ -37,7 +37,7 @@ import sys, os, fnmatch
37 #--- METADATA ----------------------------------------------------------------- 37 #--- METADATA -----------------------------------------------------------------
38 38
39 name = "oletools" 39 name = "oletools"
40 -version = '0.12' 40 +version = '0.13'
41 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" 41 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR"
42 long_desc = open('oletools/README.rst').read() 42 long_desc = open('oletools/README.rst').read()
43 author ="Philippe Lagadec" 43 author ="Philippe Lagadec"