Commit e6d5614b555b4cd1cfff256f7629ecc5074d3744

Authored by Philippe Lagadec
1 parent e6148632

olevba: moved main functions to a class VBA_Parser_CLI, fixed issue when analysis was done twice

Showing 1 changed file with 209 additions and 180 deletions
oletools/olevba.py
... ... @@ -146,6 +146,8 @@ https://github.com/unixfreak0037/officeparser
146 146 # 2015-07-12 PL: - added Hex function decoding to VBA Parser
147 147 # 2015-07-13 PL: - added Base64 function decoding to VBA Parser
148 148 # 2015-09-06 PL: - improved VBA_Parser, refactored the main functions
  149 +# 2015-09-13 PL: - moved main functions to a class VBA_Parser_CLI
  150 +# - fixed issue when analysis was done twice
149 151  
150 152 __version__ = '0.33'
151 153  
... ... @@ -234,6 +236,15 @@ TYPE_OpenXML = 'OpenXML'
234 236 TYPE_Word2003_XML = 'Word2003_XML'
235 237 TYPE_MHTML = 'MHTML'
236 238  
  239 +# short tag to display file types in triage mode:
  240 +TYPE2TAG = {
  241 + TYPE_OLE: 'OLE:',
  242 + TYPE_OpenXML: 'OpX:',
  243 + TYPE_Word2003_XML: 'XML:',
  244 + TYPE_MHTML: 'MHT:',
  245 +}
  246 +
  247 +
237 248 # MSO files ActiveMime header magic
238 249 MSO_ACTIVEMIME_HEADER = 'ActiveMime'
239 250  
... ... @@ -1619,7 +1630,7 @@ class VBA_Parser(object):
1619 1630 - PowerPoint 2007+ (.pptm, .ppsm)
1620 1631 """
1621 1632  
1622   - def __init__(self, filename, data=None):
  1633 + def __init__(self, filename, data=None, container=None):
1623 1634 """
1624 1635 Constructor for VBA_Parser
1625 1636  
... ... @@ -1628,6 +1639,9 @@ class VBA_Parser(object):
1628 1639 :param data: None or bytes str, if None the file will be read from disk (or from the file-like object).
1629 1640 If data is provided as a bytes string, it will be parsed as the content of the file in memory,
1630 1641 and not read from disk. Note: files must be read in binary mode, i.e. open(f, 'rb').
  1642 +
  1643 + :param container: str, path and filename of container if the file is within
  1644 + a zip archive, None otherwise.
1631 1645 """
1632 1646 #TODO: filename should only be a string, data should be used for the file-like object
1633 1647 #TODO: filename should be mandatory, optional data is a string or file-like object
... ... @@ -1642,6 +1656,7 @@ class VBA_Parser(object):
1642 1656 self.ole_file = None
1643 1657 self.ole_subfiles = []
1644 1658 self.filename = filename
  1659 + self.container = container
1645 1660 self.type = None
1646 1661 self.vba_projects = None
1647 1662 self.contains_macros = None # will be set to True or False by detect_macros
... ... @@ -1951,6 +1966,9 @@ class VBA_Parser(object):
1951 1966 found in the file.
1952 1967 """
1953 1968 if self.detect_vba_macros():
  1969 + # if the analysis was already done, avoid doing it twice:
  1970 + if self.analysis_results is not None:
  1971 + return self.analysis_results
1954 1972 # variable to merge source code from all modules:
1955 1973 if self.vba_code_all_modules is None:
1956 1974 self.vba_code_all_modules = ''
... ... @@ -1987,187 +2005,197 @@ class VBA_Parser(object):
1987 2005 self.ole_file.close()
1988 2006  
1989 2007  
1990   -def print_analysis(vba_parser, show_decoded_strings=False):
1991   - """
1992   - Analyze the provided VBA code, and print the results in a table
1993 2008  
1994   - :param vba_code: str, VBA source code to be analyzed
1995   - :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
1996   - :return: None
  2009 +class VBA_Parser_CLI(VBA_Parser):
1997 2010 """
1998   - # print a waiting message only if the output is not redirected to a file:
1999   - if sys.stdout.isatty():
2000   - print 'Analysis...\r',
2001   - sys.stdout.flush()
2002   - results = vba_parser.analyze_macros(show_decoded_strings)
2003   - if results:
2004   - t = prettytable.PrettyTable(('Type', 'Keyword', 'Description'))
2005   - t.align = 'l'
2006   - t.max_width['Type'] = 10
2007   - t.max_width['Keyword'] = 20
2008   - t.max_width['Description'] = 39
2009   - for kw_type, keyword, description in results:
2010   - # handle non printable strings:
2011   - if not is_printable(keyword):
2012   - keyword = repr(keyword)
2013   - if not is_printable(description):
2014   - description = repr(description)
2015   - t.add_row((kw_type, keyword, description))
2016   - print t
2017   - else:
2018   - print 'No suspicious keyword or IOC found.'
  2011 + VBA parser and analyzer, adding methods for the command line interface
  2012 + of olevba. (see VBA_Parser)
  2013 + """
  2014 +
  2015 + def __init__(self, filename, data=None, container=None):
  2016 + """
  2017 + Constructor for VBA_Parser_CLI.
  2018 + Calls __init__ from VBA_Parser, but handles the TypeError exception
  2019 + when the file type is not supported.
2019 2020  
  2021 + :param filename: filename or path of file to parse, or file-like object
2020 2022  
2021   -def process_file(container, filename, data, show_decoded_strings=False,
2022   - display_code=True, global_analysis=True, hide_attributes=True,
2023   - vba_code_only=False):
2024   - """
2025   - Process a single file
2026   -
2027   - :param container: str, path and filename of container if the file is within
2028   - a zip archive, None otherwise.
2029   - :param filename: str, path and filename of file on disk, or within the container.
2030   - :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
2031   - :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
2032   - :param display_code: bool, if False VBA source code is not displayed (default True)
2033   - :param global_analysis: bool, if True all modules are merged for a single analysis (default),
2034   - otherwise each module is analyzed separately (old behaviour)
2035   - :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
2036   - """
2037   - #TODO: replace print by writing to a provided output file (sys.stdout by default)
2038   - # fix conflicting parameters:
2039   - if vba_code_only and not display_code:
2040   - display_code = True
2041   - if container:
2042   - display_filename = '%s in %s' % (filename, container)
2043   - else:
2044   - display_filename = filename
2045   - print '=' * 79
2046   - print 'FILE:', display_filename
2047   - try:
2048   - #TODO: handle olefile errors, when an OLE file is malformed
2049   - vba = VBA_Parser(filename, data)
2050   - print 'Type:', vba.type
2051   - if vba.detect_vba_macros():
2052   - #print 'Contains VBA Macros:'
2053   - for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_all_macros():
2054   - if hide_attributes:
2055   - # hide attribute lines:
2056   - vba_code_filtered = filter_vba(vba_code)
2057   - else:
2058   - vba_code_filtered = vba_code
2059   - print '-' * 79
2060   - print 'VBA MACRO %s ' % vba_filename
2061   - print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))
2062   - if display_code:
2063   - print '- ' * 39
2064   - # detect empty macros:
2065   - if vba_code_filtered.strip() == '':
2066   - print '(empty macro)'
2067   - else:
2068   - print vba_code_filtered
2069   - if not global_analysis and not vba_code_only:
2070   - #TODO: remove this option
2071   - raise NotImplementedError
2072   - print '- ' * 39
2073   - print 'ANALYSIS:'
2074   - # analyse each module's code, filtered to avoid false positives:
2075   - print_analysis(vba, show_decoded_strings)
2076   - if global_analysis and not vba_code_only:
2077   - # analyse the code from all modules at once:
2078   - print_analysis(vba, show_decoded_strings)
  2023 + :param data: None or bytes str, if None the file will be read from disk (or from the file-like object).
  2024 + If data is provided as a bytes string, it will be parsed as the content of the file in memory,
  2025 + and not read from disk. Note: files must be read in binary mode, i.e. open(f, 'rb').
  2026 +
  2027 + :param container: str, path and filename of container if the file is within
  2028 + a zip archive, None otherwise.
  2029 + """
  2030 + try:
  2031 + VBA_Parser.__init__(self, filename, data=data, container=container)
  2032 + except TypeError:
  2033 + # in that case, self.type=None
  2034 + pass
  2035 +
  2036 +
  2037 + def print_analysis(self, show_decoded_strings=False):
  2038 + """
  2039 + Analyze the VBA macros of the parsed file, and print the results in a table
  2040 +
  2041 + (the VBA source code is not a parameter: it is analyzed through self.analyze_macros)
  2042 + :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
  2043 + :return: None
  2044 + """
  2045 + # print a waiting message only if the output is not redirected to a file:
  2046 + if sys.stdout.isatty():
  2047 + print 'Analysis...\r',
  2048 + sys.stdout.flush()
  2049 + results = self.analyze_macros(show_decoded_strings)
  2050 + if results:
  2051 + t = prettytable.PrettyTable(('Type', 'Keyword', 'Description'))
  2052 + t.align = 'l'
  2053 + t.max_width['Type'] = 10
  2054 + t.max_width['Keyword'] = 20
  2055 + t.max_width['Description'] = 39
  2056 + for kw_type, keyword, description in results:
  2057 + # handle non printable strings:
  2058 + if not is_printable(keyword):
  2059 + keyword = repr(keyword)
  2060 + if not is_printable(description):
  2061 + description = repr(description)
  2062 + t.add_row((kw_type, keyword, description))
  2063 + print t
2079 2064 else:
2080   - print 'No VBA macros found.'
2081   - except: #TypeError:
2082   - #raise
2083   - #TODO: print more info if debug mode
2084   - #print sys.exc_value
2085   - # display the exception with full stack trace for debugging, but do not stop:
2086   - traceback.print_exc()
2087   - print ''
  2065 + print 'No suspicious keyword or IOC found.'
2088 2066  
2089   -# short tag to display file types in triage mode:
2090   -TYPE2TAG = {
2091   - TYPE_OLE: 'OLE:',
2092   - TYPE_OpenXML: 'OpX:',
2093   - TYPE_Word2003_XML: 'XML:',
2094   - TYPE_MHTML: 'MHT:',
2095   -}
2096 2067  
2097   -def process_file_triage(container, filename, data):
2098   - """
2099   - Process a single file
  2068 + def process_file(self, show_decoded_strings=False,
  2069 + display_code=True, global_analysis=True, hide_attributes=True,
  2070 + vba_code_only=False):
  2071 + """
  2072 + Process a single file
  2073 +
  2074 + Note: the file name and its container are not parameters of this method,
  2075 + they are read from self.filename and self.container.
  2076 + :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
  2077 + :param display_code: bool, if False VBA source code is not displayed (default True)
  2078 + :param global_analysis: bool, if True all modules are merged for a single analysis (default),
  2079 + otherwise each module is analyzed separately (old behaviour)
  2080 + :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
  2081 + """
  2082 + #TODO: replace print by writing to a provided output file (sys.stdout by default)
  2083 + # fix conflicting parameters:
  2084 + if vba_code_only and not display_code:
  2085 + display_code = True
  2086 + if self.container:
  2087 + display_filename = '%s in %s' % (self.filename, self.container)
  2088 + else:
  2089 + display_filename = self.filename
  2090 + print '=' * 79
  2091 + print 'FILE:', display_filename
  2092 + try:
  2093 + #TODO: handle olefile errors, when an OLE file is malformed
  2094 + print 'Type:', self.type
  2095 + if self.detect_vba_macros():
  2096 + #print 'Contains VBA Macros:'
  2097 + for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
  2098 + if hide_attributes:
  2099 + # hide attribute lines:
  2100 + vba_code_filtered = filter_vba(vba_code)
  2101 + else:
  2102 + vba_code_filtered = vba_code
  2103 + print '-' * 79
  2104 + print 'VBA MACRO %s ' % vba_filename
  2105 + print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))
  2106 + if display_code:
  2107 + print '- ' * 39
  2108 + # detect empty macros:
  2109 + if vba_code_filtered.strip() == '':
  2110 + print '(empty macro)'
  2111 + else:
  2112 + print vba_code_filtered
  2113 + if not global_analysis and not vba_code_only:
  2114 + #TODO: remove this option
  2115 + raise NotImplementedError
  2116 + print '- ' * 39
  2117 + print 'ANALYSIS:'
  2118 + # analyse each module's code, filtered to avoid false positives:
  2119 + self.print_analysis(show_decoded_strings)
  2120 + if global_analysis and not vba_code_only:
  2121 + # analyse the code from all modules at once:
  2122 + self.print_analysis(show_decoded_strings)
  2123 + else:
  2124 + print 'No VBA macros found.'
  2125 + except: #TypeError:
  2126 + #raise
  2127 + #TODO: print more info if debug mode
  2128 + #print sys.exc_value
  2129 + # display the exception with full stack trace for debugging, but do not stop:
  2130 + traceback.print_exc()
  2131 + print ''
2100 2132  
2101   - :param container: str, path and filename of container if the file is within
2102   - a zip archive, None otherwise.
2103   - :param filename: str, path and filename of file on disk, or within the container.
2104   - :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
2105   - """
2106   - #TODO: replace print by writing to a provided output file (sys.stdout by default)
2107   - # ftype = 'Other'
2108   - message = ''
2109   - try:
2110   - #TODO: handle olefile errors, when an OLE file is malformed
2111   - vba = VBA_Parser(filename, data)
2112   - if vba.detect_vba_macros():
2113   - # print a waiting message only if the output is not redirected to a file:
2114   - if sys.stdout.isatty():
2115   - print 'Analysis...\r',
2116   - sys.stdout.flush()
2117   - vba.analyze_macros()
2118   - flags = TYPE2TAG[vba.type]
2119   - macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-'
2120   - if vba.nb_macros: macros = 'M'
2121   - if vba.nb_autoexec: autoexec = 'A'
2122   - if vba.nb_suspicious: suspicious = 'S'
2123   - if vba.nb_iocs: iocs = 'I'
2124   - if vba.nb_hexstrings: hexstrings = 'H'
2125   - if vba.nb_base64strings: base64obf = 'B'
2126   - if vba.nb_dridexstrings: dridex = 'D'
2127   - if vba.nb_vbastrings: vba_obf = 'V'
2128   - flags += '%s%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings,
2129   - base64obf, dridex, vba_obf)
2130   - # old table display:
2131   - # macros = autoexec = suspicious = iocs = hexstrings = 'no'
2132   - # if nb_macros: macros = 'YES:%d' % nb_macros
2133   - # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec
2134   - # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious
2135   - # if nb_iocs: iocs = 'YES:%d' % nb_iocs
2136   - # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings
2137   - # # 2nd line = info
2138   - # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (vba.type, macros, autoexec, suspicious, iocs, hexstrings)
2139   - except TypeError:
2140   - # file type not OLE nor OpenXML
2141   - flags = '?'
2142   - message = 'File format not supported'
2143   - except:
2144   - # another error occurred
2145   - #raise
2146   - #TODO: print more info if debug mode
2147   - #TODO: distinguish real errors from incorrect file types
2148   - flags = '!ERROR'
2149   - message = sys.exc_value
2150   - line = '%-12s %s' % (flags, filename)
2151   - if message:
2152   - line += ' - %s' % message
2153   - print line
2154   -
2155   - # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'),
2156   - # header=False, border=False)
2157   - # t.align = 'l'
2158   - # t.max_width['filename'] = 30
2159   - # t.max_width['type'] = 10
2160   - # t.max_width['macros'] = 6
2161   - # t.max_width['autoexec'] = 6
2162   - # t.max_width['suspicious'] = 6
2163   - # t.max_width['ioc'] = 6
2164   - # t.max_width['hexstrings'] = 6
2165   - # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings))
2166   - # print t
2167   -
2168   -
2169   -def main_triage_quick():
2170   - pass
  2133 +
  2134 + def process_file_triage(self):
  2135 + """
  2136 + Process a file in triage mode, showing only summary results on one line.
  2137 + """
  2138 + #TODO: replace print by writing to a provided output file (sys.stdout by default)
  2139 + message = ''
  2140 + try:
  2141 + if self.type is not None:
  2142 + #TODO: handle olefile errors, when an OLE file is malformed
  2143 + if self.detect_vba_macros():
  2144 + # print a waiting message only if the output is not redirected to a file:
  2145 + if sys.stdout.isatty():
  2146 + print 'Analysis...\r',
  2147 + sys.stdout.flush()
  2148 + self.analyze_macros()
  2149 + flags = TYPE2TAG[self.type]
  2150 + macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-'
  2151 + if self.nb_macros: macros = 'M'
  2152 + if self.nb_autoexec: autoexec = 'A'
  2153 + if self.nb_suspicious: suspicious = 'S'
  2154 + if self.nb_iocs: iocs = 'I'
  2155 + if self.nb_hexstrings: hexstrings = 'H'
  2156 + if self.nb_base64strings: base64obf = 'B'
  2157 + if self.nb_dridexstrings: dridex = 'D'
  2158 + if self.nb_vbastrings: vba_obf = 'V'
  2159 + flags += '%s%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings,
  2160 + base64obf, dridex, vba_obf)
  2161 + # old table display:
  2162 + # macros = autoexec = suspicious = iocs = hexstrings = 'no'
  2163 + # if nb_macros: macros = 'YES:%d' % nb_macros
  2164 + # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec
  2165 + # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious
  2166 + # if nb_iocs: iocs = 'YES:%d' % nb_iocs
  2167 + # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings
  2168 + # # 2nd line = info
  2169 + # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (self.type, macros, autoexec, suspicious, iocs, hexstrings)
  2170 + else:
  2171 + # self.type==None
  2172 + # file type not supported (the constructor could not identify it)
  2173 + flags = '?'
  2174 + message = 'File format not supported'
  2175 + except:
  2176 + # another error occurred
  2177 + #raise
  2178 + #TODO: print more info if debug mode
  2179 + #TODO: distinguish real errors from incorrect file types
  2180 + flags = '!ERROR'
  2181 + message = sys.exc_value
  2182 + line = '%-12s %s' % (flags, self.filename)
  2183 + if message:
  2184 + line += ' - %s' % message
  2185 + print line
  2186 +
  2187 + # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'),
  2188 + # header=False, border=False)
  2189 + # t.align = 'l'
  2190 + # t.max_width['filename'] = 30
  2191 + # t.max_width['type'] = 10
  2192 + # t.max_width['macros'] = 6
  2193 + # t.max_width['autoexec'] = 6
  2194 + # t.max_width['suspicious'] = 6
  2195 + # t.max_width['ioc'] = 6
  2196 + # t.max_width['hexstrings'] = 6
  2197 + # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings))
  2198 + # print t
2171 2199  
2172 2200  
2173 2201 #=== MAIN =====================================================================
... ... @@ -2244,14 +2272,17 @@ def main():
2244 2272 previous_container = None
2245 2273 count = 0
2246 2274 container = filename = data = None
  2275 + vba_parser = None
2247 2276 for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
2248 2277 zip_password=options.zip_password, zip_fname=options.zip_fname):
2249 2278 # ignore directory names stored in zip files:
2250 2279 if container and filename.endswith('/'):
2251 2280 continue
  2281 + # Open the file
  2282 + vba_parser = VBA_Parser_CLI(filename, data=data, container=container)
2252 2283 if options.detailed_mode and not options.triage_mode:
2253 2284 # fully detailed output
2254   - process_file(container, filename, data, show_decoded_strings=options.show_decoded_strings,
  2285 + vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
2255 2286 display_code=options.display_code, global_analysis=options.global_analysis,
2256 2287 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only)
2257 2288 else:
... ... @@ -2261,7 +2292,7 @@ def main():
2261 2292 print '\nFiles in %s:' % container
2262 2293 previous_container = container
2263 2294 # summarized output for triage:
2264   - process_file_triage(container, filename, data)
  2295 + vba_parser.process_file_triage()
2265 2296 count += 1
2266 2297 if not options.detailed_mode or options.triage_mode:
2267 2298 print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, M=Macros, ' \
... ... @@ -2270,9 +2301,7 @@ def main():
2270 2301  
2271 2302 if count == 1 and not options.triage_mode and not options.detailed_mode:
2272 2303 # if options -t and -d were not specified and it's a single file, print details:
2273   - #TODO: avoid doing the analysis twice by storing results
2274   - #TODO: all the cli functions should be methods of a class VBA_Parser_CLI
2275   - process_file(container, filename, data, show_decoded_strings=options.show_decoded_strings,
  2304 + vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
2276 2305 display_code=options.display_code, global_analysis=options.global_analysis,
2277 2306 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only)
2278 2307  
... ... @@ -2280,4 +2309,4 @@ def main():
2280 2309 if __name__ == '__main__':
2281 2310 main()
2282 2311  
2283   - # This was coded while listening to "Dust" from I Love You But I've Chosen Darkness
2284 2312 \ No newline at end of file
  2313 +# This was coded while listening to "Dust" from I Love You But I've Chosen Darkness
2285 2314 \ No newline at end of file
... ...