Commit 28aa9a84cf42e1613b269c6d1caaff60eaa309f2

Authored by Philippe Lagadec
1 parent d7122049

olevba: added support for text files containing VBA source code

Showing 1 changed file with 48 additions and 13 deletions
oletools/olevba.py
@@ -154,6 +154,7 @@ https://github.com/unixfreak0037/officeparser @@ -154,6 +154,7 @@ https://github.com/unixfreak0037/officeparser
154 # 2015-09-22 v0.41 PL: - added new option --reveal 154 # 2015-09-22 v0.41 PL: - added new option --reveal
155 # - added suspicious strings for PowerShell.exe options 155 # - added suspicious strings for PowerShell.exe options
156 # 2015-10-09 v0.42 PL: - VBA_Parser: split each format into a separate method 156 # 2015-10-09 v0.42 PL: - VBA_Parser: split each format into a separate method
  157 +# 2015-10-10 PL: - added support for text files with VBA source code
157 158
158 __version__ = '0.42' 159 __version__ = '0.42'
159 160
@@ -241,6 +242,7 @@ TYPE_OLE = 'OLE' @@ -241,6 +242,7 @@ TYPE_OLE = 'OLE'
241 TYPE_OpenXML = 'OpenXML' 242 TYPE_OpenXML = 'OpenXML'
242 TYPE_Word2003_XML = 'Word2003_XML' 243 TYPE_Word2003_XML = 'Word2003_XML'
243 TYPE_MHTML = 'MHTML' 244 TYPE_MHTML = 'MHTML'
  245 +TYPE_TEXT = 'Text'
244 246
245 # short tag to display file types in triage mode: 247 # short tag to display file types in triage mode:
246 TYPE2TAG = { 248 TYPE2TAG = {
@@ -248,6 +250,7 @@ TYPE2TAG = { @@ -248,6 +250,7 @@ TYPE2TAG = {
248 TYPE_OpenXML: 'OpX:', 250 TYPE_OpenXML: 'OpX:',
249 TYPE_Word2003_XML: 'XML:', 251 TYPE_Word2003_XML: 'XML:',
250 TYPE_MHTML: 'MHT:', 252 TYPE_MHTML: 'MHT:',
  253 + TYPE_TEXT: 'TXT:',
251 } 254 }
252 255
253 256
@@ -1721,31 +1724,36 @@ class VBA_Parser(object): @@ -1721,31 +1724,36 @@ class VBA_Parser(object):
1721 if olefile.isOleFile(_file): 1724 if olefile.isOleFile(_file):
1722 # This looks like an OLE file 1725 # This looks like an OLE file
1723 self.open_ole(_file) 1726 self.open_ole(_file)
1724 - elif zipfile.is_zipfile(_file): 1727 + if self.type is None and zipfile.is_zipfile(_file):
1725 # Zip file, which may be an OpenXML document 1728 # Zip file, which may be an OpenXML document
1726 self.open_openxml(_file) 1729 self.open_openxml(_file)
1727 - else: 1730 + if self.type is None:
1728 # read file from disk, check if it is a Word 2003 XML file (WordProcessingML), Excel 2003 XML, 1731 # read file from disk, check if it is a Word 2003 XML file (WordProcessingML), Excel 2003 XML,
1729 # or a plain text file containing VBA code 1732 # or a plain text file containing VBA code
1730 if data is None: 1733 if data is None:
1731 data = open(filename, 'rb').read() 1734 data = open(filename, 'rb').read()
1732 - # store a lowercase version for some tests:  
1733 - data_lowercase = data.lower()  
1734 # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace 1735 # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace
1735 if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: 1736 if 'http://schemas.microsoft.com/office/word/2003/wordml' in data:
1736 self.open_word2003xml(data) 1737 self.open_word2003xml(data)
  1738 + # store a lowercase version for the next tests:
  1739 + data_lowercase = data.lower()
1737 # check if it is a MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"): 1740 # check if it is a MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"):
1738 # According to my tests, these files usually start with "MIME-Version: 1.0" on the 1st line 1741 # According to my tests, these files usually start with "MIME-Version: 1.0" on the 1st line
1739 # BUT Word accepts a blank line or other MIME headers inserted before, 1742 # BUT Word accepts a blank line or other MIME headers inserted before,
1740 # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored. 1743 # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored.
1741 # And the line is case insensitive. 1744 # And the line is case insensitive.
1742 # so we'll just check the presence of mime, version and multipart anywhere: 1745 # so we'll just check the presence of mime, version and multipart anywhere:
1743 - if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase and 'multipart' in data_lowercase: 1746 + if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase \
  1747 + and 'multipart' in data_lowercase:
1744 self.open_mht(data) 1748 self.open_mht(data)
1745 #TODO: handle exceptions 1749 #TODO: handle exceptions
1746 #TODO: Excel 2003 XML 1750 #TODO: Excel 2003 XML
1747 - #TODO: plain text VBA file 1751 + # Check if this is a plain text VBA or VBScript file:
  1752 + # To avoid scanning binary files, we simply check that the data contains no null bytes:
  1753 + if self.type is None and '\x00' not in data:
  1754 + self.open_text(data)
1748 if self.type is None: 1755 if self.type is None:
  1756 + # At this stage, could not match a known format:
1749 msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename 1757 msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename
1750 logging.error(msg) 1758 logging.error(msg)
1751 raise TypeError(msg) 1759 raise TypeError(msg)
@@ -1885,6 +1893,25 @@ class VBA_Parser(object): @@ -1885,6 +1893,25 @@ class VBA_Parser(object):
1885 pass 1893 pass
1886 1894
1887 1895
  1896 + def open_text(self, data):
  1897 + """
  1898 + Open a text file containing VBA or VBScript source code
  1899 + :param data: file contents in a string or bytes
  1900 + :return: nothing
  1901 + """
  1902 + logging.info('Opening text file %s' % self.filename)
  1903 + try:
  1904 + # directly store the source code:
  1905 + self.vba_code_all_modules = data
  1906 + self.contains_macros = True
  1907 + # set type only if parsing succeeds
  1908 + self.type = TYPE_TEXT
  1909 + except:
  1910 + logging.exception('Failed text parsing for file %r - %s'
  1911 + % (self.filename, MSG_OLEVBA_ISSUES))
  1912 + pass
  1913 +
  1914 +
1888 def find_vba_projects(self): 1915 def find_vba_projects(self):
1889 """ 1916 """
1890 Finds all the VBA projects stored in an OLE file. 1917 Finds all the VBA projects stored in an OLE file.
@@ -2012,10 +2039,17 @@ class VBA_Parser(object): @@ -2012,10 +2039,17 @@ class VBA_Parser(object):
2012 within the zip archive, e.g. word/vbaProject.bin. 2039 within the zip archive, e.g. word/vbaProject.bin.
2013 """ 2040 """
2014 if self.ole_file is None: 2041 if self.ole_file is None:
2015 - for ole_subfile in self.ole_subfiles:  
2016 - for results in ole_subfile.extract_macros():  
2017 - yield results 2042 + # This may be either an OpenXML or a text file:
  2043 + if self.type == TYPE_TEXT:
  2044 + # This is a text file, yield the full code:
  2045 + yield (self.filename, '', self.filename, self.vba_code_all_modules)
  2046 + else:
  2047 + # OpenXML: recursively yield results from each OLE subfile:
  2048 + for ole_subfile in self.ole_subfiles:
  2049 + for results in ole_subfile.extract_macros():
  2050 + yield results
2018 else: 2051 else:
  2052 + # This is an OLE file:
2019 self.find_vba_projects() 2053 self.find_vba_projects()
2020 for vba_root, project_path, dir_path in self.vba_projects: 2054 for vba_root, project_path, dir_path in self.vba_projects:
2021 # extract all VBA macros from that VBA root storage: 2055 # extract all VBA macros from that VBA root storage:
@@ -2079,8 +2113,9 @@ class VBA_Parser(object): @@ -2079,8 +2113,9 @@ class VBA_Parser(object):
2079 the application is opening many files. 2113 the application is opening many files.
2080 """ 2114 """
2081 if self.ole_file is None: 2115 if self.ole_file is None:
2082 - for ole_subfile in self.ole_subfiles:  
2083 - ole_subfile.close() 2116 + if self.ole_subfiles is not None:
  2117 + for ole_subfile in self.ole_subfiles:
  2118 + ole_subfile.close()
2084 else: 2119 else:
2085 self.ole_file.close() 2120 self.ole_file.close()
2086 2121
@@ -2252,7 +2287,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2252,7 +2287,7 @@ class VBA_Parser_CLI(VBA_Parser):
2252 self.analyze_macros() 2287 self.analyze_macros()
2253 flags = TYPE2TAG[self.type] 2288 flags = TYPE2TAG[self.type]
2254 macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-' 2289 macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-'
2255 - if self.nb_macros: macros = 'M' 2290 + if self.contains_macros: macros = 'M'
2256 if self.nb_autoexec: autoexec = 'A' 2291 if self.nb_autoexec: autoexec = 'A'
2257 if self.nb_suspicious: suspicious = 'S' 2292 if self.nb_suspicious: suspicious = 'S'
2258 if self.nb_iocs: iocs = 'I' 2293 if self.nb_iocs: iocs = 'I'
@@ -2404,7 +2439,7 @@ def main(): @@ -2404,7 +2439,7 @@ def main():
2404 vba_parser.process_file_triage() 2439 vba_parser.process_file_triage()
2405 count += 1 2440 count += 1
2406 if not options.detailed_mode or options.triage_mode: 2441 if not options.detailed_mode or options.triage_mode:
2407 - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, M=Macros, ' \ 2442 + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \
2408 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ 2443 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \
2409 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' 2444 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n'
2410 2445