Commit 28aa9a84cf42e1613b269c6d1caaff60eaa309f2

Authored by Philippe Lagadec
1 parent d7122049

olevba: added support for text files containing VBA source code

Showing 1 changed file with 48 additions and 13 deletions
oletools/olevba.py
... ... @@ -154,6 +154,7 @@ https://github.com/unixfreak0037/officeparser
154 154 # 2015-09-22 v0.41 PL: - added new option --reveal
155 155 # - added suspicious strings for PowerShell.exe options
156 156 # 2015-10-09 v0.42 PL: - VBA_Parser: split each format into a separate method
  157 +# 2015-10-10 PL: - added support for text files with VBA source code
157 158  
158 159 __version__ = '0.42'
159 160  
... ... @@ -241,6 +242,7 @@ TYPE_OLE = 'OLE'
241 242 TYPE_OpenXML = 'OpenXML'
242 243 TYPE_Word2003_XML = 'Word2003_XML'
243 244 TYPE_MHTML = 'MHTML'
  245 +TYPE_TEXT = 'Text'
244 246  
245 247 # short tag to display file types in triage mode:
246 248 TYPE2TAG = {
... ... @@ -248,6 +250,7 @@ TYPE2TAG = {
248 250 TYPE_OpenXML: 'OpX:',
249 251 TYPE_Word2003_XML: 'XML:',
250 252 TYPE_MHTML: 'MHT:',
  253 + TYPE_TEXT: 'TXT:',
251 254 }
252 255  
253 256  
... ... @@ -1721,31 +1724,36 @@ class VBA_Parser(object):
1721 1724 if olefile.isOleFile(_file):
1722 1725 # This looks like an OLE file
1723 1726 self.open_ole(_file)
1724   - elif zipfile.is_zipfile(_file):
  1727 + if self.type is None and zipfile.is_zipfile(_file):
1725 1728 # Zip file, which may be an OpenXML document
1726 1729 self.open_openxml(_file)
1727   - else:
  1730 + if self.type is None:
1728 1731 # read file from disk, check if it is a Word 2003 XML file (WordProcessingML), Excel 2003 XML,
1729 1732 # or a plain text file containing VBA code
1730 1733 if data is None:
1731 1734 data = open(filename, 'rb').read()
1732   - # store a lowercase version for some tests:
1733   - data_lowercase = data.lower()
1734 1735 # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace
1735 1736 if 'http://schemas.microsoft.com/office/word/2003/wordml' in data:
1736 1737 self.open_word2003xml(data)
  1738 + # store a lowercase version for the next tests:
  1739 + data_lowercase = data.lower()
1737 1740 # check if it is a MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"):
1738 1741 # According to my tests, these files usually start with "MIME-Version: 1.0" on the 1st line
1739 1742 # BUT Word accepts a blank line or other MIME headers inserted before,
1740 1743 # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored.
1741 1744 # And the line is case insensitive.
1742 1745 # so we'll just check the presence of mime, version and multipart anywhere:
1743   - if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase and 'multipart' in data_lowercase:
  1746 + if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase \
  1747 + and 'multipart' in data_lowercase:
1744 1748 self.open_mht(data)
1745 1749 #TODO: handle exceptions
1746 1750 #TODO: Excel 2003 XML
1747   - #TODO: plain text VBA file
  1751 + # Check if this is a plain text VBA or VBScript file:
  1752 + # To avoid scanning binary files, we simply check for null bytes (the only control char tested):
  1753 + if self.type is None and '\x00' not in data:
  1754 + self.open_text(data)
1748 1755 if self.type is None:
  1756 + # At this stage, could not match a known format:
1749 1757 msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename
1750 1758 logging.error(msg)
1751 1759 raise TypeError(msg)
... ... @@ -1885,6 +1893,25 @@ class VBA_Parser(object):
1885 1893 pass
1886 1894  
1887 1895  
  1896 + def open_text(self, data):
  1897 + """
  1898 + Open a text file containing VBA or VBScript source code
  1899 + :param data: file contents in a string or bytes
  1900 + :return: nothing
  1901 + """
  1902 + logging.info('Opening text file %s' % self.filename)
  1903 + try:
  1904 + # directly store the source code:
  1905 + self.vba_code_all_modules = data
  1906 + self.contains_macros = True
  1907 + # set type only if parsing succeeds
  1908 + self.type = TYPE_TEXT
  1909 + except:
  1910 + logging.exception('Failed text parsing for file %r - %s'
  1911 + % (self.filename, MSG_OLEVBA_ISSUES))
  1912 + pass
  1913 +
  1914 +
1888 1915 def find_vba_projects(self):
1889 1916 """
1890 1917 Finds all the VBA projects stored in an OLE file.
... ... @@ -2012,10 +2039,17 @@ class VBA_Parser(object):
2012 2039 within the zip archive, e.g. word/vbaProject.bin.
2013 2040 """
2014 2041 if self.ole_file is None:
2015   - for ole_subfile in self.ole_subfiles:
2016   - for results in ole_subfile.extract_macros():
2017   - yield results
  2042 + # This may be either an OpenXML or a text file:
  2043 + if self.type == TYPE_TEXT:
  2044 + # This is a text file, yield the full code:
  2045 + yield (self.filename, '', self.filename, self.vba_code_all_modules)
  2046 + else:
  2047 + # OpenXML: recursively yield results from each OLE subfile:
  2048 + for ole_subfile in self.ole_subfiles:
  2049 + for results in ole_subfile.extract_macros():
  2050 + yield results
2018 2051 else:
  2052 + # This is an OLE file:
2019 2053 self.find_vba_projects()
2020 2054 for vba_root, project_path, dir_path in self.vba_projects:
2021 2055 # extract all VBA macros from that VBA root storage:
... ... @@ -2079,8 +2113,9 @@ class VBA_Parser(object):
2079 2113 the application is opening many files.
2080 2114 """
2081 2115 if self.ole_file is None:
2082   - for ole_subfile in self.ole_subfiles:
2083   - ole_subfile.close()
  2116 + if self.ole_subfiles is not None:
  2117 + for ole_subfile in self.ole_subfiles:
  2118 + ole_subfile.close()
2084 2119 else:
2085 2120 self.ole_file.close()
2086 2121  
... ... @@ -2252,7 +2287,7 @@ class VBA_Parser_CLI(VBA_Parser):
2252 2287 self.analyze_macros()
2253 2288 flags = TYPE2TAG[self.type]
2254 2289 macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-'
2255   - if self.nb_macros: macros = 'M'
  2290 + if self.contains_macros: macros = 'M'
2256 2291 if self.nb_autoexec: autoexec = 'A'
2257 2292 if self.nb_suspicious: suspicious = 'S'
2258 2293 if self.nb_iocs: iocs = 'I'
... ... @@ -2404,7 +2439,7 @@ def main():
2404 2439 vba_parser.process_file_triage()
2405 2440 count += 1
2406 2441 if not options.detailed_mode or options.triage_mode:
2407   - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, M=Macros, ' \
  2442 + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \
2408 2443 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \
2409 2444 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n'
2410 2445  
... ...