Commit 28aa9a84cf42e1613b269c6d1caaff60eaa309f2 (1 parent: d7122049)
olevba: added support for text files containing VBA source code
Showing 1 changed file with 48 additions and 13 deletions
oletools/olevba.py
| @@ -154,6 +154,7 @@ https://github.com/unixfreak0037/officeparser | @@ -154,6 +154,7 @@ https://github.com/unixfreak0037/officeparser | ||
| 154 | # 2015-09-22 v0.41 PL: - added new option --reveal | 154 | # 2015-09-22 v0.41 PL: - added new option --reveal |
| 155 | # - added suspicious strings for PowerShell.exe options | 155 | # - added suspicious strings for PowerShell.exe options |
| 156 | # 2015-10-09 v0.42 PL: - VBA_Parser: split each format into a separate method | 156 | # 2015-10-09 v0.42 PL: - VBA_Parser: split each format into a separate method |
| 157 | +# 2015-10-10 PL: - added support for text files with VBA source code | ||
| 157 | 158 | ||
| 158 | __version__ = '0.42' | 159 | __version__ = '0.42' |
| 159 | 160 | ||
| @@ -241,6 +242,7 @@ TYPE_OLE = 'OLE' | @@ -241,6 +242,7 @@ TYPE_OLE = 'OLE' | ||
| 241 | TYPE_OpenXML = 'OpenXML' | 242 | TYPE_OpenXML = 'OpenXML' |
| 242 | TYPE_Word2003_XML = 'Word2003_XML' | 243 | TYPE_Word2003_XML = 'Word2003_XML' |
| 243 | TYPE_MHTML = 'MHTML' | 244 | TYPE_MHTML = 'MHTML' |
| 245 | +TYPE_TEXT = 'Text' | ||
| 244 | 246 | ||
| 245 | # short tag to display file types in triage mode: | 247 | # short tag to display file types in triage mode: |
| 246 | TYPE2TAG = { | 248 | TYPE2TAG = { |
| @@ -248,6 +250,7 @@ TYPE2TAG = { | @@ -248,6 +250,7 @@ TYPE2TAG = { | ||
| 248 | TYPE_OpenXML: 'OpX:', | 250 | TYPE_OpenXML: 'OpX:', |
| 249 | TYPE_Word2003_XML: 'XML:', | 251 | TYPE_Word2003_XML: 'XML:', |
| 250 | TYPE_MHTML: 'MHT:', | 252 | TYPE_MHTML: 'MHT:', |
| 253 | + TYPE_TEXT: 'TXT:', | ||
| 251 | } | 254 | } |
| 252 | 255 | ||
| 253 | 256 | ||
| @@ -1721,31 +1724,36 @@ class VBA_Parser(object): | @@ -1721,31 +1724,36 @@ class VBA_Parser(object): | ||
| 1721 | if olefile.isOleFile(_file): | 1724 | if olefile.isOleFile(_file): |
| 1722 | # This looks like an OLE file | 1725 | # This looks like an OLE file |
| 1723 | self.open_ole(_file) | 1726 | self.open_ole(_file) |
| 1724 | - elif zipfile.is_zipfile(_file): | 1727 | + if self.type is None and zipfile.is_zipfile(_file): |
| 1725 | # Zip file, which may be an OpenXML document | 1728 | # Zip file, which may be an OpenXML document |
| 1726 | self.open_openxml(_file) | 1729 | self.open_openxml(_file) |
| 1727 | - else: | 1730 | + if self.type is None: |
| 1728 | # read file from disk, check if it is a Word 2003 XML file (WordProcessingML), Excel 2003 XML, | 1731 | # read file from disk, check if it is a Word 2003 XML file (WordProcessingML), Excel 2003 XML, |
| 1729 | # or a plain text file containing VBA code | 1732 | # or a plain text file containing VBA code |
| 1730 | if data is None: | 1733 | if data is None: |
| 1731 | data = open(filename, 'rb').read() | 1734 | data = open(filename, 'rb').read() |
| 1732 | - # store a lowercase version for some tests: | ||
| 1733 | - data_lowercase = data.lower() | ||
| 1734 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace | 1735 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace |
| 1735 | if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: | 1736 | if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: |
| 1736 | self.open_word2003xml(data) | 1737 | self.open_word2003xml(data) |
| 1738 | + # store a lowercase version for the next tests: | ||
| 1739 | + data_lowercase = data.lower() | ||
| 1737 | # check if it is a MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"): | 1740 | # check if it is a MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"): |
| 1738 | # According to my tests, these files usually start with "MIME-Version: 1.0" on the 1st line | 1741 | # According to my tests, these files usually start with "MIME-Version: 1.0" on the 1st line |
| 1739 | # BUT Word accepts a blank line or other MIME headers inserted before, | 1742 | # BUT Word accepts a blank line or other MIME headers inserted before, |
| 1740 | # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored. | 1743 | # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored. |
| 1741 | # And the line is case insensitive. | 1744 | # And the line is case insensitive. |
| 1742 | # so we'll just check the presence of mime, version and multipart anywhere: | 1745 | # so we'll just check the presence of mime, version and multipart anywhere: |
| 1743 | - if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase and 'multipart' in data_lowercase: | 1746 | + if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase \ |
| 1747 | + and 'multipart' in data_lowercase: | ||
| 1744 | self.open_mht(data) | 1748 | self.open_mht(data) |
| 1745 | #TODO: handle exceptions | 1749 | #TODO: handle exceptions |
| 1746 | #TODO: Excel 2003 XML | 1750 | #TODO: Excel 2003 XML |
| 1747 | - #TODO: plain text VBA file | 1751 | + # Check if this is a plain text VBA or VBScript file: |
| 1752 | + # To avoid scanning binary files, we simply check for some control chars: | ||
| 1753 | + if self.type is None and '\x00' not in data: | ||
| 1754 | + self.open_text(data) | ||
| 1748 | if self.type is None: | 1755 | if self.type is None: |
| 1756 | + # At this stage, could not match a known format: | ||
| 1749 | msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename | 1757 | msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename |
| 1750 | logging.error(msg) | 1758 | logging.error(msg) |
| 1751 | raise TypeError(msg) | 1759 | raise TypeError(msg) |
| @@ -1885,6 +1893,25 @@ class VBA_Parser(object): | @@ -1885,6 +1893,25 @@ class VBA_Parser(object): | ||
| 1885 | pass | 1893 | pass |
| 1886 | 1894 | ||
| 1887 | 1895 | ||
| 1896 | + def open_text(self, data): | ||
| 1897 | + """ | ||
| 1898 | + Open a text file containing VBA or VBScript source code | ||
| 1899 | + :param data: file contents in a string or bytes | ||
| 1900 | + :return: nothing | ||
| 1901 | + """ | ||
| 1902 | + logging.info('Opening text file %s' % self.filename) | ||
| 1903 | + try: | ||
| 1904 | + # directly store the source code: | ||
| 1905 | + self.vba_code_all_modules = data | ||
| 1906 | + self.contains_macros = True | ||
| 1907 | + # set type only if parsing succeeds | ||
| 1908 | + self.type = TYPE_TEXT | ||
| 1909 | + except: | ||
| 1910 | + logging.exception('Failed text parsing for file %r - %s' | ||
| 1911 | + % (self.filename, MSG_OLEVBA_ISSUES)) | ||
| 1912 | + pass | ||
| 1913 | + | ||
| 1914 | + | ||
| 1888 | def find_vba_projects(self): | 1915 | def find_vba_projects(self): |
| 1889 | """ | 1916 | """ |
| 1890 | Finds all the VBA projects stored in an OLE file. | 1917 | Finds all the VBA projects stored in an OLE file. |
| @@ -2012,10 +2039,17 @@ class VBA_Parser(object): | @@ -2012,10 +2039,17 @@ class VBA_Parser(object): | ||
| 2012 | within the zip archive, e.g. word/vbaProject.bin. | 2039 | within the zip archive, e.g. word/vbaProject.bin. |
| 2013 | """ | 2040 | """ |
| 2014 | if self.ole_file is None: | 2041 | if self.ole_file is None: |
| 2015 | - for ole_subfile in self.ole_subfiles: | ||
| 2016 | - for results in ole_subfile.extract_macros(): | ||
| 2017 | - yield results | 2042 | + # This may be either an OpenXML or a text file: |
| 2043 | + if self.type == TYPE_TEXT: | ||
| 2044 | + # This is a text file, yield the full code: | ||
| 2045 | + yield (self.filename, '', self.filename, self.vba_code_all_modules) | ||
| 2046 | + else: | ||
| 2047 | + # OpenXML: recursively yield results from each OLE subfile: | ||
| 2048 | + for ole_subfile in self.ole_subfiles: | ||
| 2049 | + for results in ole_subfile.extract_macros(): | ||
| 2050 | + yield results | ||
| 2018 | else: | 2051 | else: |
| 2052 | + # This is an OLE file: | ||
| 2019 | self.find_vba_projects() | 2053 | self.find_vba_projects() |
| 2020 | for vba_root, project_path, dir_path in self.vba_projects: | 2054 | for vba_root, project_path, dir_path in self.vba_projects: |
| 2021 | # extract all VBA macros from that VBA root storage: | 2055 | # extract all VBA macros from that VBA root storage: |
| @@ -2079,8 +2113,9 @@ class VBA_Parser(object): | @@ -2079,8 +2113,9 @@ class VBA_Parser(object): | ||
| 2079 | the application is opening many files. | 2113 | the application is opening many files. |
| 2080 | """ | 2114 | """ |
| 2081 | if self.ole_file is None: | 2115 | if self.ole_file is None: |
| 2082 | - for ole_subfile in self.ole_subfiles: | ||
| 2083 | - ole_subfile.close() | 2116 | + if self.ole_subfiles is not None: |
| 2117 | + for ole_subfile in self.ole_subfiles: | ||
| 2118 | + ole_subfile.close() | ||
| 2084 | else: | 2119 | else: |
| 2085 | self.ole_file.close() | 2120 | self.ole_file.close() |
| 2086 | 2121 | ||
| @@ -2252,7 +2287,7 @@ class VBA_Parser_CLI(VBA_Parser): | @@ -2252,7 +2287,7 @@ class VBA_Parser_CLI(VBA_Parser): | ||
| 2252 | self.analyze_macros() | 2287 | self.analyze_macros() |
| 2253 | flags = TYPE2TAG[self.type] | 2288 | flags = TYPE2TAG[self.type] |
| 2254 | macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-' | 2289 | macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-' |
| 2255 | - if self.nb_macros: macros = 'M' | 2290 | + if self.contains_macros: macros = 'M' |
| 2256 | if self.nb_autoexec: autoexec = 'A' | 2291 | if self.nb_autoexec: autoexec = 'A' |
| 2257 | if self.nb_suspicious: suspicious = 'S' | 2292 | if self.nb_suspicious: suspicious = 'S' |
| 2258 | if self.nb_iocs: iocs = 'I' | 2293 | if self.nb_iocs: iocs = 'I' |
| @@ -2404,7 +2439,7 @@ def main(): | @@ -2404,7 +2439,7 @@ def main(): | ||
| 2404 | vba_parser.process_file_triage() | 2439 | vba_parser.process_file_triage() |
| 2405 | count += 1 | 2440 | count += 1 |
| 2406 | if not options.detailed_mode or options.triage_mode: | 2441 | if not options.detailed_mode or options.triage_mode: |
| 2407 | - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, M=Macros, ' \ | 2442 | + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ |
| 2408 | 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ | 2443 | 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ |
| 2409 | 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' | 2444 | 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' |
| 2410 | 2445 |