Commit 28aa9a84cf42e1613b269c6d1caaff60eaa309f2
1 parent
d7122049
olevba: added support for text files containing VBA source code
Showing 1 changed file with 48 additions and 13 deletions
oletools/olevba.py
| ... | ... | @@ -154,6 +154,7 @@ https://github.com/unixfreak0037/officeparser |
| 154 | 154 | # 2015-09-22 v0.41 PL: - added new option --reveal |
| 155 | 155 | # - added suspicious strings for PowerShell.exe options |
| 156 | 156 | # 2015-10-09 v0.42 PL: - VBA_Parser: split each format into a separate method |
| 157 | +# 2015-10-10 PL: - added support for text files with VBA source code | |
| 157 | 158 | |
| 158 | 159 | __version__ = '0.42' |
| 159 | 160 | |
| ... | ... | @@ -241,6 +242,7 @@ TYPE_OLE = 'OLE' |
| 241 | 242 | TYPE_OpenXML = 'OpenXML' |
| 242 | 243 | TYPE_Word2003_XML = 'Word2003_XML' |
| 243 | 244 | TYPE_MHTML = 'MHTML' |
| 245 | +TYPE_TEXT = 'Text' | |
| 244 | 246 | |
| 245 | 247 | # short tag to display file types in triage mode: |
| 246 | 248 | TYPE2TAG = { |
| ... | ... | @@ -248,6 +250,7 @@ TYPE2TAG = { |
| 248 | 250 | TYPE_OpenXML: 'OpX:', |
| 249 | 251 | TYPE_Word2003_XML: 'XML:', |
| 250 | 252 | TYPE_MHTML: 'MHT:', |
| 253 | + TYPE_TEXT: 'TXT:', | |
| 251 | 254 | } |
| 252 | 255 | |
| 253 | 256 | |
| ... | ... | @@ -1721,31 +1724,36 @@ class VBA_Parser(object): |
| 1721 | 1724 | if olefile.isOleFile(_file): |
| 1722 | 1725 | # This looks like an OLE file |
| 1723 | 1726 | self.open_ole(_file) |
| 1724 | - elif zipfile.is_zipfile(_file): | |
| 1727 | + if self.type is None and zipfile.is_zipfile(_file): | |
| 1725 | 1728 | # Zip file, which may be an OpenXML document |
| 1726 | 1729 | self.open_openxml(_file) |
| 1727 | - else: | |
| 1730 | + if self.type is None: | |
| 1728 | 1731 | # read file from disk, check if it is a Word 2003 XML file (WordProcessingML), Excel 2003 XML, |
| 1729 | 1732 | # or a plain text file containing VBA code |
| 1730 | 1733 | if data is None: |
| 1731 | 1734 | data = open(filename, 'rb').read() |
| 1732 | - # store a lowercase version for some tests: | |
| 1733 | - data_lowercase = data.lower() | |
| 1734 | 1735 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace |
| 1735 | 1736 | if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: |
| 1736 | 1737 | self.open_word2003xml(data) |
| 1738 | + # store a lowercase version for the next tests: | |
| 1739 | + data_lowercase = data.lower() | |
| 1737 | 1740 | # check if it is a MHT file (MIME HTML, Word or Excel saved as "Single File Web Page"): |
| 1738 | 1741 | # According to my tests, these files usually start with "MIME-Version: 1.0" on the 1st line |
| 1739 | 1742 | # BUT Word accepts a blank line or other MIME headers inserted before, |
| 1740 | 1743 | # and even whitespace between "MIME", "-", "Version" and ":". The version number is ignored. |
| 1741 | 1744 | # And the line is case-insensitive, |
| 1742 | 1745 | # so we'll just check for the presence of mime, version and multipart anywhere: |
| 1743 | - if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase and 'multipart' in data_lowercase: | |
| 1746 | + if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase \ | |
| 1747 | + and 'multipart' in data_lowercase: | |
| 1744 | 1748 | self.open_mht(data) |
| 1745 | 1749 | #TODO: handle exceptions |
| 1746 | 1750 | #TODO: Excel 2003 XML |
| 1747 | - #TODO: plain text VBA file | |
| 1751 | + # Check if this is a plain text VBA or VBScript file: | |
| 1752 | + # To avoid scanning binary files, we simply check that the data contains no null byte: | |
| 1753 | + if self.type is None and '\x00' not in data: | |
| 1754 | + self.open_text(data) | |
| 1748 | 1755 | if self.type is None: |
| 1756 | + # At this stage, could not match a known format: | |
| 1749 | 1757 | msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename |
| 1750 | 1758 | logging.error(msg) |
| 1751 | 1759 | raise TypeError(msg) |
| ... | ... | @@ -1885,6 +1893,25 @@ class VBA_Parser(object): |
| 1885 | 1893 | pass |
| 1886 | 1894 | |
| 1887 | 1895 | |
| 1896 | + def open_text(self, data): | |
| 1897 | + """ | |
| 1898 | + Open a text file containing VBA or VBScript source code | |
| 1899 | + :param data: file contents in a string or bytes | |
| 1900 | + :return: nothing | |
| 1901 | + """ | |
| 1902 | + logging.info('Opening text file %s' % self.filename) | |
| 1903 | + try: | |
| 1904 | + # directly store the source code: | |
| 1905 | + self.vba_code_all_modules = data | |
| 1906 | + self.contains_macros = True | |
| 1907 | + # set type only if parsing succeeds | |
| 1908 | + self.type = TYPE_TEXT | |
| 1909 | + except: | |
| 1910 | + logging.exception('Failed text parsing for file %r - %s' | |
| 1911 | + % (self.filename, MSG_OLEVBA_ISSUES)) | |
| 1912 | + pass | |
| 1913 | + | |
| 1914 | + | |
| 1888 | 1915 | def find_vba_projects(self): |
| 1889 | 1916 | """ |
| 1890 | 1917 | Finds all the VBA projects stored in an OLE file. |
| ... | ... | @@ -2012,10 +2039,17 @@ class VBA_Parser(object): |
| 2012 | 2039 | within the zip archive, e.g. word/vbaProject.bin. |
| 2013 | 2040 | """ |
| 2014 | 2041 | if self.ole_file is None: |
| 2015 | - for ole_subfile in self.ole_subfiles: | |
| 2016 | - for results in ole_subfile.extract_macros(): | |
| 2017 | - yield results | |
| 2042 | + # This may be either an OpenXML or a text file: | |
| 2043 | + if self.type == TYPE_TEXT: | |
| 2044 | + # This is a text file, yield the full code: | |
| 2045 | + yield (self.filename, '', self.filename, self.vba_code_all_modules) | |
| 2046 | + else: | |
| 2047 | + # OpenXML: recursively yield results from each OLE subfile: | |
| 2048 | + for ole_subfile in self.ole_subfiles: | |
| 2049 | + for results in ole_subfile.extract_macros(): | |
| 2050 | + yield results | |
| 2018 | 2051 | else: |
| 2052 | + # This is an OLE file: | |
| 2019 | 2053 | self.find_vba_projects() |
| 2020 | 2054 | for vba_root, project_path, dir_path in self.vba_projects: |
| 2021 | 2055 | # extract all VBA macros from that VBA root storage: |
| ... | ... | @@ -2079,8 +2113,9 @@ class VBA_Parser(object): |
| 2079 | 2113 | the application is opening many files. |
| 2080 | 2114 | """ |
| 2081 | 2115 | if self.ole_file is None: |
| 2082 | - for ole_subfile in self.ole_subfiles: | |
| 2083 | - ole_subfile.close() | |
| 2116 | + if self.ole_subfiles is not None: | |
| 2117 | + for ole_subfile in self.ole_subfiles: | |
| 2118 | + ole_subfile.close() | |
| 2084 | 2119 | else: |
| 2085 | 2120 | self.ole_file.close() |
| 2086 | 2121 | |
| ... | ... | @@ -2252,7 +2287,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2252 | 2287 | self.analyze_macros() |
| 2253 | 2288 | flags = TYPE2TAG[self.type] |
| 2254 | 2289 | macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-' |
| 2255 | - if self.nb_macros: macros = 'M' | |
| 2290 | + if self.contains_macros: macros = 'M' | |
| 2256 | 2291 | if self.nb_autoexec: autoexec = 'A' |
| 2257 | 2292 | if self.nb_suspicious: suspicious = 'S' |
| 2258 | 2293 | if self.nb_iocs: iocs = 'I' |
| ... | ... | @@ -2404,7 +2439,7 @@ def main(): |
| 2404 | 2439 | vba_parser.process_file_triage() |
| 2405 | 2440 | count += 1 |
| 2406 | 2441 | if not options.detailed_mode or options.triage_mode: |
| 2407 | - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, M=Macros, ' \ | |
| 2442 | + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ | |
| 2408 | 2443 | 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ |
| 2409 | 2444 | 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' |
| 2410 | 2445 | ... | ... |