Commit 9f45875afa47440c5c5ece443171da2e617e5658
1 parent
85f94f92
olevba: added hex strings detection and decoding
Showing
1 changed file
with
29 additions
and
1 deletion
oletools/olevba.py
| @@ -95,8 +95,9 @@ https://github.com/unixfreak0037/officeparser | @@ -95,8 +95,9 @@ https://github.com/unixfreak0037/officeparser | ||
| 95 | # - process_file: improved display, shows container file | 95 | # - process_file: improved display, shows container file |
| 96 | # - improved list of executable file extensions | 96 | # - improved list of executable file extensions |
| 97 | # 2015-01-04 v0.13 PL: - added several suspicious keywords, improved display | 97 | # 2015-01-04 v0.13 PL: - added several suspicious keywords, improved display |
| 98 | +# 2015-01-08 v0.14 PL: - added hex strings detection and decoding | ||
| 98 | 99 | ||
| 99 | -__version__ = '0.13' | 100 | +__version__ = '0.14' |
| 100 | 101 | ||
| 101 | #------------------------------------------------------------------------------ | 102 | #------------------------------------------------------------------------------ |
| 102 | # TODO: | 103 | # TODO: |
| @@ -105,6 +106,7 @@ __version__ = '0.13' | @@ -105,6 +106,7 @@ __version__ = '0.13' | ||
| 105 | # + update readme, wiki and decalage.info, pypi (link to sample files) | 106 | # + update readme, wiki and decalage.info, pypi (link to sample files) |
| 106 | 107 | ||
| 107 | # TODO later: | 108 | # TODO later: |
| 109 | +# - append decoded hex strings to VBA code, in order to detect IOCs and suspicious keywords | ||
| 108 | # + performance improvement: instead of searching each keyword separately, | 110 | # + performance improvement: instead of searching each keyword separately, |
| 109 | # first split vba code into a list of words (per line), then check each | 111 | # first split vba code into a list of words (per line), then check each |
| 110 | # word against a dict. (or put vba words into a set/dict?) | 112 | # word against a dict. (or put vba words into a set/dict?) |
| @@ -138,6 +140,7 @@ import zipfile | @@ -138,6 +140,7 @@ import zipfile | ||
| 138 | import re | 140 | import re |
| 139 | import optparse | 141 | import optparse |
| 140 | import os.path | 142 | import os.path |
| 143 | +import binascii | ||
| 141 | 144 | ||
| 142 | import thirdparty.olefile as olefile | 145 | import thirdparty.olefile as olefile |
| 143 | from thirdparty.prettytable import prettytable | 146 | from thirdparty.prettytable import prettytable |
| @@ -236,8 +239,12 @@ RE_PATTERNS = ( | @@ -236,8 +239,12 @@ RE_PATTERNS = ( | ||
| 236 | ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|COM|PIF|APPLICATION|GADGET|MSI|MSP|MSC|VB|VBS|JS|VBE|JSE|WS|WSF|WSC|WSH|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|SCF|LNK|INF|REG)\b")), | 239 | ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|COM|PIF|APPLICATION|GADGET|MSI|MSP|MSC|VB|VBS|JS|VBE|JSE|WS|WSF|WSC|WSH|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|SCF|LNK|INF|REG)\b")), |
| 237 | # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/ | 240 | # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/ |
| 238 | #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types | 241 | #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types |
| 242 | + #('Hex string', re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')), | ||
| 239 | ) | 243 | ) |
| 240 | 244 | ||
| 245 | +# regex to detect strings encoded in hexadecimal | ||
| 246 | +re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') | ||
| 247 | + | ||
| 241 | 248 | ||
| 242 | #--- FUNCTIONS ---------------------------------------------------------------- | 249 | #--- FUNCTIONS ---------------------------------------------------------------- |
| 243 | 250 | ||
| @@ -839,6 +846,24 @@ def detect_patterns(vba_code): | @@ -839,6 +846,24 @@ def detect_patterns(vba_code): | ||
| 839 | return results | 846 | return results |
| 840 | 847 | ||
| 841 | 848 | ||
def detect_hex_strings(vba_code):
    """
    Find hexadecimal-encoded strings in VBA code and decode them.

    Candidates are whatever the module-level regex re_hex_string matches.
    Each distinct encoded string is reported only once, in order of first
    appearance in the code.

    :param vba_code: str, VBA source code
    :return: list of tuples (encoded string, decoded string)
    """
    already_seen = set()
    pairs = []
    for hex_match in re_hex_string.finditer(vba_code):
        encoded = hex_match.group()
        # skip repeated occurrences of the same hex string
        if encoded in already_seen:
            continue
        pairs.append((encoded, binascii.unhexlify(encoded)))
        already_seen.add(encoded)
    return pairs
| 865 | + | ||
| 866 | + | ||
| 842 | #=== CLASSES ================================================================= | 867 | #=== CLASSES ================================================================= |
| 843 | 868 | ||
| 844 | class VBA_Parser(object): | 869 | class VBA_Parser(object): |
| @@ -1094,6 +1119,7 @@ def process_file (container, filename, data): | @@ -1094,6 +1119,7 @@ def process_file (container, filename, data): | ||
| 1094 | autoexec_keywords = detect_autoexec(vba_code) | 1119 | autoexec_keywords = detect_autoexec(vba_code) |
| 1095 | suspicious_keywords = detect_suspicious(vba_code) | 1120 | suspicious_keywords = detect_suspicious(vba_code) |
| 1096 | patterns = detect_patterns(vba_code) | 1121 | patterns = detect_patterns(vba_code) |
| 1122 | + hex_strings = detect_hex_strings(vba_code) | ||
| 1097 | if autoexec_keywords or suspicious_keywords or patterns: | 1123 | if autoexec_keywords or suspicious_keywords or patterns: |
| 1098 | t = prettytable.PrettyTable(('Type', 'Keyword', 'Description')) | 1124 | t = prettytable.PrettyTable(('Type', 'Keyword', 'Description')) |
| 1099 | t.align = 'l' | 1125 | t.align = 'l' |
| @@ -1106,6 +1132,8 @@ def process_file (container, filename, data): | @@ -1106,6 +1132,8 @@ def process_file (container, filename, data): | ||
| 1106 | t.add_row(('Suspicious', keyword, description)) | 1132 | t.add_row(('Suspicious', keyword, description)) |
| 1107 | for pattern_type, value in patterns: | 1133 | for pattern_type, value in patterns: |
| 1108 | t.add_row(('IOC', value, pattern_type)) | 1134 | t.add_row(('IOC', value, pattern_type)) |
| 1135 | + for encoded, decoded in hex_strings: | ||
| 1136 | + t.add_row(('Hex String', repr(decoded), encoded)) | ||
| 1109 | print t | 1137 | print t |
| 1110 | else: | 1138 | else: |
| 1111 | print 'No suspicious keyword or pattern found.' | 1139 | print 'No suspicious keyword or pattern found.' |