Commit 9f45875afa47440c5c5ece443171da2e617e5658
1 parent
85f94f92
olevba: added hex strings detection and decoding
Showing
1 changed file
with
29 additions
and
1 deletion
oletools/olevba.py
| ... | ... | @@ -95,8 +95,9 @@ https://github.com/unixfreak0037/officeparser |
| 95 | 95 | # - process_file: improved display, shows container file |
| 96 | 96 | # - improved list of executable file extensions |
| 97 | 97 | # 2015-01-04 v0.13 PL: - added several suspicious keywords, improved display |
| 98 | +# 2015-01-08 v0.14 PL: - added hex strings detection and decoding | |
| 98 | 99 | |
| 99 | -__version__ = '0.13' | |
| 100 | +__version__ = '0.14' | |
| 100 | 101 | |
| 101 | 102 | #------------------------------------------------------------------------------ |
| 102 | 103 | # TODO: |
| ... | ... | @@ -105,6 +106,7 @@ __version__ = '0.13' |
| 105 | 106 | # + update readme, wiki and decalage.info, pypi (link to sample files) |
| 106 | 107 | |
| 107 | 108 | # TODO later: |
| 109 | +# - append decoded hex strings to VBA code, in order to detect IOCs and suspicious keywords | |
| 108 | 110 | # + performance improvement: instead of searching each keyword separately, |
| 109 | 111 | # first split vba code into a list of words (per line), then check each |
| 110 | 112 | # word against a dict. (or put vba words into a set/dict?) |
| ... | ... | @@ -138,6 +140,7 @@ import zipfile |
| 138 | 140 | import re |
| 139 | 141 | import optparse |
| 140 | 142 | import os.path |
| 143 | +import binascii | |
| 141 | 144 | |
| 142 | 145 | import thirdparty.olefile as olefile |
| 143 | 146 | from thirdparty.prettytable import prettytable |
| ... | ... | @@ -236,8 +239,12 @@ RE_PATTERNS = ( |
| 236 | 239 | ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|COM|PIF|APPLICATION|GADGET|MSI|MSP|MSC|VB|VBS|JS|VBE|JSE|WS|WSF|WSC|WSH|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|SCF|LNK|INF|REG)\b")), |
| 237 | 240 | # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/ |
| 238 | 241 | #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types |
| 242 | + #('Hex string', re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')), | |
| 239 | 243 | ) |
| 240 | 244 | |
| 245 | +# regex to detect strings encoded in hexadecimal (a run of at least 4 pairs of hex digits, i.e. 8 or more hex characters) | |
| 246 | +re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') | |
| 247 | + | |
| 241 | 248 | |
| 242 | 249 | #--- FUNCTIONS ---------------------------------------------------------------- |
| 243 | 250 | |
| ... | ... | @@ -839,6 +846,24 @@ def detect_patterns(vba_code): |
| 839 | 846 | return results |
| 840 | 847 | |
| 841 | 848 | |
def detect_hex_strings(vba_code):
    """
    Find hex-encoded strings in VBA source code and decode them.

    Every match of the module-level re_hex_string pattern is decoded with
    binascii.unhexlify. Each distinct encoded value is reported only once,
    in order of first appearance in the code.

    :param vba_code: str, VBA source code
    :return: list of tuples (encoded string, decoded string)
    """
    decoded_pairs = []
    # encoded values already reported, so repeated occurrences are skipped
    seen = set()
    for hex_match in re_hex_string.finditer(vba_code):
        encoded = hex_match.group()
        if encoded in seen:
            continue
        seen.add(encoded)
        decoded_pairs.append((encoded, binascii.unhexlify(encoded)))
    return decoded_pairs


| 865 | + | |
| 866 | + | |
| 842 | 867 | #=== CLASSES ================================================================= |
| 843 | 868 | |
| 844 | 869 | class VBA_Parser(object): |
| ... | ... | @@ -1094,6 +1119,7 @@ def process_file (container, filename, data): |
| 1094 | 1119 | autoexec_keywords = detect_autoexec(vba_code) |
| 1095 | 1120 | suspicious_keywords = detect_suspicious(vba_code) |
| 1096 | 1121 | patterns = detect_patterns(vba_code) |
| 1122 | + hex_strings = detect_hex_strings(vba_code) | |
| 1097 | 1123 | if autoexec_keywords or suspicious_keywords or patterns: |
| 1098 | 1124 | t = prettytable.PrettyTable(('Type', 'Keyword', 'Description')) |
| 1099 | 1125 | t.align = 'l' |
| ... | ... | @@ -1106,6 +1132,8 @@ def process_file (container, filename, data): |
| 1106 | 1132 | t.add_row(('Suspicious', keyword, description)) |
| 1107 | 1133 | for pattern_type, value in patterns: |
| 1108 | 1134 | t.add_row(('IOC', value, pattern_type)) |
| 1135 | + for encoded, decoded in hex_strings: | |
| 1136 | + t.add_row(('Hex String', repr(decoded), encoded)) | |
| 1109 | 1137 | print t |
| 1110 | 1138 | else: |
| 1111 | 1139 | print 'No suspicious keyword or pattern found.' | ... | ... |