Commit 5dfb7b56b9b050b818c2c7544ed026427b5b1aaf
1 parent
89272589
olevba: added Base64 obfuscation decoding (contribution from @JamesHabben)
Showing 1 changed file with 40 additions and 4 deletions
oletools/olevba.py
| ... | ... | @@ -112,6 +112,8 @@ https://github.com/unixfreak0037/officeparser |
| 112 | 112 | # 2015-01-29 v0.21 PL: - added Dridex obfuscation decoding |
| 113 | 113 | # - improved display, shows obfuscation name |
| 114 | 114 | # 2015-02-01 v0.22 PL: - fixed issue #4: regex for URL, e-mail and exe filename |
| 115 | +# - added Base64 obfuscation decoding (contribution from | |
| 116 | +# @JamesHabben) | |
| 115 | 117 | |
| 116 | 118 | __version__ = '0.22' |
| 117 | 119 | |
| ... | ... | @@ -121,7 +123,6 @@ __version__ = '0.22' |
| 121 | 123 | # + setup logging (common with other oletools) |
| 122 | 124 | |
| 123 | 125 | # TODO later: |
| 124 | -# + do not show hex strings by default (add option --hex) | |
| 125 | 126 | # + performance improvement: instead of searching each keyword separately, |
| 126 | 127 | # first split vba code into a list of words (per line), then check each |
| 127 | 128 | # word against a dict. (or put vba words into a set/dict?) |
| ... | ... | @@ -156,6 +157,7 @@ import re |
| 156 | 157 | import optparse |
| 157 | 158 | import os.path |
| 158 | 159 | import binascii |
| 160 | +import base64 | |
| 159 | 161 | |
| 160 | 162 | import thirdparty.olefile as olefile |
| 161 | 163 | from thirdparty.prettytable import prettytable |
| ... | ... | @@ -289,6 +291,8 @@ RE_PATTERNS = ( |
| 289 | 291 | # regex to detect strings encoded in hexadecimal |
| 290 | 292 | re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') |
| 291 | 293 | |
| 294 | +# regex to detect strings encoded in base64 | |
| 295 | +re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"') | |
| 292 | 296 | |
| 293 | 297 | #--- FUNCTIONS ---------------------------------------------------------------- |
| 294 | 298 | |
| ... | ... | @@ -929,6 +933,24 @@ def detect_hex_strings(vba_code): |
| 929 | 933 | return results |
| 930 | 934 | |
| 931 | 935 | |
| 936 | +def detect_base64_strings(vba_code): | |
| 937 | + """ | |
| 938 | + Detect if the VBA code contains strings encoded in base64. | |
| 939 | + | |
| 940 | + :param vba_code: str, VBA source code | |
| 941 | + :return: list of str tuples (encoded string, decoded string) | |
| 942 | + """ | |
| 943 | + results = [] | |
| 944 | + found = set() | |
| 945 | + for match in re_base64_string.finditer(vba_code): | |
| 946 | + value = match.group() | |
| 947 | + if value not in found: | |
| 948 | + decoded = base64.b64decode(value) | |
| 949 | + results.append((value, decoded)) | |
| 950 | + found.add(value) | |
| 951 | + return results | |
| 952 | + | |
| 953 | + | |
| 932 | 954 | def detect_dridex_strings(vba_code): |
| 933 | 955 | """ |
| 934 | 956 | Detect if the VBA code contains strings obfuscated with a specific algorithm found in Dridex samples. |
| ... | ... | @@ -973,6 +995,7 @@ class VBA_Scanner (object): |
| 973 | 995 | self.code_hex = '' |
| 974 | 996 | self.code_hex_rev = '' |
| 975 | 997 | self.code_rev_hex = '' |
| 998 | + self.code_base64 = '' | |
| 976 | 999 | self.code_dridex = '' |
| 977 | 1000 | |
| 978 | 1001 | |
| ... | ... | @@ -1003,6 +1026,10 @@ class VBA_Scanner (object): |
| 1003 | 1026 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ |
| 1004 | 1027 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) |
| 1005 | 1028 | #TODO: show which IOCs have been found using hex, strrev or both |
| 1029 | + # Detect Base64-encoded strings | |
| 1030 | + self.base64_strings = detect_base64_strings(self.code) | |
| 1031 | + for encoded, decoded in self.base64_strings: | |
| 1032 | + self.code_base64 += '\n'+decoded | |
| 1006 | 1033 | # Detect Dridex-encoded strings |
| 1007 | 1034 | self.dridex_strings = detect_dridex_strings(self.code) |
| 1008 | 1035 | for encoded, decoded in self.dridex_strings: |
| ... | ... | @@ -1017,6 +1044,7 @@ class VBA_Scanner (object): |
| 1017 | 1044 | (self.code_hex, 'Hex'), |
| 1018 | 1045 | (self.code_hex_rev, 'Hex+StrReverse'), |
| 1019 | 1046 | (self.code_rev_hex, 'StrReverse+Hex'), |
| 1047 | + (self.code_base64, 'Base64'), | |
| 1020 | 1048 | (self.code_dridex, 'Dridex'), |
| 1021 | 1049 | ): |
| 1022 | 1050 | self.autoexec_keywords += detect_autoexec(code, obfuscation) |
| ... | ... | @@ -1027,6 +1055,12 @@ class VBA_Scanner (object): |
| 1027 | 1055 | if self.hex_strings: |
| 1028 | 1056 | self.suspicious_keywords.append(('Hex Strings', |
| 1029 | 1057 | 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)')) |
| 1058 | + if self.base64_strings: | |
| 1059 | + self.suspicious_keywords.append(('Base64 Strings', | |
| 1060 | + 'Base64-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)')) | |
| 1061 | + if self.dridex_strings: | |
| 1062 | + self.suspicious_keywords.append(('Dridex Strings', | |
| 1063 | + 'Dridex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)')) | |
| 1030 | 1064 | for keyword, description in self.autoexec_keywords: |
| 1031 | 1065 | results.append(('AutoExec', keyword, description)) |
| 1032 | 1066 | for keyword, description in self.suspicious_keywords: |
| ... | ... | @@ -1036,8 +1070,10 @@ class VBA_Scanner (object): |
| 1036 | 1070 | if include_hex_strings: |
| 1037 | 1071 | for encoded, decoded in self.hex_strings: |
| 1038 | 1072 | results.append(('Hex String', repr(decoded), encoded)) |
| 1039 | - for encoded, decoded in self.dridex_strings: | |
| 1040 | - results.append(('Dridex string', repr(decoded), encoded)) | |
| 1073 | + for encoded, decoded in self.base64_strings: | |
| 1074 | + results.append(('Base64 String', repr(decoded), encoded)) | |
| 1075 | + for encoded, decoded in self.dridex_strings: | |
| 1076 | + results.append(('Dridex string', repr(decoded), encoded)) | |
| 1041 | 1077 | return results |
| 1042 | 1078 | |
| 1043 | 1079 | |
| ... | ... | @@ -1470,7 +1506,7 @@ def main(): |
| 1470 | 1506 | # input file provided with VBA source code to be analyzed directly: |
| 1471 | 1507 | print 'Analysis of VBA source code from %s:' % options.input |
| 1472 | 1508 | vba_code = open(options.input).read() |
| 1473 | - print_analysis(vba_code) | |
| 1509 | + print_analysis(vba_code, show_hex_strings=options.show_hex_strings) | |
| 1474 | 1510 | sys.exit() |
| 1475 | 1511 | |
| 1476 | 1512 | # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr') | ... | ... |