Commit 5dfb7b56b9b050b818c2c7544ed026427b5b1aaf
1 parent
89272589
olevba: added Base64 obfuscation decoding (contribution from @JamesHabben)
Showing
1 changed file
with
40 additions
and
4 deletions
oletools/olevba.py
| @@ -112,6 +112,8 @@ https://github.com/unixfreak0037/officeparser | @@ -112,6 +112,8 @@ https://github.com/unixfreak0037/officeparser | ||
| 112 | # 2015-01-29 v0.21 PL: - added Dridex obfuscation decoding | 112 | # 2015-01-29 v0.21 PL: - added Dridex obfuscation decoding |
| 113 | # - improved display, shows obfuscation name | 113 | # - improved display, shows obfuscation name |
| 114 | # 2015-02-01 v0.22 PL: - fixed issue #4: regex for URL, e-mail and exe filename | 114 | # 2015-02-01 v0.22 PL: - fixed issue #4: regex for URL, e-mail and exe filename |
| 115 | +# - added Base64 obfuscation decoding (contribution from | ||
| 116 | +# @JamesHabben) | ||
| 115 | 117 | ||
| 116 | __version__ = '0.22' | 118 | __version__ = '0.22' |
| 117 | 119 | ||
| @@ -121,7 +123,6 @@ __version__ = '0.22' | @@ -121,7 +123,6 @@ __version__ = '0.22' | ||
| 121 | # + setup logging (common with other oletools) | 123 | # + setup logging (common with other oletools) |
| 122 | 124 | ||
| 123 | # TODO later: | 125 | # TODO later: |
| 124 | -# + do not show hex strings by default (add option --hex) | ||
| 125 | # + performance improvement: instead of searching each keyword separately, | 126 | # + performance improvement: instead of searching each keyword separately, |
| 126 | # first split vba code into a list of words (per line), then check each | 127 | # first split vba code into a list of words (per line), then check each |
| 127 | # word against a dict. (or put vba words into a set/dict?) | 128 | # word against a dict. (or put vba words into a set/dict?) |
| @@ -156,6 +157,7 @@ import re | @@ -156,6 +157,7 @@ import re | ||
| 156 | import optparse | 157 | import optparse |
| 157 | import os.path | 158 | import os.path |
| 158 | import binascii | 159 | import binascii |
| 160 | +import base64 | ||
| 159 | 161 | ||
| 160 | import thirdparty.olefile as olefile | 162 | import thirdparty.olefile as olefile |
| 161 | from thirdparty.prettytable import prettytable | 163 | from thirdparty.prettytable import prettytable |
| @@ -289,6 +291,8 @@ RE_PATTERNS = ( | @@ -289,6 +291,8 @@ RE_PATTERNS = ( | ||
| 289 | # regex to detect strings encoded in hexadecimal | 291 | # regex to detect strings encoded in hexadecimal |
| 290 | re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') | 292 | re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') |
| 291 | 293 | ||
| 294 | +# regex to detect strings encoded in base64 | ||
| 295 | +re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"') | ||
| 292 | 296 | ||
| 293 | #--- FUNCTIONS ---------------------------------------------------------------- | 297 | #--- FUNCTIONS ---------------------------------------------------------------- |
| 294 | 298 | ||
| @@ -929,6 +933,24 @@ def detect_hex_strings(vba_code): | @@ -929,6 +933,24 @@ def detect_hex_strings(vba_code): | ||
| 929 | return results | 933 | return results |
| 930 | 934 | ||
| 931 | 935 | ||
def detect_base64_strings(vba_code):
    """
    Detect if the VBA code contains strings encoded in base64.

    Candidates are the double-quoted literals matched by the module-level
    re_base64_string pattern. Each distinct candidate is reported once, in
    order of first appearance, without its enclosing quotes. Empty literals
    and candidates that cannot be decoded are ignored.

    :param vba_code: str, VBA source code
    :return: list of str tuples (encoded string, decoded string)
    """
    results = []
    found = set()
    for match in re_base64_string.finditer(vba_code):
        # the pattern includes the enclosing double quotes: keep the payload only
        value = match.group().strip('"')
        # an empty VBA literal ("") also matches the pattern but encodes nothing
        if not value or value in found:
            continue
        found.add(value)
        try:
            decoded = base64.b64decode(value)
        except (TypeError, ValueError):
            # TypeError on Python 2, binascii.Error (a ValueError) on Python 3:
            # one malformed candidate must not abort the whole scan
            continue
        results.append((value, decoded))
    return results
| 952 | + | ||
| 953 | + | ||
| 932 | def detect_dridex_strings(vba_code): | 954 | def detect_dridex_strings(vba_code): |
| 933 | """ | 955 | """ |
| 934 | Detect if the VBA code contains strings obfuscated with a specific algorithm found in Dridex samples. | 956 | Detect if the VBA code contains strings obfuscated with a specific algorithm found in Dridex samples. |
| @@ -973,6 +995,7 @@ class VBA_Scanner (object): | @@ -973,6 +995,7 @@ class VBA_Scanner (object): | ||
| 973 | self.code_hex = '' | 995 | self.code_hex = '' |
| 974 | self.code_hex_rev = '' | 996 | self.code_hex_rev = '' |
| 975 | self.code_rev_hex = '' | 997 | self.code_rev_hex = '' |
| 998 | + self.code_base64 = '' | ||
| 976 | self.code_dridex = '' | 999 | self.code_dridex = '' |
| 977 | 1000 | ||
| 978 | 1001 | ||
| @@ -1003,6 +1026,10 @@ class VBA_Scanner (object): | @@ -1003,6 +1026,10 @@ class VBA_Scanner (object): | ||
| 1003 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ | 1026 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ |
| 1004 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) | 1027 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) |
| 1005 | #TODO: show which IOCs have been found using hex, strrev or both | 1028 | #TODO: show which IOCs have been found using hex, strrev or both |
| 1029 | + # Detect Base64-encoded strings | ||
| 1030 | + self.base64_strings = detect_base64_strings(self.code) | ||
| 1031 | + for encoded, decoded in self.base64_strings: | ||
| 1032 | + self.code_base64 += '\n'+decoded | ||
| 1006 | # Detect Dridex-encoded strings | 1033 | # Detect Dridex-encoded strings |
| 1007 | self.dridex_strings = detect_dridex_strings(self.code) | 1034 | self.dridex_strings = detect_dridex_strings(self.code) |
| 1008 | for encoded, decoded in self.dridex_strings: | 1035 | for encoded, decoded in self.dridex_strings: |
| @@ -1017,6 +1044,7 @@ class VBA_Scanner (object): | @@ -1017,6 +1044,7 @@ class VBA_Scanner (object): | ||
| 1017 | (self.code_hex, 'Hex'), | 1044 | (self.code_hex, 'Hex'), |
| 1018 | (self.code_hex_rev, 'Hex+StrReverse'), | 1045 | (self.code_hex_rev, 'Hex+StrReverse'), |
| 1019 | (self.code_rev_hex, 'StrReverse+Hex'), | 1046 | (self.code_rev_hex, 'StrReverse+Hex'), |
| 1047 | + (self.code_base64, 'Base64'), | ||
| 1020 | (self.code_dridex, 'Dridex'), | 1048 | (self.code_dridex, 'Dridex'), |
| 1021 | ): | 1049 | ): |
| 1022 | self.autoexec_keywords += detect_autoexec(code, obfuscation) | 1050 | self.autoexec_keywords += detect_autoexec(code, obfuscation) |
| @@ -1027,6 +1055,12 @@ class VBA_Scanner (object): | @@ -1027,6 +1055,12 @@ class VBA_Scanner (object): | ||
| 1027 | if self.hex_strings: | 1055 | if self.hex_strings: |
| 1028 | self.suspicious_keywords.append(('Hex Strings', | 1056 | self.suspicious_keywords.append(('Hex Strings', |
| 1029 | 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)')) | 1057 | 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)')) |
| 1058 | + if self.base64_strings: | ||
| 1059 | + self.suspicious_keywords.append(('Base64 Strings', | ||
| 1060 | + 'Base64-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)')) | ||
| 1061 | + if self.dridex_strings: | ||
| 1062 | + self.suspicious_keywords.append(('Dridex Strings', | ||
| 1063 | + 'Dridex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)')) | ||
| 1030 | for keyword, description in self.autoexec_keywords: | 1064 | for keyword, description in self.autoexec_keywords: |
| 1031 | results.append(('AutoExec', keyword, description)) | 1065 | results.append(('AutoExec', keyword, description)) |
| 1032 | for keyword, description in self.suspicious_keywords: | 1066 | for keyword, description in self.suspicious_keywords: |
| @@ -1036,8 +1070,10 @@ class VBA_Scanner (object): | @@ -1036,8 +1070,10 @@ class VBA_Scanner (object): | ||
| 1036 | if include_hex_strings: | 1070 | if include_hex_strings: |
| 1037 | for encoded, decoded in self.hex_strings: | 1071 | for encoded, decoded in self.hex_strings: |
| 1038 | results.append(('Hex String', repr(decoded), encoded)) | 1072 | results.append(('Hex String', repr(decoded), encoded)) |
| 1039 | - for encoded, decoded in self.dridex_strings: | ||
| 1040 | - results.append(('Dridex string', repr(decoded), encoded)) | 1073 | + for encoded, decoded in self.base64_strings: |
| 1074 | + results.append(('Base64 String', repr(decoded), encoded)) | ||
| 1075 | + for encoded, decoded in self.dridex_strings: | ||
| 1076 | + results.append(('Dridex string', repr(decoded), encoded)) | ||
| 1041 | return results | 1077 | return results |
| 1042 | 1078 | ||
| 1043 | 1079 | ||
| @@ -1470,7 +1506,7 @@ def main(): | @@ -1470,7 +1506,7 @@ def main(): | ||
| 1470 | # input file provided with VBA source code to be analyzed directly: | 1506 | # input file provided with VBA source code to be analyzed directly: |
| 1471 | print 'Analysis of VBA source code from %s:' % options.input | 1507 | print 'Analysis of VBA source code from %s:' % options.input |
| 1472 | vba_code = open(options.input).read() | 1508 | vba_code = open(options.input).read() |
| 1473 | - print_analysis(vba_code) | 1509 | + print_analysis(vba_code, show_hex_strings=options.show_hex_strings) |
| 1474 | sys.exit() | 1510 | sys.exit() |
| 1475 | 1511 | ||
| 1476 | # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr') | 1512 | # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr') |