Commit 5dfb7b56b9b050b818c2c7544ed026427b5b1aaf

Authored by Philippe Lagadec
1 parent 89272589

olevba: added Base64 obfuscation decoding (contribution from @JamesHabben)

Showing 1 changed file with 40 additions and 4 deletions
oletools/olevba.py
@@ -112,6 +112,8 @@ https://github.com/unixfreak0037/officeparser @@ -112,6 +112,8 @@ https://github.com/unixfreak0037/officeparser
112 # 2015-01-29 v0.21 PL: - added Dridex obfuscation decoding 112 # 2015-01-29 v0.21 PL: - added Dridex obfuscation decoding
113 # - improved display, shows obfuscation name 113 # - improved display, shows obfuscation name
114 # 2015-02-01 v0.22 PL: - fixed issue #4: regex for URL, e-mail and exe filename 114 # 2015-02-01 v0.22 PL: - fixed issue #4: regex for URL, e-mail and exe filename
  115 +# - added Base64 obfuscation decoding (contribution from
  116 +# @JamesHabben)
115 117
116 __version__ = '0.22' 118 __version__ = '0.22'
117 119
@@ -121,7 +123,6 @@ __version__ = '0.22' @@ -121,7 +123,6 @@ __version__ = '0.22'
121 # + setup logging (common with other oletools) 123 # + setup logging (common with other oletools)
122 124
123 # TODO later: 125 # TODO later:
124 -# + do not show hex strings by default (add option --hex)  
125 # + performance improvement: instead of searching each keyword separately, 126 # + performance improvement: instead of searching each keyword separately,
126 # first split vba code into a list of words (per line), then check each 127 # first split vba code into a list of words (per line), then check each
127 # word against a dict. (or put vba words into a set/dict?) 128 # word against a dict. (or put vba words into a set/dict?)
@@ -156,6 +157,7 @@ import re @@ -156,6 +157,7 @@ import re
156 import optparse 157 import optparse
157 import os.path 158 import os.path
158 import binascii 159 import binascii
  160 +import base64
159 161
160 import thirdparty.olefile as olefile 162 import thirdparty.olefile as olefile
161 from thirdparty.prettytable import prettytable 163 from thirdparty.prettytable import prettytable
@@ -289,6 +291,8 @@ RE_PATTERNS = ( @@ -289,6 +291,8 @@ RE_PATTERNS = (
289 # regex to detect strings encoded in hexadecimal 291 # regex to detect strings encoded in hexadecimal
290 re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') 292 re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')
291 293
  294 +# regex to detect strings encoded in base64
  295 +re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"')
292 296
293 #--- FUNCTIONS ---------------------------------------------------------------- 297 #--- FUNCTIONS ----------------------------------------------------------------
294 298
@@ -929,6 +933,24 @@ def detect_hex_strings(vba_code): @@ -929,6 +933,24 @@ def detect_hex_strings(vba_code):
929 return results 933 return results
930 934
931 935
def detect_base64_strings(vba_code):
    """
    Detect if the VBA code contains strings encoded in base64.

    Only double-quoted string literals matched by re_base64_string are
    considered. The enclosing quotes are removed before decoding, empty
    literals ("") are skipped because they carry no encoded data, and
    candidates that fail to decode are ignored instead of aborting the scan.

    :param vba_code: str, VBA source code
    :return: list of str tuples (encoded string, decoded string); the encoded
        string is reported without its surrounding double quotes
    """
    results = []
    found = set()
    for match in re_base64_string.finditer(vba_code):
        # the regex matches the enclosing double quotes: keep only the payload
        value = match.group().strip('"')
        # '""' matches the regex but is not an encoded string (it would flag
        # almost every macro as containing Base64); also skip duplicates
        if not value or value in found:
            continue
        found.add(value)
        try:
            decoded = base64.b64decode(value)
        except (TypeError, ValueError):
            # TypeError on Python 2, binascii.Error (a ValueError) on Python 3:
            # the candidate is most likely not a Base64-encoded string
            continue
        results.append((value, decoded))
    return results
  952 +
  953 +
932 def detect_dridex_strings(vba_code): 954 def detect_dridex_strings(vba_code):
933 """ 955 """
934 Detect if the VBA code contains strings obfuscated with a specific algorithm found in Dridex samples. 956 Detect if the VBA code contains strings obfuscated with a specific algorithm found in Dridex samples.
@@ -973,6 +995,7 @@ class VBA_Scanner (object): @@ -973,6 +995,7 @@ class VBA_Scanner (object):
973 self.code_hex = '' 995 self.code_hex = ''
974 self.code_hex_rev = '' 996 self.code_hex_rev = ''
975 self.code_rev_hex = '' 997 self.code_rev_hex = ''
  998 + self.code_base64 = ''
976 self.code_dridex = '' 999 self.code_dridex = ''
977 1000
978 1001
@@ -1003,6 +1026,10 @@ class VBA_Scanner (object): @@ -1003,6 +1026,10 @@ class VBA_Scanner (object):
1003 #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ 1026 #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
1004 #TODO: also append the full code reversed if StrReverse? (risk of false positives?) 1027 #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
1005 #TODO: show which IOCs have been found using hex, strrev or both 1028 #TODO: show which IOCs have been found using hex, strrev or both
  1029 + # Detect Base64-encoded strings
  1030 + self.base64_strings = detect_base64_strings(self.code)
  1031 + for encoded, decoded in self.base64_strings:
  1032 + self.code_base64 += '\n'+decoded
1006 # Detect Dridex-encoded strings 1033 # Detect Dridex-encoded strings
1007 self.dridex_strings = detect_dridex_strings(self.code) 1034 self.dridex_strings = detect_dridex_strings(self.code)
1008 for encoded, decoded in self.dridex_strings: 1035 for encoded, decoded in self.dridex_strings:
@@ -1017,6 +1044,7 @@ class VBA_Scanner (object): @@ -1017,6 +1044,7 @@ class VBA_Scanner (object):
1017 (self.code_hex, 'Hex'), 1044 (self.code_hex, 'Hex'),
1018 (self.code_hex_rev, 'Hex+StrReverse'), 1045 (self.code_hex_rev, 'Hex+StrReverse'),
1019 (self.code_rev_hex, 'StrReverse+Hex'), 1046 (self.code_rev_hex, 'StrReverse+Hex'),
  1047 + (self.code_base64, 'Base64'),
1020 (self.code_dridex, 'Dridex'), 1048 (self.code_dridex, 'Dridex'),
1021 ): 1049 ):
1022 self.autoexec_keywords += detect_autoexec(code, obfuscation) 1050 self.autoexec_keywords += detect_autoexec(code, obfuscation)
@@ -1027,6 +1055,12 @@ class VBA_Scanner (object): @@ -1027,6 +1055,12 @@ class VBA_Scanner (object):
1027 if self.hex_strings: 1055 if self.hex_strings:
1028 self.suspicious_keywords.append(('Hex Strings', 1056 self.suspicious_keywords.append(('Hex Strings',
1029 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)')) 1057 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)'))
  1058 + if self.base64_strings:
  1059 + self.suspicious_keywords.append(('Base64 Strings',
  1060 + 'Base64-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)'))
  1061 + if self.dridex_strings:
  1062 + self.suspicious_keywords.append(('Dridex Strings',
  1063 + 'Dridex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)'))
1030 for keyword, description in self.autoexec_keywords: 1064 for keyword, description in self.autoexec_keywords:
1031 results.append(('AutoExec', keyword, description)) 1065 results.append(('AutoExec', keyword, description))
1032 for keyword, description in self.suspicious_keywords: 1066 for keyword, description in self.suspicious_keywords:
@@ -1036,8 +1070,10 @@ class VBA_Scanner (object): @@ -1036,8 +1070,10 @@ class VBA_Scanner (object):
1036 if include_hex_strings: 1070 if include_hex_strings:
1037 for encoded, decoded in self.hex_strings: 1071 for encoded, decoded in self.hex_strings:
1038 results.append(('Hex String', repr(decoded), encoded)) 1072 results.append(('Hex String', repr(decoded), encoded))
1039 - for encoded, decoded in self.dridex_strings:  
1040 - results.append(('Dridex string', repr(decoded), encoded)) 1073 + for encoded, decoded in self.base64_strings:
  1074 + results.append(('Base64 String', repr(decoded), encoded))
  1075 + for encoded, decoded in self.dridex_strings:
  1076 + results.append(('Dridex string', repr(decoded), encoded))
1041 return results 1077 return results
1042 1078
1043 1079
@@ -1470,7 +1506,7 @@ def main(): @@ -1470,7 +1506,7 @@ def main():
1470 # input file provided with VBA source code to be analyzed directly: 1506 # input file provided with VBA source code to be analyzed directly:
1471 print 'Analysis of VBA source code from %s:' % options.input 1507 print 'Analysis of VBA source code from %s:' % options.input
1472 vba_code = open(options.input).read() 1508 vba_code = open(options.input).read()
1473 - print_analysis(vba_code) 1509 + print_analysis(vba_code, show_hex_strings=options.show_hex_strings)
1474 sys.exit() 1510 sys.exit()
1475 1511
1476 # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr') 1512 # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr')