Commit 5dfb7b56b9b050b818c2c7544ed026427b5b1aaf

Authored by Philippe Lagadec
1 parent 89272589

olevba: added Base64 obfuscation decoding (contribution from @JamesHabben)

Showing 1 changed file with 40 additions and 4 deletions
oletools/olevba.py
... ... @@ -112,6 +112,8 @@ https://github.com/unixfreak0037/officeparser
112 112 # 2015-01-29 v0.21 PL: - added Dridex obfuscation decoding
113 113 # - improved display, shows obfuscation name
114 114 # 2015-02-01 v0.22 PL: - fixed issue #4: regex for URL, e-mail and exe filename
  115 +# - added Base64 obfuscation decoding (contribution from
  116 +# @JamesHabben)
115 117  
116 118 __version__ = '0.22'
117 119  
... ... @@ -121,7 +123,6 @@ __version__ = '0.22'
121 123 # + setup logging (common with other oletools)
122 124  
123 125 # TODO later:
124   -# + do not show hex strings by default (add option --hex)
125 126 # + performance improvement: instead of searching each keyword separately,
126 127 # first split vba code into a list of words (per line), then check each
127 128 # word against a dict. (or put vba words into a set/dict?)
... ... @@ -156,6 +157,7 @@ import re
156 157 import optparse
157 158 import os.path
158 159 import binascii
  160 +import base64
159 161  
160 162 import thirdparty.olefile as olefile
161 163 from thirdparty.prettytable import prettytable
... ... @@ -289,6 +291,8 @@ RE_PATTERNS = (
289 291 # regex to detect strings encoded in hexadecimal
290 292 re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')
291 293  
  294 +# regex to detect double-quoted string literals encoded in base64 (the match includes the enclosing quotes)
  295 +re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"')
292 296  
293 297 #--- FUNCTIONS ----------------------------------------------------------------
294 298  
... ... @@ -929,6 +933,24 @@ def detect_hex_strings(vba_code):
929 933 return results
930 934  
931 935  
def detect_base64_strings(vba_code):
    """
    Detect if the VBA code contains strings encoded in base64.

    Candidates are the matches of the module-level regex re_base64_string,
    which matches whole double-quoted string literals. The enclosing quotes
    are removed before decoding, so the encoded value returned is the
    base64 payload only. Empty literals ("") and candidates that cannot be
    decoded are ignored, and each distinct payload is reported only once.

    :param vba_code: str, VBA source code
    :return: list of str tuples (encoded string, decoded string)
    """
    results = []
    found = set()
    for match in re_base64_string.finditer(vba_code):
        # the regex matches the whole VBA string literal: drop the enclosing
        # double quotes to keep only the base64 payload
        value = match.group().strip('"')
        # an empty literal ("") also matches the regex but encodes nothing,
        # and reporting it would flag almost any VBA code as obfuscated
        if not value or value in found:
            continue
        try:
            decoded = base64.b64decode(value)
        except (TypeError, ValueError):
            # defensive: TypeError is raised on Python 2, binascii.Error
            # (a ValueError) on Python 3; such a candidate is not base64
            continue
        results.append((value, decoded))
        found.add(value)
    return results
  952 +
  953 +
932 954 def detect_dridex_strings(vba_code):
933 955 """
934 956 Detect if the VBA code contains strings obfuscated with a specific algorithm found in Dridex samples.
... ... @@ -973,6 +995,7 @@ class VBA_Scanner (object):
973 995 self.code_hex = ''
974 996 self.code_hex_rev = ''
975 997 self.code_rev_hex = ''
  998 + self.code_base64 = ''
976 999 self.code_dridex = ''
977 1000  
978 1001  
... ... @@ -1003,6 +1026,10 @@ class VBA_Scanner (object):
1003 1026 #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
1004 1027 #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
1005 1028 #TODO: show which IOCs have been found using hex, strrev or both
  1029 + # Detect Base64-encoded strings
  1030 + self.base64_strings = detect_base64_strings(self.code)
  1031 + for encoded, decoded in self.base64_strings:
  1032 + self.code_base64 += '\n'+decoded
1006 1033 # Detect Dridex-encoded strings
1007 1034 self.dridex_strings = detect_dridex_strings(self.code)
1008 1035 for encoded, decoded in self.dridex_strings:
... ... @@ -1017,6 +1044,7 @@ class VBA_Scanner (object):
1017 1044 (self.code_hex, 'Hex'),
1018 1045 (self.code_hex_rev, 'Hex+StrReverse'),
1019 1046 (self.code_rev_hex, 'StrReverse+Hex'),
  1047 + (self.code_base64, 'Base64'),
1020 1048 (self.code_dridex, 'Dridex'),
1021 1049 ):
1022 1050 self.autoexec_keywords += detect_autoexec(code, obfuscation)
... ... @@ -1027,6 +1055,12 @@ class VBA_Scanner (object):
1027 1055 if self.hex_strings:
1028 1056 self.suspicious_keywords.append(('Hex Strings',
1029 1057 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)'))
  1058 + if self.base64_strings:
  1059 + self.suspicious_keywords.append(('Base64 Strings',
  1060 + 'Base64-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)'))
  1061 + if self.dridex_strings:
  1062 + self.suspicious_keywords.append(('Dridex Strings',
  1063 + 'Dridex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)'))
1030 1064 for keyword, description in self.autoexec_keywords:
1031 1065 results.append(('AutoExec', keyword, description))
1032 1066 for keyword, description in self.suspicious_keywords:
... ... @@ -1036,8 +1070,10 @@ class VBA_Scanner (object):
1036 1070 if include_hex_strings:
1037 1071 for encoded, decoded in self.hex_strings:
1038 1072 results.append(('Hex String', repr(decoded), encoded))
1039   - for encoded, decoded in self.dridex_strings:
1040   - results.append(('Dridex string', repr(decoded), encoded))
  1073 + for encoded, decoded in self.base64_strings:
  1074 + results.append(('Base64 String', repr(decoded), encoded))
  1075 + for encoded, decoded in self.dridex_strings:
  1076 + results.append(('Dridex string', repr(decoded), encoded))
1041 1077 return results
1042 1078  
1043 1079  
... ... @@ -1470,7 +1506,7 @@ def main():
1470 1506 # input file provided with VBA source code to be analyzed directly:
1471 1507 print 'Analysis of VBA source code from %s:' % options.input
1472 1508 vba_code = open(options.input).read()
1473   - print_analysis(vba_code)
  1509 + print_analysis(vba_code, show_hex_strings=options.show_hex_strings)
1474 1510 sys.exit()
1475 1511  
1476 1512 # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr')
... ...