Commit 732e9a0ab1fbee9333c0ab13774d0e9e5ebcd1a4

Authored by Philippe Lagadec
1 parent bcbb6086

olevba: added Dridex obfuscation decoding, improved display, shows obfuscation name

Showing 1 changed file with 136 additions and 44 deletions
oletools/olevba.py
@@ -109,8 +109,10 @@ https://github.com/unixfreak0037/officeparser @@ -109,8 +109,10 @@ https://github.com/unixfreak0037/officeparser
109 # 2015-01-24 v0.19 PL: - improved the detection of IOCs obfuscated with hex 109 # 2015-01-24 v0.19 PL: - improved the detection of IOCs obfuscated with hex
110 # strings and StrReverse 110 # strings and StrReverse
111 # 2015-01-26 v0.20 PL: - added option --hex to show all hex strings decoded 111 # 2015-01-26 v0.20 PL: - added option --hex to show all hex strings decoded
  112 +# 2015-01-29 v0.21 PL: - added Dridex obfuscation decoding
  113 +# - improved display, shows obfuscation name
112 114
113 -__version__ = '0.20' 115 +__version__ = '0.21'
114 116
115 #------------------------------------------------------------------------------ 117 #------------------------------------------------------------------------------
116 # TODO: 118 # TODO:
@@ -814,50 +816,58 @@ def filter_vba(vba_code): @@ -814,50 +816,58 @@ def filter_vba(vba_code):
814 return vba 816 return vba
815 817
816 818
def detect_autoexec(vba_code, obfuscation=None):
    """
    Look for keywords of macros that run automatically when triggered by
    specific actions (e.g. when a document is opened or closed).

    Each keyword listed in AUTOEXEC_KEYWORDS is searched case-insensitively
    as a whole word (regex word boundaries on both sides).

    :param vba_code: str, VBA source code
    :param obfuscation: None or str, name of the obfuscation the code was
        recovered from; when set, ' (obfuscation: <name>)' is appended to
        every description
    :return: list of str tuples (keyword, description)
    """
    #TODO: merge code with detect_suspicious
    #TODO: if keyword is already a compiled regex, use it as-is
    suffix = (' (obfuscation: %s)' % obfuscation) if obfuscation else ''
    return [
        (word, label + suffix)
        for label, words in AUTOEXEC_KEYWORDS.items()
        for word in words
        # regex search so that only whole words match:
        if re.search(r'(?i)\b' + word + r'\b', vba_code)
    ]
838 844
839 845
def detect_suspicious(vba_code, obfuscation=None):
    """
    Look for suspicious keywords corresponding to potential malware
    behaviour.

    Each keyword listed in SUSPICIOUS_KEYWORDS is searched case-insensitively
    as a whole word (regex word boundaries on both sides).

    :param vba_code: str, VBA source code
    :param obfuscation: None or str, name of the obfuscation the code was
        recovered from; when set, ' (obfuscation: <name>)' is appended to
        every description
    :return: list of str tuples (keyword, description)
    """
    suffix = (' (obfuscation: %s)' % obfuscation) if obfuscation else ''
    hits = []
    for label, words in SUSPICIOUS_KEYWORDS.items():
        tagged_label = label + suffix
        # regex search so that only whole words match:
        hits.extend(
            (word, tagged_label)
            for word in words
            if re.search(r'(?i)\b' + word + r'\b', vba_code)
        )
    return hits
858 868
859 869
def detect_patterns(vba_code, obfuscation=None):
    """
    Detect if the VBA code contains specific patterns such as IP addresses,
    URLs, e-mail addresses, executable file names, etc.

    Every (pattern type, compiled regex) pair of RE_PATTERNS is applied to
    the code. A matched value is reported only once per call, under the
    first pattern type that matched it.

    :param vba_code: str, VBA source code
    :param obfuscation: None or str, name of the obfuscation the code was
        recovered from; when set, ' (obfuscation: <name>)' is appended to
        the pattern type
    :return: list of str tuples (pattern type, value)
    """
    suffix = (' (obfuscation: %s)' % obfuscation) if obfuscation else ''
    seen = set()
    hits = []
    for kind, regex in RE_PATTERNS:
        for hit in regex.finditer(vba_code):
            text = hit.group()
            if text in seen:
                # value already reported during this call
                continue
            hits.append((kind + suffix, text))
            seen.add(text)
    return hits
877 890
@@ -894,7 +907,120 @@ def detect_hex_strings(vba_code): @@ -894,7 +907,120 @@ def detect_hex_strings(vba_code):
894 return results 907 return results
895 908
896 909
def detect_dridex_strings(vba_code, decoder=None):
    """
    Detect if the VBA code contains strings obfuscated with a specific
    algorithm found in Dridex samples.

    Candidates are double-quoted runs of at least 20 alphanumeric characters
    that contain at least one letter outside the hex range (G-Z, g-z), so
    that plain hex strings are not reported here. Each distinct candidate is
    passed to the decoder once; candidates for which the decoder raises an
    exception are considered not Dridex-encoded and are left out.

    :param vba_code: str, VBA source code
    :param decoder: None or callable taking the encoded str and returning the
        decoded str. When None, DridexUrlDecode from the bundled
        thirdparty.DridexUrlDecoder package is used.
    :return: list of str tuples (encoded string, decoded string)
    """
    if decoder is None:
        # kept as a function-level import, as in the original code
        from thirdparty.DridexUrlDecoder.DridexUrlDecoder import DridexUrlDecode
        decoder = DridexUrlDecode
    re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"')
    # regex to check that it is not just a hex string:
    re_dridex_check = re.compile(r'[G-Zg-z]')
    results = []
    found = set()
    for match in re_dridex_string.finditer(vba_code):
        # strip the surrounding double quotes:
        value = match.group()[1:-1]
        if value in found or not re_dridex_check.search(value):
            continue
        # remember the value even if decoding fails, so that a repeated
        # candidate is never decoded twice
        found.add(value)
        try:
            decoded = decoder(value)
        except Exception:
            # if an exception occurs, it is likely not a dridex-encoded
            # string. KeyboardInterrupt/SystemExit are not swallowed.
            continue
        results.append((value, decoded))
    return results
  936 +
  937 +
class VBA_Scanner (object):
    """
    Class to scan the source code of a VBA module to find obfuscated strings,
    suspicious keywords, IOCs, auto-executable macros, etc.

    The intermediate results of the last scan() call stay available as
    attributes (hex_strings, dridex_strings, autoexec_keywords,
    suspicious_keywords, iocs, and the decoded text buffers code_hex,
    code_hex_rev, code_rev_hex and code_dridex).
    """

    def __init__(self, vba_code):
        """
        VBA_Scanner constructor

        :param vba_code: str, VBA source code to be analyzed
        """
        self.code = vba_code
        self._reset()

    def _reset(self):
        """
        Clear every attribute filled by scan(), so that scanning the same
        object twice does not accumulate results of the previous scan.
        """
        # decoded text, one buffer per obfuscation type:
        self.code_hex = ''
        self.code_hex_rev = ''
        self.code_rev_hex = ''
        self.code_dridex = ''
        # lists of (encoded, decoded) tuples:
        self.hex_strings = []
        self.dridex_strings = []
        # True if the code contains "StrReverse" (case-insensitive):
        self.strReverse = False
        # detection results:
        self.autoexec_keywords = []
        self.suspicious_keywords = []
        self.iocs = []

    def scan(self, include_hex_strings=False):
        """
        Analyze the provided VBA code to detect suspicious keywords,
        auto-executable macros, IOC patterns, obfuscation patterns
        such as hex-encoded strings.

        The detections run on the plain code and then on the text decoded
        from each obfuscation type; results found in decoded text carry the
        obfuscation name in their description.

        :param include_hex_strings: bool, if True hex-encoded and
            Dridex-encoded strings will be included with their decoded content.
        :return: list of tuples (type, keyword, description)
            (type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String' or
            'Dridex string')
        """
        self._reset()
        # First, detect and extract hex-encoded strings:
        self.hex_strings = detect_hex_strings(self.code)
        # detect if the code contains StrReverse:
        self.strReverse = 'strreverse' in self.code.lower()
        # Then collect the decoded strings, to detect obfuscated IOCs and keywords:
        hex_parts = []
        hex_rev_parts = []
        rev_hex_parts = []
        for encoded, decoded in self.hex_strings:
            hex_parts.append(decoded)
            # if the code contains "StrReverse", also keep the hex strings in reverse order:
            if self.strReverse:
                # StrReverse after hex decoding:
                hex_rev_parts.append(decoded[::-1])
                # StrReverse before hex decoding:
                rev_hex_parts.append(binascii.unhexlify(encoded[::-1]))
                #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
        # every decoded string goes on its own line, preceded by a newline:
        self.code_hex = ''.join('\n' + part for part in hex_parts)
        self.code_hex_rev = ''.join('\n' + part for part in hex_rev_parts)
        self.code_rev_hex = ''.join('\n' + part for part in rev_hex_parts)
        #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
        #TODO: show which IOCs have been found using hex, strrev or both
        # Detect Dridex-encoded strings
        self.dridex_strings = detect_dridex_strings(self.code)
        self.code_dridex = ''.join(
            '\n' + decoded for encoded, decoded in self.dridex_strings)

        # run every detection on the plain code, then on each decoded buffer:
        for code, obfuscation in (
                (self.code, None),
                (self.code_hex, 'Hex'),
                (self.code_hex_rev, 'Hex+StrReverse'),
                (self.code_rev_hex, 'StrReverse+Hex'),
                (self.code_dridex, 'Dridex'),
        ):
            self.autoexec_keywords += detect_autoexec(code, obfuscation)
            self.suspicious_keywords += detect_suspicious(code, obfuscation)
            self.iocs += detect_patterns(code, obfuscation)

        # If hex-encoded strings were discovered, add an item to suspicious keywords:
        if self.hex_strings:
            self.suspicious_keywords.append(('Hex Strings',
                'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)'))

        results = []
        for keyword, description in self.autoexec_keywords:
            results.append(('AutoExec', keyword, description))
        for keyword, description in self.suspicious_keywords:
            results.append(('Suspicious', keyword, description))
        for pattern_type, value in self.iocs:
            results.append(('IOC', value, pattern_type))
        if include_hex_strings:
            for encoded, decoded in self.hex_strings:
                results.append(('Hex String', repr(decoded), encoded))
            # NOTE(review): Dridex strings are assumed to be listed under the
            # same option as hex strings - confirm against the original file
            for encoded, decoded in self.dridex_strings:
                results.append(('Dridex string', repr(decoded), encoded))
        return results
  1020 +
  1021 +
  1022 +
def scan_vba(vba_code, include_hex_strings=False):
    """
    Analyze the provided VBA code to detect suspicious keywords,
    auto-executable macros, IOC patterns, obfuscation patterns
    such as hex-encoded strings.

    Convenience wrapper: creates a VBA_Scanner for the code and returns the
    result of its scan() method.

    :param vba_code: str, VBA source code to be analyzed
    :param include_hex_strings: bool, passed to VBA_Scanner.scan; if True the
        decoded obfuscated strings are included in the results. Defaults to
        False so that callers passing only the code keep working.
    :return: list of tuples (type, keyword, description)
        (type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String' or
        'Dridex string')
    """
    return VBA_Scanner(vba_code).scan(include_hex_strings)
943 1035
944 1036
945 #=== CLASSES ================================================================= 1037 #=== CLASSES =================================================================
@@ -1226,7 +1318,7 @@ def process_file (container, filename, data, show_hex_strings=False): @@ -1226,7 +1318,7 @@ def process_file (container, filename, data, show_hex_strings=False):
1226 else: 1318 else:
1227 print 'No VBA macros found.' 1319 print 'No VBA macros found.'
1228 except: #TypeError: 1320 except: #TypeError:
1229 - #raise 1321 + raise
1230 #TODO: print more info if debug mode 1322 #TODO: print more info if debug mode
1231 print sys.exc_value 1323 print sys.exc_value
1232 print '' 1324 print ''