Commit 732e9a0ab1fbee9333c0ab13774d0e9e5ebcd1a4
1 parent: bcbb6086
olevba: added Dridex obfuscation decoding, improved display, shows obfuscation name
Showing 1 changed file with 136 additions and 44 deletions
oletools/olevba.py
| @@ -109,8 +109,10 @@ https://github.com/unixfreak0037/officeparser | @@ -109,8 +109,10 @@ https://github.com/unixfreak0037/officeparser | ||
| 109 | # 2015-01-24 v0.19 PL: - improved the detection of IOCs obfuscated with hex | 109 | # 2015-01-24 v0.19 PL: - improved the detection of IOCs obfuscated with hex |
| 110 | # strings and StrReverse | 110 | # strings and StrReverse |
| 111 | # 2015-01-26 v0.20 PL: - added option --hex to show all hex strings decoded | 111 | # 2015-01-26 v0.20 PL: - added option --hex to show all hex strings decoded |
| 112 | +# 2015-01-29 v0.21 PL: - added Dridex obfuscation decoding | ||
| 113 | +# - improved display, shows obfuscation name | ||
| 112 | 114 | ||
| 113 | -__version__ = '0.20' | 115 | +__version__ = '0.21' |
| 114 | 116 | ||
| 115 | #------------------------------------------------------------------------------ | 117 | #------------------------------------------------------------------------------ |
| 116 | # TODO: | 118 | # TODO: |
| @@ -814,50 +816,58 @@ def filter_vba(vba_code): | @@ -814,50 +816,58 @@ def filter_vba(vba_code): | ||
| 814 | return vba | 816 | return vba |
| 815 | 817 | ||
| 816 | 818 | ||
| 817 | -def detect_autoexec(vba_code): | 819 | +def detect_autoexec(vba_code, obfuscation=None): |
| 818 | """ | 820 | """ |
| 819 | Detect if the VBA code contains keywords corresponding to macros running | 821 | Detect if the VBA code contains keywords corresponding to macros running |
| 820 | automatically when triggered by specific actions (e.g. when a document is | 822 | automatically when triggered by specific actions (e.g. when a document is |
| 821 | opened or closed). | 823 | opened or closed). |
| 822 | 824 | ||
| 823 | :param vba_code: str, VBA source code | 825 | :param vba_code: str, VBA source code |
| 826 | + :param obfuscation: None or str, name of obfuscation to be added to description | ||
| 824 | :return: list of str tuples (keyword, description) | 827 | :return: list of str tuples (keyword, description) |
| 825 | """ | 828 | """ |
| 826 | #TODO: merge code with detect_suspicious | 829 | #TODO: merge code with detect_suspicious |
| 827 | # case-insensitive search | 830 | # case-insensitive search |
| 828 | #vba_code = vba_code.lower() | 831 | #vba_code = vba_code.lower() |
| 829 | results = [] | 832 | results = [] |
| 833 | + obf_text = '' | ||
| 834 | + if obfuscation: | ||
| 835 | + obf_text = ' (obfuscation: %s)' % obfuscation | ||
| 830 | for description, keywords in AUTOEXEC_KEYWORDS.items(): | 836 | for description, keywords in AUTOEXEC_KEYWORDS.items(): |
| 831 | for keyword in keywords: | 837 | for keyword in keywords: |
| 832 | #TODO: if keyword is already a compiled regex, use it as-is | 838 | #TODO: if keyword is already a compiled regex, use it as-is |
| 833 | # search using regex to detect word boundaries: | 839 | # search using regex to detect word boundaries: |
| 834 | if re.search(r'(?i)\b'+keyword+r'\b', vba_code): | 840 | if re.search(r'(?i)\b'+keyword+r'\b', vba_code): |
| 835 | #if keyword.lower() in vba_code: | 841 | #if keyword.lower() in vba_code: |
| 836 | - results.append((keyword, description)) | 842 | + results.append((keyword, description+obf_text)) |
| 837 | return results | 843 | return results |
| 838 | 844 | ||
| 839 | 845 | ||
| 840 | -def detect_suspicious(vba_code): | 846 | +def detect_suspicious(vba_code, obfuscation=None): |
| 841 | """ | 847 | """ |
| 842 | Detect if the VBA code contains suspicious keywords corresponding to | 848 | Detect if the VBA code contains suspicious keywords corresponding to |
| 843 | potential malware behaviour. | 849 | potential malware behaviour. |
| 844 | 850 | ||
| 845 | :param vba_code: str, VBA source code | 851 | :param vba_code: str, VBA source code |
| 852 | + :param obfuscation: None or str, name of obfuscation to be added to description | ||
| 846 | :return: list of str tuples (keyword, description) | 853 | :return: list of str tuples (keyword, description) |
| 847 | """ | 854 | """ |
| 848 | # case-insensitive search | 855 | # case-insensitive search |
| 849 | #vba_code = vba_code.lower() | 856 | #vba_code = vba_code.lower() |
| 850 | results = [] | 857 | results = [] |
| 858 | + obf_text = '' | ||
| 859 | + if obfuscation: | ||
| 860 | + obf_text = ' (obfuscation: %s)' % obfuscation | ||
| 851 | for description, keywords in SUSPICIOUS_KEYWORDS.items(): | 861 | for description, keywords in SUSPICIOUS_KEYWORDS.items(): |
| 852 | for keyword in keywords: | 862 | for keyword in keywords: |
| 853 | # search using regex to detect word boundaries: | 863 | # search using regex to detect word boundaries: |
| 854 | if re.search(r'(?i)\b'+keyword+r'\b', vba_code): | 864 | if re.search(r'(?i)\b'+keyword+r'\b', vba_code): |
| 855 | #if keyword.lower() in vba_code: | 865 | #if keyword.lower() in vba_code: |
| 856 | - results.append((keyword, description)) | 866 | + results.append((keyword, description+obf_text)) |
| 857 | return results | 867 | return results |
| 858 | 868 | ||
| 859 | 869 | ||
| 860 | -def detect_patterns(vba_code): | 870 | +def detect_patterns(vba_code, obfuscation=None): |
| 861 | """ | 871 | """ |
| 862 | Detect if the VBA code contains specific patterns such as IP addresses, | 872 | Detect if the VBA code contains specific patterns such as IP addresses, |
| 863 | URLs, e-mail addresses, executable file names, etc. | 873 | URLs, e-mail addresses, executable file names, etc. |
| @@ -867,11 +877,14 @@ def detect_patterns(vba_code): | @@ -867,11 +877,14 @@ def detect_patterns(vba_code): | ||
| 867 | """ | 877 | """ |
| 868 | results = [] | 878 | results = [] |
| 869 | found = set() | 879 | found = set() |
| 880 | + obf_text = '' | ||
| 881 | + if obfuscation: | ||
| 882 | + obf_text = ' (obfuscation: %s)' % obfuscation | ||
| 870 | for pattern_type, pattern_re in RE_PATTERNS: | 883 | for pattern_type, pattern_re in RE_PATTERNS: |
| 871 | for match in pattern_re.finditer(vba_code): | 884 | for match in pattern_re.finditer(vba_code): |
| 872 | value = match.group() | 885 | value = match.group() |
| 873 | if value not in found: | 886 | if value not in found: |
| 874 | - results.append((pattern_type, value)) | 887 | + results.append((pattern_type+obf_text, value)) |
| 875 | found.add(value) | 888 | found.add(value) |
| 876 | return results | 889 | return results |
| 877 | 890 | ||
| @@ -894,7 +907,120 @@ def detect_hex_strings(vba_code): | @@ -894,7 +907,120 @@ def detect_hex_strings(vba_code): | ||
| 894 | return results | 907 | return results |
| 895 | 908 | ||
| 896 | 909 | ||
| 897 | -def scan_vba(vba_code, include_hex_strings=False): | 910 | +def detect_dridex_strings(vba_code): |
| 911 | + """ | ||
| 912 | + Detect if the VBA code contains strings obfuscated with a specific algorithm found in Dridex samples. | ||
| 913 | + | ||
| 914 | + :param vba_code: str, VBA source code | ||
| 915 | + :return: list of str tuples (encoded string, decoded string) | ||
| 916 | + """ | ||
| 917 | + from thirdparty.DridexUrlDecoder.DridexUrlDecoder import DridexUrlDecode | ||
| 918 | + results = [] | ||
| 919 | + found = set() | ||
| 920 | + re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') | ||
| 921 | + # regex to check that it is not just a hex string: | ||
| 922 | + re_dridex_check = re.compile(r'[G-Zg-z]') | ||
| 923 | + for match in re_dridex_string.finditer(vba_code): | ||
| 924 | + value = match.group()[1:-1] | ||
| 925 | + if not re_dridex_check.search(value): | ||
| 926 | + continue | ||
| 927 | + if value not in found: | ||
| 928 | + try: | ||
| 929 | + decoded = DridexUrlDecode(value) | ||
| 930 | + results.append((value, decoded)) | ||
| 931 | + found.add(value) | ||
| 932 | + except: | ||
| 933 | + # if an exception occurs, it is likely not a dridex-encoded string | ||
| 934 | + pass | ||
| 935 | + return results | ||
| 936 | + | ||
| 937 | + | ||
| 938 | +class VBA_Scanner (object): | ||
| 939 | + """ | ||
| 940 | + Class to scan the source code of a VBA module to find obfuscated strings, | ||
| 941 | + suspicious keywords, IOCs, auto-executable macros, etc. | ||
| 942 | + """ | ||
| 943 | + | ||
| 944 | + def __init__(self, vba_code): | ||
| 945 | + """ | ||
| 946 | + VBA_Scanner constructor | ||
| 947 | + | ||
| 948 | + :param vba_code: str, VBA source code to be analyzed | ||
| 949 | + """ | ||
| 950 | + self.code = vba_code | ||
| 951 | + self.code_hex = '' | ||
| 952 | + self.code_hex_rev = '' | ||
| 953 | + self.code_rev_hex = '' | ||
| 954 | + self.code_dridex = '' | ||
| 955 | + | ||
| 956 | + | ||
| 957 | + def scan(self, include_hex_strings=False): | ||
| 958 | + """ | ||
| 959 | + Analyze the provided VBA code to detect suspicious keywords, | ||
| 960 | + auto-executable macros, IOC patterns, obfuscation patterns | ||
| 961 | + such as hex-encoded strings. | ||
| 962 | + | ||
| 963 | + :param include_hex_strings: bool, if True hex-encoded strings will be included with their decoded content. | ||
| 964 | + :return: list of tuples (type, keyword, description) | ||
| 965 | + (type = 'AutoExec', 'Suspicious', 'IOC' or 'Hex String') | ||
| 966 | + """ | ||
| 967 | + # First, detect and extract hex-encoded strings: | ||
| 968 | + self.hex_strings = detect_hex_strings(self.code) | ||
| 969 | + # detect if the code contains StrReverse: | ||
| 970 | + self.strReverse = False | ||
| 971 | + if 'strreverse' in self.code.lower(): self.strReverse = True | ||
| 972 | + # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords: | ||
| 973 | + for encoded, decoded in self.hex_strings: | ||
| 974 | + self.code_hex += '\n'+decoded | ||
| 975 | + # if the code contains "StrReverse", also append the hex strings in reverse order: | ||
| 976 | + if self.strReverse: | ||
| 977 | + # StrReverse after hex decoding: | ||
| 978 | + self.code_hex_rev += '\n'+decoded[::-1] | ||
| 979 | + # StrReverse before hex decoding: | ||
| 980 | + self.code_rev_hex += '\n'+binascii.unhexlify(encoded[::-1]) | ||
| 981 | + #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ | ||
| 982 | + #TODO: also append the full code reversed if StrReverse? (risk of false positives?) | ||
| 983 | + #TODO: show which IOCs have been found using hex, strrev or both | ||
| 984 | + # Detect Dridex-encoded strings | ||
| 985 | + self.dridex_strings = detect_dridex_strings(self.code) | ||
| 986 | + for encoded, decoded in self.dridex_strings: | ||
| 987 | + self.code_dridex += '\n'+decoded | ||
| 988 | + results = [] | ||
| 989 | + self.autoexec_keywords = [] | ||
| 990 | + self.suspicious_keywords = [] | ||
| 991 | + self.iocs = [] | ||
| 992 | + | ||
| 993 | + for code, obfuscation in ( | ||
| 994 | + (self.code, None), | ||
| 995 | + (self.code_hex, 'Hex'), | ||
| 996 | + (self.code_hex_rev, 'Hex+StrReverse'), | ||
| 997 | + (self.code_rev_hex, 'StrReverse+Hex'), | ||
| 998 | + (self.code_dridex, 'Dridex'), | ||
| 999 | + ): | ||
| 1000 | + self.autoexec_keywords += detect_autoexec(code, obfuscation) | ||
| 1001 | + self.suspicious_keywords += detect_suspicious(code, obfuscation) | ||
| 1002 | + self.iocs += detect_patterns(code, obfuscation) | ||
| 1003 | + | ||
| 1004 | + # If hex-encoded strings were discovered, add an item to suspicious keywords: | ||
| 1005 | + if self.hex_strings: | ||
| 1006 | + self.suspicious_keywords.append(('Hex Strings', | ||
| 1007 | + 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)')) | ||
| 1008 | + for keyword, description in self.autoexec_keywords: | ||
| 1009 | + results.append(('AutoExec', keyword, description)) | ||
| 1010 | + for keyword, description in self.suspicious_keywords: | ||
| 1011 | + results.append(('Suspicious', keyword, description)) | ||
| 1012 | + for pattern_type, value in self.iocs: | ||
| 1013 | + results.append(('IOC', value, pattern_type)) | ||
| 1014 | + if include_hex_strings: | ||
| 1015 | + for encoded, decoded in self.hex_strings: | ||
| 1016 | + results.append(('Hex String', repr(decoded), encoded)) | ||
| 1017 | + for encoded, decoded in self.dridex_strings: | ||
| 1018 | + results.append(('Dridex string', repr(decoded), encoded)) | ||
| 1019 | + return results | ||
| 1020 | + | ||
| 1021 | + | ||
| 1022 | + | ||
| 1023 | +def scan_vba(vba_code, include_hex_strings): | ||
| 898 | """ | 1024 | """ |
| 899 | Analyze the provided VBA code to detect suspicious keywords, | 1025 | Analyze the provided VBA code to detect suspicious keywords, |
| 900 | auto-executable macros, IOC patterns, obfuscation patterns | 1026 | auto-executable macros, IOC patterns, obfuscation patterns |
| @@ -905,41 +1031,7 @@ def scan_vba(vba_code, include_hex_strings=False): | @@ -905,41 +1031,7 @@ def scan_vba(vba_code, include_hex_strings=False): | ||
| 905 | :return: list of tuples (type, keyword, description) | 1031 | :return: list of tuples (type, keyword, description) |
| 906 | (type = 'AutoExec', 'Suspicious', 'IOC' or 'Hex String') | 1032 | (type = 'AutoExec', 'Suspicious', 'IOC' or 'Hex String') |
| 907 | """ | 1033 | """ |
| 908 | - # First, detect and extract hex-encoded strings: | ||
| 909 | - hex_strings = detect_hex_strings(vba_code) | ||
| 910 | - # detect if the code contains StrReverse: | ||
| 911 | - if 'strreverse' in vba_code.lower(): strreverse = True | ||
| 912 | - else: strreverse = False | ||
| 913 | - # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords: | ||
| 914 | - for encoded, decoded in hex_strings: | ||
| 915 | - vba_code += '\n'+decoded | ||
| 916 | - # if the code contains "StrReverse", also append the hex strings in reverse order: | ||
| 917 | - if strreverse: | ||
| 918 | - # StrReverse after hex decoding: | ||
| 919 | - vba_code += '\n'+decoded[::-1] | ||
| 920 | - # StrReverse before hex decoding: | ||
| 921 | - vba_code += '\n'+binascii.unhexlify(encoded[::-1]) | ||
| 922 | - #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ | ||
| 923 | - #TODO: also append the full code reversed if StrReverse? (risk of false positives?) | ||
| 924 | - #TODO: show which IOCs have been found using hex, strrev or both | ||
| 925 | - autoexec_keywords = detect_autoexec(vba_code) | ||
| 926 | - suspicious_keywords = detect_suspicious(vba_code) | ||
| 927 | - # If hex-encoded strings were discovered, add an item to suspicious keywords: | ||
| 928 | - if hex_strings: | ||
| 929 | - suspicious_keywords.append(('Hex Strings', | ||
| 930 | - 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)')) | ||
| 931 | - patterns = detect_patterns(vba_code) | ||
| 932 | - results = [] | ||
| 933 | - for keyword, description in autoexec_keywords: | ||
| 934 | - results.append(('AutoExec', keyword, description)) | ||
| 935 | - for keyword, description in suspicious_keywords: | ||
| 936 | - results.append(('Suspicious', keyword, description)) | ||
| 937 | - for pattern_type, value in patterns: | ||
| 938 | - results.append(('IOC', value, pattern_type)) | ||
| 939 | - if include_hex_strings: | ||
| 940 | - for encoded, decoded in hex_strings: | ||
| 941 | - results.append(('Hex String', repr(decoded), encoded)) | ||
| 942 | - return results | 1034 | + return VBA_Scanner(vba_code).scan(include_hex_strings) |
| 943 | 1035 | ||
| 944 | 1036 | ||
| 945 | #=== CLASSES ================================================================= | 1037 | #=== CLASSES ================================================================= |
| @@ -1226,7 +1318,7 @@ def process_file (container, filename, data, show_hex_strings=False): | @@ -1226,7 +1318,7 @@ def process_file (container, filename, data, show_hex_strings=False): | ||
| 1226 | else: | 1318 | else: |
| 1227 | print 'No VBA macros found.' | 1319 | print 'No VBA macros found.' |
| 1228 | except: #TypeError: | 1320 | except: #TypeError: |
| 1229 | - #raise | 1321 | + raise |
| 1230 | #TODO: print more info if debug mode | 1322 | #TODO: print more info if debug mode |
| 1231 | print sys.exc_value | 1323 | print sys.exc_value |
| 1232 | print '' | 1324 | print '' |