Commit 732e9a0ab1fbee9333c0ab13774d0e9e5ebcd1a4

Authored by Philippe Lagadec
1 parent bcbb6086

olevba: added Dridex obfuscation decoding, improved display, shows obfuscation name

Showing 1 changed file with 136 additions and 44 deletions
oletools/olevba.py
... ... @@ -109,8 +109,10 @@ https://github.com/unixfreak0037/officeparser
109 109 # 2015-01-24 v0.19 PL: - improved the detection of IOCs obfuscated with hex
110 110 # strings and StrReverse
111 111 # 2015-01-26 v0.20 PL: - added option --hex to show all hex strings decoded
  112 +# 2015-01-29 v0.21 PL: - added Dridex obfuscation decoding
  113 +# - improved display, shows obfuscation name
112 114  
113   -__version__ = '0.20'
  115 +__version__ = '0.21'
114 116  
115 117 #------------------------------------------------------------------------------
116 118 # TODO:
... ... @@ -814,50 +816,58 @@ def filter_vba(vba_code):
814 816 return vba
815 817  
816 818  
817   -def detect_autoexec(vba_code):
  819 +def detect_autoexec(vba_code, obfuscation=None):
818 820 """
819 821 Detect if the VBA code contains keywords corresponding to macros running
820 822 automatically when triggered by specific actions (e.g. when a document is
821 823 opened or closed).
822 824  
823 825 :param vba_code: str, VBA source code
  826 + :param obfuscation: None or str, name of obfuscation to be added to description
824 827 :return: list of str tuples (keyword, description)
825 828 """
826 829 #TODO: merge code with detect_suspicious
827 830 # case-insensitive search
828 831 #vba_code = vba_code.lower()
829 832 results = []
  833 + obf_text = ''
  834 + if obfuscation:
  835 + obf_text = ' (obfuscation: %s)' % obfuscation
830 836 for description, keywords in AUTOEXEC_KEYWORDS.items():
831 837 for keyword in keywords:
832 838 #TODO: if keyword is already a compiled regex, use it as-is
833 839 # search using regex to detect word boundaries:
834 840 if re.search(r'(?i)\b'+keyword+r'\b', vba_code):
835 841 #if keyword.lower() in vba_code:
836   - results.append((keyword, description))
  842 + results.append((keyword, description+obf_text))
837 843 return results
838 844  
839 845  
840   -def detect_suspicious(vba_code):
  846 +def detect_suspicious(vba_code, obfuscation=None):
841 847 """
842 848 Detect if the VBA code contains suspicious keywords corresponding to
843 849 potential malware behaviour.
844 850  
845 851 :param vba_code: str, VBA source code
  852 + :param obfuscation: None or str, name of obfuscation to be added to description
846 853 :return: list of str tuples (keyword, description)
847 854 """
848 855 # case-insensitive search
849 856 #vba_code = vba_code.lower()
850 857 results = []
  858 + obf_text = ''
  859 + if obfuscation:
  860 + obf_text = ' (obfuscation: %s)' % obfuscation
851 861 for description, keywords in SUSPICIOUS_KEYWORDS.items():
852 862 for keyword in keywords:
853 863 # search using regex to detect word boundaries:
854 864 if re.search(r'(?i)\b'+keyword+r'\b', vba_code):
855 865 #if keyword.lower() in vba_code:
856   - results.append((keyword, description))
  866 + results.append((keyword, description+obf_text))
857 867 return results
858 868  
859 869  
860   -def detect_patterns(vba_code):
  870 +def detect_patterns(vba_code, obfuscation=None):
861 871 """
862 872 Detect if the VBA code contains specific patterns such as IP addresses,
863 873 URLs, e-mail addresses, executable file names, etc.
... ... @@ -867,11 +877,14 @@ def detect_patterns(vba_code):
867 877 """
868 878 results = []
869 879 found = set()
  880 + obf_text = ''
  881 + if obfuscation:
  882 + obf_text = ' (obfuscation: %s)' % obfuscation
870 883 for pattern_type, pattern_re in RE_PATTERNS:
871 884 for match in pattern_re.finditer(vba_code):
872 885 value = match.group()
873 886 if value not in found:
874   - results.append((pattern_type, value))
  887 + results.append((pattern_type+obf_text, value))
875 888 found.add(value)
876 889 return results
877 890  
... ... @@ -894,7 +907,120 @@ def detect_hex_strings(vba_code):
894 907 return results
895 908  
896 909  
897   -def scan_vba(vba_code, include_hex_strings=False):
  910 +def detect_dridex_strings(vba_code):
  911 + """
  912 + Detect if the VBA code contains strings obfuscated with a specific algorithm found in Dridex samples.
  913 +
  914 + :param vba_code: str, VBA source code
  915 + :return: list of str tuples (encoded string, decoded string)
  916 + """
  917 + from thirdparty.DridexUrlDecoder.DridexUrlDecoder import DridexUrlDecode
  918 + results = []
  919 + found = set()
  920 + re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"')
  921 + # regex to check that it is not just a hex string:
  922 + re_dridex_check = re.compile(r'[G-Zg-z]')
  923 + for match in re_dridex_string.finditer(vba_code):
  924 + value = match.group()[1:-1]
  925 + if not re_dridex_check.search(value):
  926 + continue
  927 + if value not in found:
  928 + try:
  929 + decoded = DridexUrlDecode(value)
  930 + results.append((value, decoded))
  931 + found.add(value)
  932 + except:
  933 + # if an exception occurs, it is likely not a dridex-encoded string
  934 + pass
  935 + return results
  936 +
  937 +
  938 +class VBA_Scanner (object):
  939 + """
  940 + Class to scan the source code of a VBA module to find obfuscated strings,
  941 + suspicious keywords, IOCs, auto-executable macros, etc.
  942 + """
  943 +
  944 + def __init__(self, vba_code):
  945 + """
  946 + VBA_Scanner constructor
  947 +
  948 + :param vba_code: str, VBA source code to be analyzed
  949 + """
  950 + self.code = vba_code
  951 + self.code_hex = ''
  952 + self.code_hex_rev = ''
  953 + self.code_rev_hex = ''
  954 + self.code_dridex = ''
  955 +
  956 +
  957 + def scan(self, include_hex_strings=False):
  958 + """
  959 + Analyze the provided VBA code to detect suspicious keywords,
  960 + auto-executable macros, IOC patterns, obfuscation patterns
  961 + such as hex-encoded strings.
  962 +
  963 + :param include_hex_strings: bool, if True hex-encoded strings will be included with their decoded content.
  964 + :return: list of tuples (type, keyword, description)
  965 + (type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String' or 'Dridex string')
  966 + """
  967 + # First, detect and extract hex-encoded strings:
  968 + self.hex_strings = detect_hex_strings(self.code)
  969 + # detect if the code contains StrReverse:
  970 + self.strReverse = False
  971 + if 'strreverse' in self.code.lower(): self.strReverse = True
  972 + # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords:
  973 + for encoded, decoded in self.hex_strings:
  974 + self.code_hex += '\n'+decoded
  975 + # if the code contains "StrReverse", also append the hex strings in reverse order:
  976 + if self.strReverse:
  977 + # StrReverse after hex decoding:
  978 + self.code_hex_rev += '\n'+decoded[::-1]
  979 + # StrReverse before hex decoding:
  980 + self.code_rev_hex += '\n'+binascii.unhexlify(encoded[::-1])
  981 + #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
  982 + #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
  983 + #TODO: show which IOCs have been found using hex, strrev or both
  984 + # Detect Dridex-encoded strings
  985 + self.dridex_strings = detect_dridex_strings(self.code)
  986 + for encoded, decoded in self.dridex_strings:
  987 + self.code_dridex += '\n'+decoded
  988 + results = []
  989 + self.autoexec_keywords = []
  990 + self.suspicious_keywords = []
  991 + self.iocs = []
  992 +
  993 + for code, obfuscation in (
  994 + (self.code, None),
  995 + (self.code_hex, 'Hex'),
  996 + (self.code_hex_rev, 'Hex+StrReverse'),
  997 + (self.code_rev_hex, 'StrReverse+Hex'),
  998 + (self.code_dridex, 'Dridex'),
  999 + ):
  1000 + self.autoexec_keywords += detect_autoexec(code, obfuscation)
  1001 + self.suspicious_keywords += detect_suspicious(code, obfuscation)
  1002 + self.iocs += detect_patterns(code, obfuscation)
  1003 +
  1004 + # If hex-encoded strings were discovered, add an item to suspicious keywords:
  1005 + if self.hex_strings:
  1006 + self.suspicious_keywords.append(('Hex Strings',
  1007 + 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)'))
  1008 + for keyword, description in self.autoexec_keywords:
  1009 + results.append(('AutoExec', keyword, description))
  1010 + for keyword, description in self.suspicious_keywords:
  1011 + results.append(('Suspicious', keyword, description))
  1012 + for pattern_type, value in self.iocs:
  1013 + results.append(('IOC', value, pattern_type))
  1014 + if include_hex_strings:
  1015 + for encoded, decoded in self.hex_strings:
  1016 + results.append(('Hex String', repr(decoded), encoded))
  1017 + for encoded, decoded in self.dridex_strings:
  1018 + results.append(('Dridex string', repr(decoded), encoded))
  1019 + return results
  1020 +
  1021 +
  1022 +
  1023 +def scan_vba(vba_code, include_hex_strings):
898 1024 """
899 1025 Analyze the provided VBA code to detect suspicious keywords,
900 1026 auto-executable macros, IOC patterns, obfuscation patterns
... ... @@ -905,41 +1031,7 @@ def scan_vba(vba_code, include_hex_strings=False):
905 1031 :return: list of tuples (type, keyword, description)
906 1032 (type = 'AutoExec', 'Suspicious', 'IOC' or 'Hex String')
907 1033 """
908   - # First, detect and extract hex-encoded strings:
909   - hex_strings = detect_hex_strings(vba_code)
910   - # detect if the code contains StrReverse:
911   - if 'strreverse' in vba_code.lower(): strreverse = True
912   - else: strreverse = False
913   - # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords:
914   - for encoded, decoded in hex_strings:
915   - vba_code += '\n'+decoded
916   - # if the code contains "StrReverse", also append the hex strings in reverse order:
917   - if strreverse:
918   - # StrReverse after hex decoding:
919   - vba_code += '\n'+decoded[::-1]
920   - # StrReverse before hex decoding:
921   - vba_code += '\n'+binascii.unhexlify(encoded[::-1])
922   - #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
923   - #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
924   - #TODO: show which IOCs have been found using hex, strrev or both
925   - autoexec_keywords = detect_autoexec(vba_code)
926   - suspicious_keywords = detect_suspicious(vba_code)
927   - # If hex-encoded strings were discovered, add an item to suspicious keywords:
928   - if hex_strings:
929   - suspicious_keywords.append(('Hex Strings',
930   - 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)'))
931   - patterns = detect_patterns(vba_code)
932   - results = []
933   - for keyword, description in autoexec_keywords:
934   - results.append(('AutoExec', keyword, description))
935   - for keyword, description in suspicious_keywords:
936   - results.append(('Suspicious', keyword, description))
937   - for pattern_type, value in patterns:
938   - results.append(('IOC', value, pattern_type))
939   - if include_hex_strings:
940   - for encoded, decoded in hex_strings:
941   - results.append(('Hex String', repr(decoded), encoded))
942   - return results
  1034 + return VBA_Scanner(vba_code).scan(include_hex_strings)
943 1035  
944 1036  
945 1037 #=== CLASSES =================================================================
... ... @@ -1226,7 +1318,7 @@ def process_file (container, filename, data, show_hex_strings=False):
1226 1318 else:
1227 1319 print 'No VBA macros found.'
1228 1320 except: #TypeError:
1229   - #raise
  1321 + raise
1230 1322 #TODO: print more info if debug mode
1231 1323 print sys.exc_value
1232 1324 print ''
... ...