Commit 782a526787afb7139b610009e50e3514f9f4d3a5

Authored by Philippe Lagadec
1 parent 249db149

olevba: removed .com from the list of executable extensions, added scan_vba to run all detection algorithms, decoded hex strings are now also scanned
Showing 1 changed file with 46 additions and 16 deletions
oletools/olevba.py
... ... @@ -102,8 +102,11 @@ https://github.com/unixfreak0037/officeparser
102 102 # 2015-01-16 v0.16 PL: - fix for issue #3 (exception when module name="text")
103 103 # - added several suspicious keywords
104 104 # - added option -i to analyze VBA source code directly
  105 +# 2015-01-17 v0.17 PL: - removed .com from the list of executable extensions
  106 +# - added scan_vba to run all detection algorithms
  107 +# - decoded hex strings are now also scanned
105 108  
106   -__version__ = '0.16'
  109 +__version__ = '0.17'
107 110  
108 111 #------------------------------------------------------------------------------
109 112 # TODO:
... ... @@ -248,7 +251,8 @@ RE_PATTERNS = (
248 251 ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")),
249 252 ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')),
250 253 # ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')),
251   - ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|COM|PIF|APPLICATION|GADGET|MSI|MSP|MSC|VB|VBS|JS|VBE|JSE|WS|WSF|WSC|WSH|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|SCF|LNK|INF|REG)\b")),
  254 + # Executable file name with known extensions (except .com which is present in many URLs):
  255 + ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|PIF|APPLICATION|GADGET|MSI|MSP|MSC|VB|VBS|JS|VBE|JSE|WS|WSF|WSC|WSH|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|SCF|LNK|INF|REG)\b")),
252 256 # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/
253 257 #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types
254 258 #('Hex string', re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')),
... ... @@ -882,6 +886,41 @@ def detect_hex_strings(vba_code):
882 886 return results
883 887  
884 888  
def scan_vba(vba_code, include_hex=False):
    """
    Analyze the provided VBA code to detect suspicious keywords,
    auto-executable macros, IOC patterns, and obfuscation patterns
    such as hex-encoded strings.

    Hex-encoded strings are detected first; their decoded values are
    appended to the code before the other detectors run, so that keywords
    and IOCs hidden behind hex encoding are reported as well.

    :param vba_code: str, VBA source code to be analyzed
    :param include_hex: bool, if True every hex-encoded string found is also
        reported as a separate 'Hex String' entry (repr of the decoded
        value as keyword, encoded value as description). Default: False,
        in which case only one 'Hex Strings' summary entry is added to the
        suspicious keywords.
    :return: list of tuples (type, keyword, description)
        (type = 'AutoExec', 'Suspicious' or 'IOC', plus 'Hex String' when
        include_hex is True)
    """
    # First, detect and extract hex-encoded strings:
    hex_strings = detect_hex_strings(vba_code)
    # Then append the decoded strings to the VBA code (one per line, built
    # with a single join), to detect obfuscated IOCs and keywords:
    if hex_strings:
        vba_code += ''.join('\n' + decoded for _encoded, decoded in hex_strings)
    #TODO: also add reverse strings (before and after decoding), for StrReverse obfuscation
    autoexec_keywords = detect_autoexec(vba_code)
    # Work on a copy so the list returned by detect_suspicious is never
    # modified in place:
    suspicious_keywords = list(detect_suspicious(vba_code))
    # If hex-encoded strings were discovered, add an item to suspicious keywords:
    if hex_strings:
        suspicious_keywords.append(('Hex Strings', 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)'))
    patterns = detect_patterns(vba_code)
    # Flatten everything into (type, keyword, description) rows, in the
    # order AutoExec, Suspicious, IOC:
    results = [('AutoExec', keyword, description)
               for keyword, description in autoexec_keywords]
    results.extend(('Suspicious', keyword, description)
                   for keyword, description in suspicious_keywords)
    # detect_patterns yields (pattern_type, value): the matched value is
    # shown as the keyword and the pattern type as its description.
    results.extend(('IOC', value, pattern_type)
                   for pattern_type, value in patterns)
    # Individual hex strings are only listed on request:
    if include_hex:
        results.extend(('Hex String', repr(decoded), encoded)
                       for encoded, decoded in hex_strings)
    return results
  922 +
  923 +
885 924 #=== CLASSES =================================================================
886 925  
887 926 class VBA_Parser(object):
... ... @@ -1106,27 +1145,18 @@ def print_analysis(vba_code):
1106 1145 :param vba_code: str, VBA source code to be analyzed
1107 1146 :return: None
1108 1147 """
1109   - autoexec_keywords = detect_autoexec(vba_code)
1110   - suspicious_keywords = detect_suspicious(vba_code)
1111   - patterns = detect_patterns(vba_code)
1112   - hex_strings = detect_hex_strings(vba_code)
1113   - if autoexec_keywords or suspicious_keywords or patterns:
  1148 + results = scan_vba(vba_code)
  1149 + if results:
1114 1150 t = prettytable.PrettyTable(('Type', 'Keyword', 'Description'))
1115 1151 t.align = 'l'
1116 1152 t.max_width['Type'] = 10
1117 1153 t.max_width['Keyword'] = 20
1118 1154 t.max_width['Description'] = 39
1119   - for keyword, description in autoexec_keywords:
1120   - t.add_row(('AutoExec', keyword, description))
1121   - for keyword, description in suspicious_keywords:
1122   - t.add_row(('Suspicious', keyword, description))
1123   - for pattern_type, value in patterns:
1124   - t.add_row(('IOC', value, pattern_type))
1125   - for encoded, decoded in hex_strings:
1126   - t.add_row(('Hex String', repr(decoded), encoded))
  1155 + for kw_type, keyword, description in results:
  1156 + t.add_row((kw_type, keyword, description))
1127 1157 print t
1128 1158 else:
1129   - print 'No suspicious keyword or pattern found.'
  1159 + print 'No suspicious keyword or IOC found.'
1130 1160  
1131 1161  
1132 1162  
... ...