Commit 9f45875afa47440c5c5ece443171da2e617e5658

Authored by Philippe Lagadec
1 parent 85f94f92

olevba: added hex strings detection and decoding

Showing 1 changed file with 29 additions and 1 deletions
oletools/olevba.py
@@ -95,8 +95,9 @@ https://github.com/unixfreak0037/officeparser @@ -95,8 +95,9 @@ https://github.com/unixfreak0037/officeparser
95 # - process_file: improved display, shows container file 95 # - process_file: improved display, shows container file
96 # - improved list of executable file extensions 96 # - improved list of executable file extensions
97 # 2015-01-04 v0.13 PL: - added several suspicious keywords, improved display 97 # 2015-01-04 v0.13 PL: - added several suspicious keywords, improved display
  98 +# 2015-01-08 v0.14 PL: - added hex strings detection and decoding
98 99
99 -__version__ = '0.13' 100 +__version__ = '0.14'
100 101
101 #------------------------------------------------------------------------------ 102 #------------------------------------------------------------------------------
102 # TODO: 103 # TODO:
@@ -105,6 +106,7 @@ __version__ = '0.13' @@ -105,6 +106,7 @@ __version__ = '0.13'
105 # + update readme, wiki and decalage.info, pypi (link to sample files) 106 # + update readme, wiki and decalage.info, pypi (link to sample files)
106 107
107 # TODO later: 108 # TODO later:
  109 +# - append decoded hex strings to VBA code, in order to detect IOCs and suspicious keywords
108 # + performance improvement: instead of searching each keyword separately, 110 # + performance improvement: instead of searching each keyword separately,
109 # first split vba code into a list of words (per line), then check each 111 # first split vba code into a list of words (per line), then check each
110 # word against a dict. (or put vba words into a set/dict?) 112 # word against a dict. (or put vba words into a set/dict?)
@@ -138,6 +140,7 @@ import zipfile @@ -138,6 +140,7 @@ import zipfile
138 import re 140 import re
139 import optparse 141 import optparse
140 import os.path 142 import os.path
  143 +import binascii
141 144
142 import thirdparty.olefile as olefile 145 import thirdparty.olefile as olefile
143 from thirdparty.prettytable import prettytable 146 from thirdparty.prettytable import prettytable
@@ -236,8 +239,12 @@ RE_PATTERNS = ( @@ -236,8 +239,12 @@ RE_PATTERNS = (
236 ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|COM|PIF|APPLICATION|GADGET|MSI|MSP|MSC|VB|VBS|JS|VBE|JSE|WS|WSF|WSC|WSH|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|SCF|LNK|INF|REG)\b")), 239 ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|COM|PIF|APPLICATION|GADGET|MSI|MSP|MSC|VB|VBS|JS|VBE|JSE|WS|WSF|WSC|WSH|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|SCF|LNK|INF|REG)\b")),
237 # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/ 240 # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/
238 #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types 241 #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types
  242 + #('Hex string', re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')),
239 ) 243 )
240 244
  245 +# regex to detect strings encoded in hexadecimal (runs of at least 4 hex byte pairs, i.e. 8 or more hex digits)
  246 +re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')
  247 +
241 248
242 #--- FUNCTIONS ---------------------------------------------------------------- 249 #--- FUNCTIONS ----------------------------------------------------------------
243 250
@@ -839,6 +846,24 @@ def detect_patterns(vba_code): @@ -839,6 +846,24 @@ def detect_patterns(vba_code):
839 return results 846 return results
840 847
841 848
def detect_hex_strings(vba_code):
    """
    Look for hexadecimal-encoded strings in VBA source code and decode them.

    Each distinct match of the module-level ``re_hex_string`` pattern is
    reported only once, in the order in which it is first encountered.

    :param vba_code: str, VBA source code
    :return: list of (encoded string, decoded string) tuples, where the
        decoded value is the output of binascii.unhexlify on the match
    """
    seen = set()
    pairs = []
    for hex_match in re_hex_string.finditer(vba_code):
        encoded = hex_match.group()
        if encoded in seen:
            # this exact hex string was already reported: skip duplicates
            continue
        pairs.append((encoded, binascii.unhexlify(encoded)))
        seen.add(encoded)
    return pairs
  865 +
  866 +
842 #=== CLASSES ================================================================= 867 #=== CLASSES =================================================================
843 868
844 class VBA_Parser(object): 869 class VBA_Parser(object):
@@ -1094,6 +1119,7 @@ def process_file (container, filename, data): @@ -1094,6 +1119,7 @@ def process_file (container, filename, data):
1094 autoexec_keywords = detect_autoexec(vba_code) 1119 autoexec_keywords = detect_autoexec(vba_code)
1095 suspicious_keywords = detect_suspicious(vba_code) 1120 suspicious_keywords = detect_suspicious(vba_code)
1096 patterns = detect_patterns(vba_code) 1121 patterns = detect_patterns(vba_code)
  1122 + hex_strings = detect_hex_strings(vba_code)
1097 if autoexec_keywords or suspicious_keywords or patterns: 1123 if autoexec_keywords or suspicious_keywords or patterns:
1098 t = prettytable.PrettyTable(('Type', 'Keyword', 'Description')) 1124 t = prettytable.PrettyTable(('Type', 'Keyword', 'Description'))
1099 t.align = 'l' 1125 t.align = 'l'
@@ -1106,6 +1132,8 @@ def process_file (container, filename, data): @@ -1106,6 +1132,8 @@ def process_file (container, filename, data):
1106 t.add_row(('Suspicious', keyword, description)) 1132 t.add_row(('Suspicious', keyword, description))
1107 for pattern_type, value in patterns: 1133 for pattern_type, value in patterns:
1108 t.add_row(('IOC', value, pattern_type)) 1134 t.add_row(('IOC', value, pattern_type))
  1135 + for encoded, decoded in hex_strings:
  1136 + t.add_row(('Hex String', repr(decoded), encoded))
1109 print t 1137 print t
1110 else: 1138 else:
1111 print 'No suspicious keyword or pattern found.' 1139 print 'No suspicious keyword or pattern found.'