Commit 9f45875afa47440c5c5ece443171da2e617e5658

Authored by Philippe Lagadec
1 parent 85f94f92

olevba: added hex strings detection and decoding

Showing 1 changed file with 29 additions and 1 deletions
oletools/olevba.py
... ... @@ -95,8 +95,9 @@ https://github.com/unixfreak0037/officeparser
95 95 # - process_file: improved display, shows container file
96 96 # - improved list of executable file extensions
97 97 # 2015-01-04 v0.13 PL: - added several suspicious keywords, improved display
  98 +# 2015-01-08 v0.14 PL: - added hex strings detection and decoding
98 99  
99   -__version__ = '0.13'
  100 +__version__ = '0.14'
100 101  
101 102 #------------------------------------------------------------------------------
102 103 # TODO:
... ... @@ -105,6 +106,7 @@ __version__ = '0.13'
105 106 # + update readme, wiki and decalage.info, pypi (link to sample files)
106 107  
107 108 # TODO later:
  109 +# - append decoded hex strings to VBA code, in order to detect IOCs and suspicious keywords
108 110 # + performance improvement: instead of searching each keyword separately,
109 111 # first split vba code into a list of words (per line), then check each
110 112 # word against a dict. (or put vba words into a set/dict?)
... ... @@ -138,6 +140,7 @@ import zipfile
138 140 import re
139 141 import optparse
140 142 import os.path
  143 +import binascii
141 144  
142 145 import thirdparty.olefile as olefile
143 146 from thirdparty.prettytable import prettytable
... ... @@ -236,8 +239,12 @@ RE_PATTERNS = (
236 239 ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|COM|PIF|APPLICATION|GADGET|MSI|MSP|MSC|VB|VBS|JS|VBE|JSE|WS|WSF|WSC|WSH|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|SCF|LNK|INF|REG)\b")),
237 240 # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/
238 241 #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types
  242 + #('Hex string', re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')),
239 243 )
240 244  
  245 +# regex to detect strings encoded in hexadecimal: a run of at least 4 pairs of hex digits (upper or lower case), so every match has an even length
  246 +re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')
  247 +
241 248  
242 249 #--- FUNCTIONS ----------------------------------------------------------------
243 250  
... ... @@ -839,6 +846,24 @@ def detect_patterns(vba_code):
839 846 return results
840 847  
841 848  
def detect_hex_strings(vba_code):
    """
    Look for hexadecimal-encoded strings in VBA source code and decode them.

    The code is scanned with the module-level re_hex_string pattern. Each
    distinct matched text is reported only once, in order of first
    appearance (the comparison is on the exact matched text, so it is
    case-sensitive).

    :param vba_code: str, VBA source code
    :return: list of tuples (encoded string, decoded string)
    """
    seen = set()
    decoded_pairs = []
    for hex_text in (m.group() for m in re_hex_string.finditer(vba_code)):
        # skip hex strings that were already reported
        if hex_text in seen:
            continue
        seen.add(hex_text)
        decoded_pairs.append((hex_text, binascii.unhexlify(hex_text)))
    return decoded_pairs
  865 +
  866 +
842 867 #=== CLASSES =================================================================
843 868  
844 869 class VBA_Parser(object):
... ... @@ -1094,6 +1119,7 @@ def process_file (container, filename, data):
1094 1119 autoexec_keywords = detect_autoexec(vba_code)
1095 1120 suspicious_keywords = detect_suspicious(vba_code)
1096 1121 patterns = detect_patterns(vba_code)
  1122 + hex_strings = detect_hex_strings(vba_code)
1097 1123 if autoexec_keywords or suspicious_keywords or patterns:
1098 1124 t = prettytable.PrettyTable(('Type', 'Keyword', 'Description'))
1099 1125 t.align = 'l'
... ... @@ -1106,6 +1132,8 @@ def process_file (container, filename, data):
1106 1132 t.add_row(('Suspicious', keyword, description))
1107 1133 for pattern_type, value in patterns:
1108 1134 t.add_row(('IOC', value, pattern_type))
  1135 + for encoded, decoded in hex_strings:
  1136 + t.add_row(('Hex String', repr(decoded), encoded))
1109 1137 print t
1110 1138 else:
1111 1139 print 'No suspicious keyword or pattern found.'
... ...