Commit ebdb4e2d20ee635b2733be4b3222a1f74486d352

Authored by Philippe Lagadec
1 parent 56759d61

olevba: added new triage mode, options -t and -d

Showing 1 changed file with 128 additions and 9 deletions
oletools/olevba.py
@@ -98,17 +98,18 @@ https://github.com/unixfreak0037/officeparser @@ -98,17 +98,18 @@ https://github.com/unixfreak0037/officeparser
98 # 2015-01-08 v0.14 PL: - added hex strings detection and decoding 98 # 2015-01-08 v0.14 PL: - added hex strings detection and decoding
99 # - fixed issue #2, decoding VBA stream names using 99 # - fixed issue #2, decoding VBA stream names using
100 # specified codepage and unicode stream names 100 # specified codepage and unicode stream names
  101 +# 2015-01-11 v0.15 PL: - added new triage mode, options -t and -d
101 102
102 -__version__ = '0.14' 103 +__version__ = '0.15'
103 104
104 #------------------------------------------------------------------------------ 105 #------------------------------------------------------------------------------
105 # TODO: 106 # TODO:
106 # + do not use logging, but a provided logger (null logger by default) 107 # + do not use logging, but a provided logger (null logger by default)
107 # + setup logging (common with other oletools) 108 # + setup logging (common with other oletools)
108 -# + update readme, wiki and decalage.info, pypi (link to sample files)  
109 109
110 # TODO later: 110 # TODO later:
111 -# - append decoded hex strings to VBA code, in order to detect IOCs and suspicious keywords 111 +# + append decoded hex strings to VBA code, in order to detect IOCs and suspicious keywords
  112 +# + do not show hex strings by default (add option --hex)
112 # + performance improvement: instead of searching each keyword separately, 113 # + performance improvement: instead of searching each keyword separately,
113 # first split vba code into a list of words (per line), then check each 114 # first split vba code into a list of words (per line), then check each
114 # word against a dict. (or put vba words into a set/dict?) 115 # word against a dict. (or put vba words into a set/dict?)
@@ -150,6 +151,9 @@ from thirdparty.xglob import xglob @@ -150,6 +151,9 @@ from thirdparty.xglob import xglob
150 151
151 #--- CONSTANTS ---------------------------------------------------------------- 152 #--- CONSTANTS ----------------------------------------------------------------
152 153
  154 +TYPE_OLE = 'OLE'
  155 +TYPE_OpenXML = 'OpenXML'
  156 +
153 MODULE_EXTENSION = "bas" 157 MODULE_EXTENSION = "bas"
154 CLASS_EXTENSION = "cls" 158 CLASS_EXTENSION = "cls"
155 FORM_EXTENSION = "frm" 159 FORM_EXTENSION = "frm"
@@ -237,7 +241,7 @@ RE_PATTERNS = ( @@ -237,7 +241,7 @@ RE_PATTERNS = (
237 ('URL', re.compile(r'(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~])*[^\.\,\)\(\s]')), 241 ('URL', re.compile(r'(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~])*[^\.\,\)\(\s]')),
238 ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")), 242 ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")),
239 ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')), 243 ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')),
240 - ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')), 244 + # ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')),
241 ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|COM|PIF|APPLICATION|GADGET|MSI|MSP|MSC|VB|VBS|JS|VBE|JSE|WS|WSF|WSC|WSH|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|SCF|LNK|INF|REG)\b")), 245 ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|COM|PIF|APPLICATION|GADGET|MSI|MSP|MSC|VB|VBS|JS|VBE|JSE|WS|WSF|WSC|WSH|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|SCF|LNK|INF|REG)\b")),
242 # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/ 246 # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/
243 #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types 247 #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types
@@ -917,14 +921,15 @@ class VBA_Parser(object): @@ -917,14 +921,15 @@ class VBA_Parser(object):
917 # This looks like an OLE file 921 # This looks like an OLE file
918 logging.info('Parsing OLE file %s' % self.filename) 922 logging.info('Parsing OLE file %s' % self.filename)
919 self.ole_file = olefile.OleFileIO(_file) 923 self.ole_file = olefile.OleFileIO(_file)
920 - self.type = 'OLE' 924 + self.type = TYPE_OLE
  925 + #TODO: raise TypeError if this is a PowerPoint 97 file, since VBA macros cannot be detected yet
921 elif zipfile.is_zipfile(_file): 926 elif zipfile.is_zipfile(_file):
922 # This looks like a zip file, need to look for vbaProject.bin inside 927 # This looks like a zip file, need to look for vbaProject.bin inside
923 # It can be any OLE file inside the archive 928 # It can be any OLE file inside the archive
924 #...because vbaProject.bin can be renamed: 929 #...because vbaProject.bin can be renamed:
925 # see http://www.decalage.info/files/JCV07_Lagadec_OpenDocument_OpenXML_v4_decalage.pdf#page=18 930 # see http://www.decalage.info/files/JCV07_Lagadec_OpenDocument_OpenXML_v4_decalage.pdf#page=18
926 logging.info('Opening ZIP/OpenXML file %s' % self.filename) 931 logging.info('Opening ZIP/OpenXML file %s' % self.filename)
927 - self.type = 'OpenXML' 932 + self.type = TYPE_OpenXML
928 z = zipfile.ZipFile(_file) 933 z = zipfile.ZipFile(_file)
929 #TODO: check if this is actually an OpenXML file 934 #TODO: check if this is actually an OpenXML file
930 # check each file within the zip if it is an OLE file, by reading its magic: 935 # check each file within the zip if it is an OLE file, by reading its magic:
@@ -1155,6 +1160,86 @@ def process_file (container, filename, data): @@ -1155,6 +1160,86 @@ def process_file (container, filename, data):
1155 print '' 1160 print ''
1156 1161
1157 1162
def process_file_triage(container, filename, data):
    """
    Process a single file in triage mode and print a one-line summary.

    The printed line is a flags column (left-aligned, padded to 6 characters)
    followed by the filename and, when there is one, ' - ' and a message.

    Flags for a file that could be parsed:
    - 1st character: 'O' if the parser reports TYPE_OLE, 'X' otherwise
    - then one character each for macros, auto-executable keywords,
      suspicious keywords, IOC patterns and hex strings: the letter
      'M', 'A', 'S', 'I' or 'H' if at least one was found, '-' if none.

    Flags for a file that could not be parsed:
    - '?' if a TypeError was raised (reported as unsupported file format)
    - '!ERROR' for any other exception, with the exception text as message

    :param container: str, path and filename of container if the file is within
        a zip archive, None otherwise. (not used by this function)
    :param filename: str, path and filename of file on disk, or within the container.
    :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
    """
    #TODO: replace print by writing to a provided output file (sys.stdout by default)
    nb_macros = 0
    nb_autoexec = 0
    nb_suspicious = 0
    nb_iocs = 0
    nb_hexstrings = 0
    message = ''
    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        vba = VBA_Parser(filename, data)
        if vba.detect_vba_macros():
            for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
                nb_macros += 1
                # only analyze macros that contain something else than whitespace:
                if vba_code.strip() != '':
                    nb_autoexec += len(detect_autoexec(vba_code))
                    nb_suspicious += len(detect_suspicious(vba_code))
                    nb_iocs += len(detect_patterns(vba_code))
                    nb_hexstrings += len(detect_hex_strings(vba_code))
        # computed even when no macro was found, so that flags is always set:
        if vba.type == TYPE_OLE:
            flags = 'O'
        else:
            flags = 'X'
        # one letter per indicator, '-' when nothing was found:
        indicators = (
            ('M', nb_macros),
            ('A', nb_autoexec),
            ('S', nb_suspicious),
            ('I', nb_iocs),
            ('H', nb_hexstrings),
        )
        for letter, count in indicators:
            if count:
                flags += letter
            else:
                flags += '-'
    except TypeError:
        # file type not OLE nor OpenXML
        flags = '?'
        message = 'File format not supported'
    except Exception:
        # Another error occurred.
        # Exception (and not a bare except) so that KeyboardInterrupt and
        # SystemExit are not swallowed: Ctrl-C must stop the whole run instead
        # of being reported as an error on the current file.
        #TODO: print more info if debug mode
        #TODO: distinguish real errors from incorrect file types
        flags = '!ERROR'
        # sys.exc_info() replaces the deprecated, non thread-safe sys.exc_value:
        exc = sys.exc_info()[1]
        # fall back to the exception class name when there is no message text:
        message = str(exc) or exc.__class__.__name__
    line = '%-6s %s' % (flags, filename)
    if message:
        line += ' - %s' % message
    print(line)
  1239 +
def main_triage_quick():
    """
    Placeholder: takes no argument, does nothing and returns None.
    """
    return None
  1242 +
1158 #=== MAIN ===================================================================== 1243 #=== MAIN =====================================================================
1159 1244
1160 def main(): 1245 def main():
@@ -1173,20 +1258,54 @@ def main(): @@ -1173,20 +1258,54 @@ def main():
1173 help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') 1258 help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')
1174 parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', 1259 parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
1175 help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') 1260 help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
  1261 + parser.add_option("-t", action="store_true", dest="triage_mode",
  1262 + help='triage mode, display results as a summary table (default for multiple files)')
  1263 + parser.add_option("-d", action="store_true", dest="detailed_mode",
  1264 + help='detailed mode, display full results (default for single file)')
1176 1265
1177 (options, args) = parser.parse_args() 1266 (options, args) = parser.parse_args()
1178 1267
1179 - # Print help if no argurments are passed 1268 + # Print help if no arguments are passed
1180 if len(args) == 0: 1269 if len(args) == 0:
1181 print __doc__ 1270 print __doc__
1182 parser.print_help() 1271 parser.print_help()
1183 sys.exit() 1272 sys.exit()
1184 1273
1185 logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) 1274 logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO)
1186 - 1275 + # For now, all logging is disabled:
  1276 + logging.disable(logging.CRITICAL)
  1277 +
  1278 + # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr')
  1279 + # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7)
  1280 + if not options.detailed_mode or options.triage_mode:
  1281 + print '%-6s %-72s' % ('Flags', 'Filename')
  1282 + print '%-6s %-72s' % ('-'*6, '-'*72)
  1283 + previous_container = None
  1284 + count = 0
  1285 + container = filename = data = None
1187 for container, filename, data in xglob.iter_files(args, recursive=options.recursive, 1286 for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
1188 zip_password=options.zip_password, zip_fname=options.zip_fname): 1287 zip_password=options.zip_password, zip_fname=options.zip_fname):
1189 - #data = open(filespec, 'rb').read() 1288 + # ignore directory names stored in zip files:
  1289 + if container and filename.endswith('/'):
  1290 + continue
  1291 + if options.detailed_mode and not options.triage_mode:
  1292 + # fully detailed output
  1293 + process_file(container, filename, data)
  1294 + else:
  1295 + # print container name when it changes:
  1296 + if container != previous_container:
  1297 + if container is not None:
  1298 + print '\nFiles in %s:' % container
  1299 + previous_container = container
  1300 + # summarized output for triage:
  1301 + process_file_triage(container, filename, data)
  1302 + count += 1
  1303 + if not options.detailed_mode or options.triage_mode:
  1304 + print '\n(Flags: O=OLE, X=OpenXML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex-encoded strings, ?=Unknown)\n'
  1305 +
  1306 + if count == 1 and not options.triage_mode and not options.detailed_mode:
  1307 + # if options -t and -d were not specified and it's a single file, print details:
  1308 + #TODO: avoid doing the analysis twice by storing results
1190 process_file(container, filename, data) 1309 process_file(container, filename, data)
1191 1310
1192 if __name__ == '__main__': 1311 if __name__ == '__main__':