Commit ebdb4e2d20ee635b2733be4b3222a1f74486d352

Authored by Philippe Lagadec
1 parent 56759d61

olevba: added new triage mode, options -t and -d

Showing 1 changed file with 128 additions and 9 deletions
oletools/olevba.py
... ... @@ -98,17 +98,18 @@ https://github.com/unixfreak0037/officeparser
98 98 # 2015-01-08 v0.14 PL: - added hex strings detection and decoding
99 99 # - fixed issue #2, decoding VBA stream names using
100 100 # specified codepage and unicode stream names
  101 +# 2015-01-11 v0.15 PL: - added new triage mode, options -t and -d
101 102  
102   -__version__ = '0.14'
  103 +__version__ = '0.15'
103 104  
104 105 #------------------------------------------------------------------------------
105 106 # TODO:
106 107 # + do not use logging, but a provided logger (null logger by default)
107 108 # + setup logging (common with other oletools)
108   -# + update readme, wiki and decalage.info, pypi (link to sample files)
109 109  
110 110 # TODO later:
111   -# - append decoded hex strings to VBA code, in order to detect IOCs and suspicious keywords
  111 +# + append decoded hex strings to VBA code, in order to detect IOCs and suspicious keywords
  112 +# + do not show hex strings by default (add option --hex)
112 113 # + performance improvement: instead of searching each keyword separately,
113 114 # first split vba code into a list of words (per line), then check each
114 115 # word against a dict. (or put vba words into a set/dict?)
... ... @@ -150,6 +151,9 @@ from thirdparty.xglob import xglob
150 151  
151 152 #--- CONSTANTS ----------------------------------------------------------------
152 153  
  154 +TYPE_OLE = 'OLE'
  155 +TYPE_OpenXML = 'OpenXML'
  156 +
153 157 MODULE_EXTENSION = "bas"
154 158 CLASS_EXTENSION = "cls"
155 159 FORM_EXTENSION = "frm"
... ... @@ -237,7 +241,7 @@ RE_PATTERNS = (
237 241 ('URL', re.compile(r'(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~])*[^\.\,\)\(\s]')),
238 242 ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")),
239 243 ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')),
240   - ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')),
  244 + # ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')),
241 245 ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|COM|PIF|APPLICATION|GADGET|MSI|MSP|MSC|VB|VBS|JS|VBE|JSE|WS|WSF|WSC|WSH|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|SCF|LNK|INF|REG)\b")),
242 246 # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/
243 247 #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types
... ... @@ -917,14 +921,15 @@ class VBA_Parser(object):
917 921 # This looks like an OLE file
918 922 logging.info('Parsing OLE file %s' % self.filename)
919 923 self.ole_file = olefile.OleFileIO(_file)
920   - self.type = 'OLE'
  924 + self.type = TYPE_OLE
  925 + #TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet
921 926 elif zipfile.is_zipfile(_file):
922 927 # This looks like a zip file, need to look for vbaProject.bin inside
923 928 # It can be any OLE file inside the archive
924 929 #...because vbaProject.bin can be renamed:
925 930 # see http://www.decalage.info/files/JCV07_Lagadec_OpenDocument_OpenXML_v4_decalage.pdf#page=18
926 931 logging.info('Opening ZIP/OpenXML file %s' % self.filename)
927   - self.type = 'OpenXML'
  932 + self.type = TYPE_OpenXML
928 933 z = zipfile.ZipFile(_file)
929 934 #TODO: check if this is actually an OpenXML file
930 935 # check each file within the zip if it is an OLE file, by reading its magic:
... ... @@ -1155,6 +1160,86 @@ def process_file (container, filename, data):
1155 1160 print ''
1156 1161  
1157 1162  
def process_file_triage(container, filename, data):
    """
    Process a single file in triage mode: print a one-line summary of it.

    The printed line is a flags column followed by the filename and, when
    the file could not be analysed, a message:

    - first flag: 'O' if the parser reports an OLE file, 'X' otherwise
      (OpenXML)
    - then one character per indicator, '-' when nothing was found:
      'M' macros, 'A' auto-executable keywords, 'S' suspicious keywords,
      'I' IOC patterns, 'H' hex-encoded strings
    - '?' alone if a TypeError is raised (file format not supported)
    - '!ERROR' if any other exception is raised; the exception value is
      appended to the line.

    :param container: str, path and filename of container if the file is within
        a zip archive, None otherwise. Not used by this function.
    :param filename: str, path and filename of file on disk, or within the container.
    :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
    """
    #TODO: replace print by writing to a provided output file (sys.stdout by default)
    nb_macros = 0
    nb_autoexec = 0
    nb_suspicious = 0
    nb_iocs = 0
    nb_hexstrings = 0
    message = ''
    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        vba = VBA_Parser(filename, data)
        if vba.detect_vba_macros():
            for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
                nb_macros += 1
                # empty modules still count as macros, but are not scanned
                if vba_code.strip():
                    nb_autoexec += len(detect_autoexec(vba_code))
                    nb_suspicious += len(detect_suspicious(vba_code))
                    nb_iocs += len(detect_patterns(vba_code))
                    nb_hexstrings += len(detect_hex_strings(vba_code))
        if vba.type == TYPE_OLE:
            flags = 'O'
        else:
            flags = 'X'
        # one flag letter per indicator, in fixed column order
        indicators = (
            (nb_macros, 'M'),
            (nb_autoexec, 'A'),
            (nb_suspicious, 'S'),
            (nb_iocs, 'I'),
            (nb_hexstrings, 'H'),
        )
        for count, letter in indicators:
            if count:
                flags += letter
            else:
                flags += '-'
    except TypeError:
        # file type not OLE nor OpenXML
        flags = '?'
        message = 'File format not supported'
    except Exception:
        # Another error occurred. Only Exception is caught, so that
        # KeyboardInterrupt and SystemExit still stop the whole run instead
        # of being reported as a per-file error.
        #TODO: print more info if debug mode
        #TODO: distinguish real errors from incorrect file types
        flags = '!ERROR'
        # sys.exc_info() replaces the deprecated, non thread-safe sys.exc_value
        message = sys.exc_info()[1]
    line = '%-6s %s' % (flags, filename)
    if message:
        line += ' - %s' % message
    # single parenthesised argument: same output as the print statement
    print(line)
  1227 + # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'),
  1228 + # header=False, border=False)
  1229 + # t.align = 'l'
  1230 + # t.max_width['filename'] = 30
  1231 + # t.max_width['type'] = 10
  1232 + # t.max_width['macros'] = 6
  1233 + # t.max_width['autoexec'] = 6
  1234 + # t.max_width['suspicious'] = 6
  1235 + # t.max_width['ioc'] = 6
  1236 + # t.max_width['hexstrings'] = 6
  1237 + # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings))
  1238 + # print t
  1239 +
def main_triage_quick():
    """
    Placeholder: not implemented yet, does nothing and returns None.
    """
  1242 +
1158 1243 #=== MAIN =====================================================================
1159 1244  
1160 1245 def main():
... ... @@ -1173,20 +1258,54 @@ def main():
1173 1258 help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')
1174 1259 parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
1175 1260 help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
  1261 + parser.add_option("-t", action="store_true", dest="triage_mode",
  1262 + help='triage mode, display results as a summary table (default for multiple files)')
  1263 + parser.add_option("-d", action="store_true", dest="detailed_mode",
  1264 + help='detailed mode, display full results (default for single file)')
1176 1265  
1177 1266 (options, args) = parser.parse_args()
1178 1267  
1179   - # Print help if no argurments are passed
  1268 + # Print help if no arguments are passed
1180 1269 if len(args) == 0:
1181 1270 print __doc__
1182 1271 parser.print_help()
1183 1272 sys.exit()
1184 1273  
1185 1274 logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO)
1186   -
  1275 + # For now, all logging is disabled:
  1276 + logging.disable(logging.CRITICAL)
  1277 +
  1278 + # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr')
  1279 + # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7)
  1280 + if not options.detailed_mode or options.triage_mode:
  1281 + print '%-6s %-72s' % ('Flags', 'Filename')
  1282 + print '%-6s %-72s' % ('-'*6, '-'*72)
  1283 + previous_container = None
  1284 + count = 0
  1285 + container = filename = data = None
1187 1286 for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
1188 1287 zip_password=options.zip_password, zip_fname=options.zip_fname):
1189   - #data = open(filespec, 'rb').read()
  1288 + # ignore directory names stored in zip files:
  1289 + if container and filename.endswith('/'):
  1290 + continue
  1291 + if options.detailed_mode and not options.triage_mode:
  1292 + # fully detailed output
  1293 + process_file(container, filename, data)
  1294 + else:
  1295 + # print container name when it changes:
  1296 + if container != previous_container:
  1297 + if container is not None:
  1298 + print '\nFiles in %s:' % container
  1299 + previous_container = container
  1300 + # summarized output for triage:
  1301 + process_file_triage(container, filename, data)
  1302 + count += 1
  1303 + if not options.detailed_mode or options.triage_mode:
  1304 + print '\n(Flags: O=OLE, X=OpenXML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex-encoded strings, ?=Unknown)\n'
  1305 +
  1306 + if count == 1 and not options.triage_mode and not options.detailed_mode:
  1307 + # if options -t and -d were not specified and it's a single file, print details:
  1308 + #TODO: avoid doing the analysis twice by storing results
1190 1309 process_file(container, filename, data)
1191 1310  
1192 1311 if __name__ == '__main__':
... ...