Commit ebdb4e2d20ee635b2733be4b3222a1f74486d352
1 parent: 56759d61
olevba: added new triage mode, options -t and -d
Showing 1 changed file with 128 additions and 9 deletions:
oletools/olevba.py
| @@ -98,17 +98,18 @@ https://github.com/unixfreak0037/officeparser | @@ -98,17 +98,18 @@ https://github.com/unixfreak0037/officeparser | ||
| 98 | # 2015-01-08 v0.14 PL: - added hex strings detection and decoding | 98 | # 2015-01-08 v0.14 PL: - added hex strings detection and decoding |
| 99 | # - fixed issue #2, decoding VBA stream names using | 99 | # - fixed issue #2, decoding VBA stream names using |
| 100 | # specified codepage and unicode stream names | 100 | # specified codepage and unicode stream names |
| 101 | +# 2015-01-11 v0.15 PL: - added new triage mode, options -t and -d | ||
| 101 | 102 | ||
| 102 | -__version__ = '0.14' | 103 | +__version__ = '0.15' |
| 103 | 104 | ||
| 104 | #------------------------------------------------------------------------------ | 105 | #------------------------------------------------------------------------------ |
| 105 | # TODO: | 106 | # TODO: |
| 106 | # + do not use logging, but a provided logger (null logger by default) | 107 | # + do not use logging, but a provided logger (null logger by default) |
| 107 | # + setup logging (common with other oletools) | 108 | # + setup logging (common with other oletools) |
| 108 | -# + update readme, wiki and decalage.info, pypi (link to sample files) | ||
| 109 | 109 | ||
| 110 | # TODO later: | 110 | # TODO later: |
| 111 | -# - append decoded hex strings to VBA code, in order to detect IOCs and suspicious keywords | 111 | +# + append decoded hex strings to VBA code, in order to detect IOCs and suspicious keywords |
| 112 | +# + do not show hex strings by default (add option --hex) | ||
| 112 | # + performance improvement: instead of searching each keyword separately, | 113 | # + performance improvement: instead of searching each keyword separately, |
| 113 | # first split vba code into a list of words (per line), then check each | 114 | # first split vba code into a list of words (per line), then check each |
| 114 | # word against a dict. (or put vba words into a set/dict?) | 115 | # word against a dict. (or put vba words into a set/dict?) |
| @@ -150,6 +151,9 @@ from thirdparty.xglob import xglob | @@ -150,6 +151,9 @@ from thirdparty.xglob import xglob | ||
| 150 | 151 | ||
| 151 | #--- CONSTANTS ---------------------------------------------------------------- | 152 | #--- CONSTANTS ---------------------------------------------------------------- |
| 152 | 153 | ||
| 154 | +TYPE_OLE = 'OLE' | ||
| 155 | +TYPE_OpenXML = 'OpenXML' | ||
| 156 | + | ||
| 153 | MODULE_EXTENSION = "bas" | 157 | MODULE_EXTENSION = "bas" |
| 154 | CLASS_EXTENSION = "cls" | 158 | CLASS_EXTENSION = "cls" |
| 155 | FORM_EXTENSION = "frm" | 159 | FORM_EXTENSION = "frm" |
| @@ -237,7 +241,7 @@ RE_PATTERNS = ( | @@ -237,7 +241,7 @@ RE_PATTERNS = ( | ||
| 237 | ('URL', re.compile(r'(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~])*[^\.\,\)\(\s]')), | 241 | ('URL', re.compile(r'(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~])*[^\.\,\)\(\s]')), |
| 238 | ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")), | 242 | ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")), |
| 239 | ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')), | 243 | ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')), |
| 240 | - ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')), | 244 | + # ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')), |
| 241 | ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|COM|PIF|APPLICATION|GADGET|MSI|MSP|MSC|VB|VBS|JS|VBE|JSE|WS|WSF|WSC|WSH|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|SCF|LNK|INF|REG)\b")), | 245 | ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|COM|PIF|APPLICATION|GADGET|MSI|MSP|MSC|VB|VBS|JS|VBE|JSE|WS|WSF|WSC|WSH|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|SCF|LNK|INF|REG)\b")), |
| 242 | # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/ | 246 | # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/ |
| 243 | #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types | 247 | #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types |
| @@ -917,14 +921,15 @@ class VBA_Parser(object): | @@ -917,14 +921,15 @@ class VBA_Parser(object): | ||
| 917 | # This looks like an OLE file | 921 | # This looks like an OLE file |
| 918 | logging.info('Parsing OLE file %s' % self.filename) | 922 | logging.info('Parsing OLE file %s' % self.filename) |
| 919 | self.ole_file = olefile.OleFileIO(_file) | 923 | self.ole_file = olefile.OleFileIO(_file) |
| 920 | - self.type = 'OLE' | 924 | + self.type = TYPE_OLE |
| 925 | + #TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet | ||
| 921 | elif zipfile.is_zipfile(_file): | 926 | elif zipfile.is_zipfile(_file): |
| 922 | # This looks like a zip file, need to look for vbaProject.bin inside | 927 | # This looks like a zip file, need to look for vbaProject.bin inside |
| 923 | # It can be any OLE file inside the archive | 928 | # It can be any OLE file inside the archive |
| 924 | #...because vbaProject.bin can be renamed: | 929 | #...because vbaProject.bin can be renamed: |
| 925 | # see http://www.decalage.info/files/JCV07_Lagadec_OpenDocument_OpenXML_v4_decalage.pdf#page=18 | 930 | # see http://www.decalage.info/files/JCV07_Lagadec_OpenDocument_OpenXML_v4_decalage.pdf#page=18 |
| 926 | logging.info('Opening ZIP/OpenXML file %s' % self.filename) | 931 | logging.info('Opening ZIP/OpenXML file %s' % self.filename) |
| 927 | - self.type = 'OpenXML' | 932 | + self.type = TYPE_OpenXML |
| 928 | z = zipfile.ZipFile(_file) | 933 | z = zipfile.ZipFile(_file) |
| 929 | #TODO: check if this is actually an OpenXML file | 934 | #TODO: check if this is actually an OpenXML file |
| 930 | # check each file within the zip if it is an OLE file, by reading its magic: | 935 | # check each file within the zip if it is an OLE file, by reading its magic: |
| @@ -1155,6 +1160,86 @@ def process_file (container, filename, data): | @@ -1155,6 +1160,86 @@ def process_file (container, filename, data): | ||
| 1155 | print '' | 1160 | print '' |
| 1156 | 1161 | ||
| 1157 | 1162 | ||
| 1163 | +def process_file_triage (container, filename, data): | ||
| 1164 | + """ | ||
| 1165 | + Process a single file | ||
| 1166 | + | ||
| 1167 | + :param container: str, path and filename of container if the file is within | ||
| 1168 | + a zip archive, None otherwise. | ||
| 1169 | + :param filename: str, path and filename of file on disk, or within the container. | ||
| 1170 | + :param data: bytes, content of the file if it is in a container, None if it is a file on disk. | ||
| 1171 | + """ | ||
| 1172 | + #TODO: replace print by writing to a provided output file (sys.stdout by default) | ||
| 1173 | + nb_macros = 0 | ||
| 1174 | + nb_autoexec = 0 | ||
| 1175 | + nb_suspicious = 0 | ||
| 1176 | + nb_iocs = 0 | ||
| 1177 | + nb_hexstrings = 0 | ||
| 1178 | + # ftype = 'Other' | ||
| 1179 | + message = '' | ||
| 1180 | + try: | ||
| 1181 | + #TODO: handle olefile errors, when an OLE file is malformed | ||
| 1182 | + vba = VBA_Parser(filename, data) | ||
| 1183 | + if vba.detect_vba_macros(): | ||
| 1184 | + for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): | ||
| 1185 | + nb_macros += 1 | ||
| 1186 | + if vba_code.strip() != '': | ||
| 1187 | + nb_autoexec += len(detect_autoexec(vba_code)) | ||
| 1188 | + nb_suspicious += len(detect_suspicious(vba_code)) | ||
| 1189 | + nb_iocs += len(detect_patterns(vba_code)) | ||
| 1190 | + nb_hexstrings += len(detect_hex_strings(vba_code)) | ||
| 1191 | + if vba.type == TYPE_OLE: | ||
| 1192 | + flags = 'O' | ||
| 1193 | + else: | ||
| 1194 | + flags = 'X' | ||
| 1195 | + macros = autoexec = suspicious = iocs = hexstrings = '-' | ||
| 1196 | + if nb_macros: macros = 'M' | ||
| 1197 | + if nb_autoexec: autoexec = 'A' | ||
| 1198 | + if nb_suspicious: suspicious = 'S' | ||
| 1199 | + if nb_iocs: iocs = 'I' | ||
| 1200 | + if nb_hexstrings: hexstrings = 'H' | ||
| 1201 | + flags += '%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings) | ||
| 1202 | + | ||
| 1203 | + # macros = autoexec = suspicious = iocs = hexstrings = 'no' | ||
| 1204 | + # if nb_macros: macros = 'YES:%d' % nb_macros | ||
| 1205 | + # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec | ||
| 1206 | + # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious | ||
| 1207 | + # if nb_iocs: iocs = 'YES:%d' % nb_iocs | ||
| 1208 | + # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings | ||
| 1209 | + # # 2nd line = info | ||
| 1210 | + # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (vba.type, macros, autoexec, suspicious, iocs, hexstrings) | ||
| 1211 | + except TypeError: | ||
| 1212 | + # file type not OLE nor OpenXML | ||
| 1213 | + flags = '?' | ||
| 1214 | + message = 'File format not supported' | ||
| 1215 | + except: | ||
| 1216 | + # another error occurred | ||
| 1217 | + #raise | ||
| 1218 | + #TODO: print more info if debug mode | ||
| 1219 | + #TODO: distinguish real errors from incorrect file types | ||
| 1220 | + flags = '!ERROR' | ||
| 1221 | + message = sys.exc_value | ||
| 1222 | + line = '%-6s %s' % (flags, filename) | ||
| 1223 | + if message: | ||
| 1224 | + line += ' - %s' % message | ||
| 1225 | + print line | ||
| 1226 | + | ||
| 1227 | + # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'), | ||
| 1228 | + # header=False, border=False) | ||
| 1229 | + # t.align = 'l' | ||
| 1230 | + # t.max_width['filename'] = 30 | ||
| 1231 | + # t.max_width['type'] = 10 | ||
| 1232 | + # t.max_width['macros'] = 6 | ||
| 1233 | + # t.max_width['autoexec'] = 6 | ||
| 1234 | + # t.max_width['suspicious'] = 6 | ||
| 1235 | + # t.max_width['ioc'] = 6 | ||
| 1236 | + # t.max_width['hexstrings'] = 6 | ||
| 1237 | + # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings)) | ||
| 1238 | + # print t | ||
| 1239 | + | ||
| 1240 | +def main_triage_quick(): | ||
| 1241 | + pass | ||
| 1242 | + | ||
| 1158 | #=== MAIN ===================================================================== | 1243 | #=== MAIN ===================================================================== |
| 1159 | 1244 | ||
| 1160 | def main(): | 1245 | def main(): |
| @@ -1173,20 +1258,54 @@ def main(): | @@ -1173,20 +1258,54 @@ def main(): | ||
| 1173 | help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') | 1258 | help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') |
| 1174 | parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', | 1259 | parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', |
| 1175 | help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') | 1260 | help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') |
| 1261 | + parser.add_option("-t", action="store_true", dest="triage_mode", | ||
| 1262 | + help='triage mode, display results as a summary table (default for multiple files)') | ||
| 1263 | + parser.add_option("-d", action="store_true", dest="detailed_mode", | ||
| 1264 | + help='detailed mode, display full results (default for single file)') | ||
| 1176 | 1265 | ||
| 1177 | (options, args) = parser.parse_args() | 1266 | (options, args) = parser.parse_args() |
| 1178 | 1267 | ||
| 1179 | - # Print help if no argurments are passed | 1268 | + # Print help if no arguments are passed |
| 1180 | if len(args) == 0: | 1269 | if len(args) == 0: |
| 1181 | print __doc__ | 1270 | print __doc__ |
| 1182 | parser.print_help() | 1271 | parser.print_help() |
| 1183 | sys.exit() | 1272 | sys.exit() |
| 1184 | 1273 | ||
| 1185 | logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) | 1274 | logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) |
| 1186 | - | 1275 | + # For now, all logging is disabled: |
| 1276 | + logging.disable(logging.CRITICAL) | ||
| 1277 | + | ||
| 1278 | + # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr') | ||
| 1279 | + # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7) | ||
| 1280 | + if not options.detailed_mode or options.triage_mode: | ||
| 1281 | + print '%-6s %-72s' % ('Flags', 'Filename') | ||
| 1282 | + print '%-6s %-72s' % ('-'*6, '-'*72) | ||
| 1283 | + previous_container = None | ||
| 1284 | + count = 0 | ||
| 1285 | + container = filename = data = None | ||
| 1187 | for container, filename, data in xglob.iter_files(args, recursive=options.recursive, | 1286 | for container, filename, data in xglob.iter_files(args, recursive=options.recursive, |
| 1188 | zip_password=options.zip_password, zip_fname=options.zip_fname): | 1287 | zip_password=options.zip_password, zip_fname=options.zip_fname): |
| 1189 | - #data = open(filespec, 'rb').read() | 1288 | + # ignore directory names stored in zip files: |
| 1289 | + if container and filename.endswith('/'): | ||
| 1290 | + continue | ||
| 1291 | + if options.detailed_mode and not options.triage_mode: | ||
| 1292 | + # fully detailed output | ||
| 1293 | + process_file(container, filename, data) | ||
| 1294 | + else: | ||
| 1295 | + # print container name when it changes: | ||
| 1296 | + if container != previous_container: | ||
| 1297 | + if container is not None: | ||
| 1298 | + print '\nFiles in %s:' % container | ||
| 1299 | + previous_container = container | ||
| 1300 | + # summarized output for triage: | ||
| 1301 | + process_file_triage(container, filename, data) | ||
| 1302 | + count += 1 | ||
| 1303 | + if not options.detailed_mode or options.triage_mode: | ||
| 1304 | + print '\n(Flags: O=OLE, X=OpenXML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex-encoded strings, ?=Unknown)\n' | ||
| 1305 | + | ||
| 1306 | + if count == 1 and not options.triage_mode and not options.detailed_mode: | ||
| 1307 | + # if options -t and -d were not specified and it's a single file, print details: | ||
| 1308 | + #TODO: avoid doing the analysis twice by storing results | ||
| 1190 | process_file(container, filename, data) | 1309 | process_file(container, filename, data) |
| 1191 | 1310 | ||
| 1192 | if __name__ == '__main__': | 1311 | if __name__ == '__main__': |