Commit b984e77ab807ad25d922ae456b1bebd47c31e1f5

Authored by Philippe Lagadec
1 parent aca4787e

olevba: improved Base64 decoding; fixed triage mode so that it no longer scans attribute lines

Showing 1 changed file with 18 additions and 12 deletions
oletools/olevba.py
@@ -121,6 +121,7 @@ https://github.com/unixfreak0037/officeparser @@ -121,6 +121,7 @@ https://github.com/unixfreak0037/officeparser
121 # - display exceptions with stack trace 121 # - display exceptions with stack trace
122 # - added several suspicious keywords 122 # - added several suspicious keywords
123 # - improved Base64 detection and decoding 123 # - improved Base64 detection and decoding
  124 +# - fixed triage mode not to scan attrib lines
124 125
125 __version__ = '0.24' 126 __version__ = '0.24'
126 127
@@ -309,13 +310,13 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') @@ -309,13 +310,13 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')
309 # better version from balbuzard, less false positives: 310 # better version from balbuzard, less false positives:
310 re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"') 311 re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"')
311 # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase): 312 # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase):
312 -BASE64_WHITELIST = set(['thisdocument']) 313 +BASE64_WHITELIST = set(['thisdocument', 'thisworkbook', 'test', 'temp', 'http', 'open', 'exit'])
313 314
314 # regex to detect strings encoded with a specific Dridex algorithm 315 # regex to detect strings encoded with a specific Dridex algorithm
315 # (see https://github.com/JamesHabben/MalwareStuff) 316 # (see https://github.com/JamesHabben/MalwareStuff)
316 re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') 317 re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"')
317 # regex to check that it is not just a hex string: 318 # regex to check that it is not just a hex string:
318 -re_dridex_check = re.compile(r'[G-Zg-z]') 319 +re_nothex_check = re.compile(r'[G-Zg-z]')
319 320
320 #--- FUNCTIONS ---------------------------------------------------------------- 321 #--- FUNCTIONS ----------------------------------------------------------------
321 322
@@ -969,6 +970,9 @@ def detect_base64_strings(vba_code): @@ -969,6 +970,9 @@ def detect_base64_strings(vba_code):
969 for match in re_base64_string.finditer(vba_code): 970 for match in re_base64_string.finditer(vba_code):
970 # extract the base64 string without quotes: 971 # extract the base64 string without quotes:
971 value = match.group().strip('"') 972 value = match.group().strip('"')
  973 + # check it is not just a hex string:
  974 + if not re_nothex_check.search(value):
  975 + continue
972 # only keep new values and not in the whitelist: 976 # only keep new values and not in the whitelist:
973 if value not in found and value.lower() not in BASE64_WHITELIST: 977 if value not in found and value.lower() not in BASE64_WHITELIST:
974 try: 978 try:
@@ -993,7 +997,8 @@ def detect_dridex_strings(vba_code): @@ -993,7 +997,8 @@ def detect_dridex_strings(vba_code):
993 found = set() 997 found = set()
994 for match in re_dridex_string.finditer(vba_code): 998 for match in re_dridex_string.finditer(vba_code):
995 value = match.group()[1:-1] 999 value = match.group()[1:-1]
996 - if not re_dridex_check.search(value): 1000 + # check it is not just a hex string:
  1001 + if not re_nothex_check.search(value):
997 continue 1002 continue
998 if value not in found: 1003 if value not in found:
999 try: 1004 try:
@@ -1052,7 +1057,6 @@ class VBA_Scanner (object): @@ -1052,7 +1057,6 @@ class VBA_Scanner (object):
1052 self.code_rev_hex += '\n'+binascii.unhexlify(encoded[::-1]) 1057 self.code_rev_hex += '\n'+binascii.unhexlify(encoded[::-1])
1053 #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ 1058 #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
1054 #TODO: also append the full code reversed if StrReverse? (risk of false positives?) 1059 #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
1055 - #TODO: show which IOCs have been found using hex, strrev or both  
1056 # Detect Base64-encoded strings 1060 # Detect Base64-encoded strings
1057 self.base64_strings = detect_base64_strings(self.code) 1061 self.base64_strings = detect_base64_strings(self.code)
1058 for encoded, decoded in self.base64_strings: 1062 for encoded, decoded in self.base64_strings:
@@ -1401,19 +1405,20 @@ def process_file (container, filename, data, show_decoded_strings=False): @@ -1401,19 +1405,20 @@ def process_file (container, filename, data, show_decoded_strings=False):
1401 for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): 1405 for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
1402 # hide attribute lines: 1406 # hide attribute lines:
1403 #TODO: option to disable attribute filtering 1407 #TODO: option to disable attribute filtering
1404 - vba_code = filter_vba(vba_code) 1408 + vba_code_filtered = filter_vba(vba_code)
1405 print '-'*79 1409 print '-'*79
1406 print 'VBA MACRO %s ' % vba_filename 1410 print 'VBA MACRO %s ' % vba_filename
1407 print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)) 1411 print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))
1408 print '- '*39 1412 print '- '*39
1409 # detect empty macros: 1413 # detect empty macros:
1410 - if vba_code.strip() == '': 1414 + if vba_code_filtered.strip() == '':
1411 print '(empty macro)' 1415 print '(empty macro)'
1412 else: 1416 else:
1413 - print vba_code 1417 + print vba_code_filtered
1414 print '- '*39 1418 print '- '*39
1415 print 'ANALYSIS:' 1419 print 'ANALYSIS:'
1416 - print_analysis(vba_code, show_decoded_strings) 1420 + # analyse the whole code, filtered to avoid false positives:
  1421 + print_analysis(vba_code_filtered, show_decoded_strings)
1417 else: 1422 else:
1418 print 'No VBA macros found.' 1423 print 'No VBA macros found.'
1419 except: #TypeError: 1424 except: #TypeError:
@@ -1451,7 +1456,8 @@ def process_file_triage (container, filename, data): @@ -1451,7 +1456,8 @@ def process_file_triage (container, filename, data):
1451 for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): 1456 for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
1452 nb_macros += 1 1457 nb_macros += 1
1453 if vba_code.strip() != '': 1458 if vba_code.strip() != '':
1454 - scanner = VBA_Scanner(vba_code) 1459 + # analyse the whole code, filtered to avoid false positives:
  1460 + scanner = VBA_Scanner(filter_vba(vba_code))
1455 autoexec, suspicious, iocs, hexstrings, base64strings, dridex = scanner.scan_summary() 1461 autoexec, suspicious, iocs, hexstrings, base64strings, dridex = scanner.scan_summary()
1456 nb_autoexec += autoexec 1462 nb_autoexec += autoexec
1457 nb_suspicious += suspicious 1463 nb_suspicious += suspicious
@@ -1463,16 +1469,16 @@ def process_file_triage (container, filename, data): @@ -1463,16 +1469,16 @@ def process_file_triage (container, filename, data):
1463 flags = 'OLE:' 1469 flags = 'OLE:'
1464 else: 1470 else:
1465 flags = 'OpX:' 1471 flags = 'OpX:'
1466 - macros = autoexec = suspicious = iocs = hexstrings = base64strings = dridex = '-' 1472 + macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = '-'
1467 if nb_macros: macros = 'M' 1473 if nb_macros: macros = 'M'
1468 if nb_autoexec: autoexec = 'A' 1474 if nb_autoexec: autoexec = 'A'
1469 if nb_suspicious: suspicious = 'S' 1475 if nb_suspicious: suspicious = 'S'
1470 if nb_iocs: iocs = 'I' 1476 if nb_iocs: iocs = 'I'
1471 if nb_hexstrings: hexstrings = 'H' 1477 if nb_hexstrings: hexstrings = 'H'
1472 - if nb_base64strings: base64strings = 'B' 1478 + if nb_base64strings: base64obf = 'B'
1473 if nb_dridexstrings: dridex = 'D' 1479 if nb_dridexstrings: dridex = 'D'
1474 flags += '%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings, 1480 flags += '%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings,
1475 - base64strings, dridex) 1481 + base64obf, dridex)
1476 1482
1477 # macros = autoexec = suspicious = iocs = hexstrings = 'no' 1483 # macros = autoexec = suspicious = iocs = hexstrings = 'no'
1478 # if nb_macros: macros = 'YES:%d' % nb_macros 1484 # if nb_macros: macros = 'YES:%d' % nb_macros