Commit b984e77ab807ad25d922ae456b1bebd47c31e1f5

Authored by Philippe Lagadec
1 parent aca4787e

olevba: improved Base64 decoding, fixed triage mode so that it no longer scans attribute lines

Showing 1 changed file with 18 additions and 12 deletions
oletools/olevba.py
... ... @@ -121,6 +121,7 @@ https://github.com/unixfreak0037/officeparser
121 121 # - display exceptions with stack trace
122 122 # - added several suspicious keywords
123 123 # - improved Base64 detection and decoding
  124 +# - fixed triage mode so that it no longer scans attribute lines
124 125  
125 126 __version__ = '0.24'
126 127  
... ... @@ -309,13 +310,13 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')
309 310 # better version from balbuzard, less false positives:
310 311 re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"')
311 312 # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase):
312   -BASE64_WHITELIST = set(['thisdocument'])
  313 +BASE64_WHITELIST = set(['thisdocument', 'thisworkbook', 'test', 'temp', 'http', 'open', 'exit'])
313 314  
314 315 # regex to detect strings encoded with a specific Dridex algorithm
315 316 # (see https://github.com/JamesHabben/MalwareStuff)
316 317 re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"')
317 318 # regex to check that it is not just a hex string:
318   -re_dridex_check = re.compile(r'[G-Zg-z]')
  319 +re_nothex_check = re.compile(r'[G-Zg-z]')
319 320  
320 321 #--- FUNCTIONS ----------------------------------------------------------------
321 322  
... ... @@ -969,6 +970,9 @@ def detect_base64_strings(vba_code):
969 970 for match in re_base64_string.finditer(vba_code):
970 971 # extract the base64 string without quotes:
971 972 value = match.group().strip('"')
  973 + # check it is not just a hex string:
  974 + if not re_nothex_check.search(value):
  975 + continue
972 976 # only keep new values and not in the whitelist:
973 977 if value not in found and value.lower() not in BASE64_WHITELIST:
974 978 try:
... ... @@ -993,7 +997,8 @@ def detect_dridex_strings(vba_code):
993 997 found = set()
994 998 for match in re_dridex_string.finditer(vba_code):
995 999 value = match.group()[1:-1]
996   - if not re_dridex_check.search(value):
  1000 + # check it is not just a hex string:
  1001 + if not re_nothex_check.search(value):
997 1002 continue
998 1003 if value not in found:
999 1004 try:
... ... @@ -1052,7 +1057,6 @@ class VBA_Scanner (object):
1052 1057 self.code_rev_hex += '\n'+binascii.unhexlify(encoded[::-1])
1053 1058 #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
1054 1059 #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
1055   - #TODO: show which IOCs have been found using hex, strrev or both
1056 1060 # Detect Base64-encoded strings
1057 1061 self.base64_strings = detect_base64_strings(self.code)
1058 1062 for encoded, decoded in self.base64_strings:
... ... @@ -1401,19 +1405,20 @@ def process_file (container, filename, data, show_decoded_strings=False):
1401 1405 for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
1402 1406 # hide attribute lines:
1403 1407 #TODO: option to disable attribute filtering
1404   - vba_code = filter_vba(vba_code)
  1408 + vba_code_filtered = filter_vba(vba_code)
1405 1409 print '-'*79
1406 1410 print 'VBA MACRO %s ' % vba_filename
1407 1411 print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))
1408 1412 print '- '*39
1409 1413 # detect empty macros:
1410   - if vba_code.strip() == '':
  1414 + if vba_code_filtered.strip() == '':
1411 1415 print '(empty macro)'
1412 1416 else:
1413   - print vba_code
  1417 + print vba_code_filtered
1414 1418 print '- '*39
1415 1419 print 'ANALYSIS:'
1416   - print_analysis(vba_code, show_decoded_strings)
  1420 + # analyse the whole code, filtered to avoid false positives:
  1421 + print_analysis(vba_code_filtered, show_decoded_strings)
1417 1422 else:
1418 1423 print 'No VBA macros found.'
1419 1424 except: #TypeError:
... ... @@ -1451,7 +1456,8 @@ def process_file_triage (container, filename, data):
1451 1456 for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
1452 1457 nb_macros += 1
1453 1458 if vba_code.strip() != '':
1454   - scanner = VBA_Scanner(vba_code)
  1459 + # analyse the whole code, filtered to avoid false positives:
  1460 + scanner = VBA_Scanner(filter_vba(vba_code))
1455 1461 autoexec, suspicious, iocs, hexstrings, base64strings, dridex = scanner.scan_summary()
1456 1462 nb_autoexec += autoexec
1457 1463 nb_suspicious += suspicious
... ... @@ -1463,16 +1469,16 @@ def process_file_triage (container, filename, data):
1463 1469 flags = 'OLE:'
1464 1470 else:
1465 1471 flags = 'OpX:'
1466   - macros = autoexec = suspicious = iocs = hexstrings = base64strings = dridex = '-'
  1472 + macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = '-'
1467 1473 if nb_macros: macros = 'M'
1468 1474 if nb_autoexec: autoexec = 'A'
1469 1475 if nb_suspicious: suspicious = 'S'
1470 1476 if nb_iocs: iocs = 'I'
1471 1477 if nb_hexstrings: hexstrings = 'H'
1472   - if nb_base64strings: base64strings = 'B'
  1478 + if nb_base64strings: base64obf = 'B'
1473 1479 if nb_dridexstrings: dridex = 'D'
1474 1480 flags += '%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings,
1475   - base64strings, dridex)
  1481 + base64obf, dridex)
1476 1482  
1477 1483 # macros = autoexec = suspicious = iocs = hexstrings = 'no'
1478 1484 # if nb_macros: macros = 'YES:%d' % nb_macros
... ...