Commit b984e77ab807ad25d922ae456b1bebd47c31e1f5
1 parent: aca4787e
olevba: improved Base64 decoding, fixed triage mode not to scan attrib lines
Showing 1 changed file with 18 additions and 12 deletions
oletools/olevba.py
| @@ -121,6 +121,7 @@ https://github.com/unixfreak0037/officeparser | @@ -121,6 +121,7 @@ https://github.com/unixfreak0037/officeparser | ||
| 121 | # - display exceptions with stack trace | 121 | # - display exceptions with stack trace |
| 122 | # - added several suspicious keywords | 122 | # - added several suspicious keywords |
| 123 | # - improved Base64 detection and decoding | 123 | # - improved Base64 detection and decoding |
| 124 | +# - fixed triage mode not to scan attrib lines | ||
| 124 | 125 | ||
| 125 | __version__ = '0.24' | 126 | __version__ = '0.24' |
| 126 | 127 | ||
| @@ -309,13 +310,13 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') | @@ -309,13 +310,13 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') | ||
| 309 | # better version from balbuzard, less false positives: | 310 | # better version from balbuzard, less false positives: |
| 310 | re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"') | 311 | re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"') |
| 311 | # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase): | 312 | # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase): |
| 312 | -BASE64_WHITELIST = set(['thisdocument']) | 313 | +BASE64_WHITELIST = set(['thisdocument', 'thisworkbook', 'test', 'temp', 'http', 'open', 'exit']) |
| 313 | 314 | ||
| 314 | # regex to detect strings encoded with a specific Dridex algorithm | 315 | # regex to detect strings encoded with a specific Dridex algorithm |
| 315 | # (see https://github.com/JamesHabben/MalwareStuff) | 316 | # (see https://github.com/JamesHabben/MalwareStuff) |
| 316 | re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') | 317 | re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') |
| 317 | # regex to check that it is not just a hex string: | 318 | # regex to check that it is not just a hex string: |
| 318 | -re_dridex_check = re.compile(r'[G-Zg-z]') | 319 | +re_nothex_check = re.compile(r'[G-Zg-z]') |
| 319 | 320 | ||
| 320 | #--- FUNCTIONS ---------------------------------------------------------------- | 321 | #--- FUNCTIONS ---------------------------------------------------------------- |
| 321 | 322 | ||
| @@ -969,6 +970,9 @@ def detect_base64_strings(vba_code): | @@ -969,6 +970,9 @@ def detect_base64_strings(vba_code): | ||
| 969 | for match in re_base64_string.finditer(vba_code): | 970 | for match in re_base64_string.finditer(vba_code): |
| 970 | # extract the base64 string without quotes: | 971 | # extract the base64 string without quotes: |
| 971 | value = match.group().strip('"') | 972 | value = match.group().strip('"') |
| 973 | + # check it is not just a hex string: | ||
| 974 | + if not re_nothex_check.search(value): | ||
| 975 | + continue | ||
| 972 | # only keep new values and not in the whitelist: | 976 | # only keep new values and not in the whitelist: |
| 973 | if value not in found and value.lower() not in BASE64_WHITELIST: | 977 | if value not in found and value.lower() not in BASE64_WHITELIST: |
| 974 | try: | 978 | try: |
| @@ -993,7 +997,8 @@ def detect_dridex_strings(vba_code): | @@ -993,7 +997,8 @@ def detect_dridex_strings(vba_code): | ||
| 993 | found = set() | 997 | found = set() |
| 994 | for match in re_dridex_string.finditer(vba_code): | 998 | for match in re_dridex_string.finditer(vba_code): |
| 995 | value = match.group()[1:-1] | 999 | value = match.group()[1:-1] |
| 996 | - if not re_dridex_check.search(value): | 1000 | + # check it is not just a hex string: |
| 1001 | + if not re_nothex_check.search(value): | ||
| 997 | continue | 1002 | continue |
| 998 | if value not in found: | 1003 | if value not in found: |
| 999 | try: | 1004 | try: |
| @@ -1052,7 +1057,6 @@ class VBA_Scanner (object): | @@ -1052,7 +1057,6 @@ class VBA_Scanner (object): | ||
| 1052 | self.code_rev_hex += '\n'+binascii.unhexlify(encoded[::-1]) | 1057 | self.code_rev_hex += '\n'+binascii.unhexlify(encoded[::-1]) |
| 1053 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ | 1058 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ |
| 1054 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) | 1059 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) |
| 1055 | - #TODO: show which IOCs have been found using hex, strrev or both | ||
| 1056 | # Detect Base64-encoded strings | 1060 | # Detect Base64-encoded strings |
| 1057 | self.base64_strings = detect_base64_strings(self.code) | 1061 | self.base64_strings = detect_base64_strings(self.code) |
| 1058 | for encoded, decoded in self.base64_strings: | 1062 | for encoded, decoded in self.base64_strings: |
| @@ -1401,19 +1405,20 @@ def process_file (container, filename, data, show_decoded_strings=False): | @@ -1401,19 +1405,20 @@ def process_file (container, filename, data, show_decoded_strings=False): | ||
| 1401 | for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): | 1405 | for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): |
| 1402 | # hide attribute lines: | 1406 | # hide attribute lines: |
| 1403 | #TODO: option to disable attribute filtering | 1407 | #TODO: option to disable attribute filtering |
| 1404 | - vba_code = filter_vba(vba_code) | 1408 | + vba_code_filtered = filter_vba(vba_code) |
| 1405 | print '-'*79 | 1409 | print '-'*79 |
| 1406 | print 'VBA MACRO %s ' % vba_filename | 1410 | print 'VBA MACRO %s ' % vba_filename |
| 1407 | print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)) | 1411 | print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)) |
| 1408 | print '- '*39 | 1412 | print '- '*39 |
| 1409 | # detect empty macros: | 1413 | # detect empty macros: |
| 1410 | - if vba_code.strip() == '': | 1414 | + if vba_code_filtered.strip() == '': |
| 1411 | print '(empty macro)' | 1415 | print '(empty macro)' |
| 1412 | else: | 1416 | else: |
| 1413 | - print vba_code | 1417 | + print vba_code_filtered |
| 1414 | print '- '*39 | 1418 | print '- '*39 |
| 1415 | print 'ANALYSIS:' | 1419 | print 'ANALYSIS:' |
| 1416 | - print_analysis(vba_code, show_decoded_strings) | 1420 | + # analyse the whole code, filtered to avoid false positives: |
| 1421 | + print_analysis(vba_code_filtered, show_decoded_strings) | ||
| 1417 | else: | 1422 | else: |
| 1418 | print 'No VBA macros found.' | 1423 | print 'No VBA macros found.' |
| 1419 | except: #TypeError: | 1424 | except: #TypeError: |
| @@ -1451,7 +1456,8 @@ def process_file_triage (container, filename, data): | @@ -1451,7 +1456,8 @@ def process_file_triage (container, filename, data): | ||
| 1451 | for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): | 1456 | for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): |
| 1452 | nb_macros += 1 | 1457 | nb_macros += 1 |
| 1453 | if vba_code.strip() != '': | 1458 | if vba_code.strip() != '': |
| 1454 | - scanner = VBA_Scanner(vba_code) | 1459 | + # analyse the whole code, filtered to avoid false positives: |
| 1460 | + scanner = VBA_Scanner(filter_vba(vba_code)) | ||
| 1455 | autoexec, suspicious, iocs, hexstrings, base64strings, dridex = scanner.scan_summary() | 1461 | autoexec, suspicious, iocs, hexstrings, base64strings, dridex = scanner.scan_summary() |
| 1456 | nb_autoexec += autoexec | 1462 | nb_autoexec += autoexec |
| 1457 | nb_suspicious += suspicious | 1463 | nb_suspicious += suspicious |
| @@ -1463,16 +1469,16 @@ def process_file_triage (container, filename, data): | @@ -1463,16 +1469,16 @@ def process_file_triage (container, filename, data): | ||
| 1463 | flags = 'OLE:' | 1469 | flags = 'OLE:' |
| 1464 | else: | 1470 | else: |
| 1465 | flags = 'OpX:' | 1471 | flags = 'OpX:' |
| 1466 | - macros = autoexec = suspicious = iocs = hexstrings = base64strings = dridex = '-' | 1472 | + macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = '-' |
| 1467 | if nb_macros: macros = 'M' | 1473 | if nb_macros: macros = 'M' |
| 1468 | if nb_autoexec: autoexec = 'A' | 1474 | if nb_autoexec: autoexec = 'A' |
| 1469 | if nb_suspicious: suspicious = 'S' | 1475 | if nb_suspicious: suspicious = 'S' |
| 1470 | if nb_iocs: iocs = 'I' | 1476 | if nb_iocs: iocs = 'I' |
| 1471 | if nb_hexstrings: hexstrings = 'H' | 1477 | if nb_hexstrings: hexstrings = 'H' |
| 1472 | - if nb_base64strings: base64strings = 'B' | 1478 | + if nb_base64strings: base64obf = 'B' |
| 1473 | if nb_dridexstrings: dridex = 'D' | 1479 | if nb_dridexstrings: dridex = 'D' |
| 1474 | flags += '%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings, | 1480 | flags += '%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings, |
| 1475 | - base64strings, dridex) | 1481 | + base64obf, dridex) |
| 1476 | 1482 | ||
| 1477 | # macros = autoexec = suspicious = iocs = hexstrings = 'no' | 1483 | # macros = autoexec = suspicious = iocs = hexstrings = 'no' |
| 1478 | # if nb_macros: macros = 'YES:%d' % nb_macros | 1484 | # if nb_macros: macros = 'YES:%d' % nb_macros |