Commit b984e77ab807ad25d922ae456b1bebd47c31e1f5
1 parent: aca4787e
olevba: improved Base64 decoding, fixed triage mode not to scan attrib lines
Showing 1 changed file with 18 additions and 12 deletions:
oletools/olevba.py
| ... | ... | @@ -121,6 +121,7 @@ https://github.com/unixfreak0037/officeparser |
| 121 | 121 | # - display exceptions with stack trace |
| 122 | 122 | # - added several suspicious keywords |
| 123 | 123 | # - improved Base64 detection and decoding |
| 124 | +# - fixed triage mode not to scan attrib lines | |
| 124 | 125 | |
| 125 | 126 | __version__ = '0.24' |
| 126 | 127 | |
| ... | ... | @@ -309,13 +310,13 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') |
| 309 | 310 | # better version from balbuzard, less false positives: |
| 310 | 311 | re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"') |
| 311 | 312 | # white list of common strings matching the base64 regex, but which are not base64 strings (all lowercase): |
| 312 | -BASE64_WHITELIST = set(['thisdocument']) | |
| 313 | +BASE64_WHITELIST = set(['thisdocument', 'thisworkbook', 'test', 'temp', 'http', 'open', 'exit']) | |
| 313 | 314 | |
| 314 | 315 | # regex to detect strings encoded with a specific Dridex algorithm |
| 315 | 316 | # (see https://github.com/JamesHabben/MalwareStuff) |
| 316 | 317 | re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') |
| 317 | 318 | # regex to check that it is not just a hex string: |
| 318 | -re_dridex_check = re.compile(r'[G-Zg-z]') | |
| 319 | +re_nothex_check = re.compile(r'[G-Zg-z]') | |
| 319 | 320 | |
| 320 | 321 | #--- FUNCTIONS ---------------------------------------------------------------- |
| 321 | 322 | |
| ... | ... | @@ -969,6 +970,9 @@ def detect_base64_strings(vba_code): |
| 969 | 970 | for match in re_base64_string.finditer(vba_code): |
| 970 | 971 | # extract the base64 string without quotes: |
| 971 | 972 | value = match.group().strip('"') |
| 973 | + # check it is not just a hex string: | |
| 974 | + if not re_nothex_check.search(value): | |
| 975 | + continue | |
| 972 | 976 | # only keep new values and not in the whitelist: |
| 973 | 977 | if value not in found and value.lower() not in BASE64_WHITELIST: |
| 974 | 978 | try: |
| ... | ... | @@ -993,7 +997,8 @@ def detect_dridex_strings(vba_code): |
| 993 | 997 | found = set() |
| 994 | 998 | for match in re_dridex_string.finditer(vba_code): |
| 995 | 999 | value = match.group()[1:-1] |
| 996 | - if not re_dridex_check.search(value): | |
| 1000 | + # check it is not just a hex string: | |
| 1001 | + if not re_nothex_check.search(value): | |
| 997 | 1002 | continue |
| 998 | 1003 | if value not in found: |
| 999 | 1004 | try: |
| ... | ... | @@ -1052,7 +1057,6 @@ class VBA_Scanner (object): |
| 1052 | 1057 | self.code_rev_hex += '\n'+binascii.unhexlify(encoded[::-1]) |
| 1053 | 1058 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ |
| 1054 | 1059 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) |
| 1055 | - #TODO: show which IOCs have been found using hex, strrev or both | |
| 1056 | 1060 | # Detect Base64-encoded strings |
| 1057 | 1061 | self.base64_strings = detect_base64_strings(self.code) |
| 1058 | 1062 | for encoded, decoded in self.base64_strings: |
| ... | ... | @@ -1401,19 +1405,20 @@ def process_file (container, filename, data, show_decoded_strings=False): |
| 1401 | 1405 | for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): |
| 1402 | 1406 | # hide attribute lines: |
| 1403 | 1407 | #TODO: option to disable attribute filtering |
| 1404 | - vba_code = filter_vba(vba_code) | |
| 1408 | + vba_code_filtered = filter_vba(vba_code) | |
| 1405 | 1409 | print '-'*79 |
| 1406 | 1410 | print 'VBA MACRO %s ' % vba_filename |
| 1407 | 1411 | print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)) |
| 1408 | 1412 | print '- '*39 |
| 1409 | 1413 | # detect empty macros: |
| 1410 | - if vba_code.strip() == '': | |
| 1414 | + if vba_code_filtered.strip() == '': | |
| 1411 | 1415 | print '(empty macro)' |
| 1412 | 1416 | else: |
| 1413 | - print vba_code | |
| 1417 | + print vba_code_filtered | |
| 1414 | 1418 | print '- '*39 |
| 1415 | 1419 | print 'ANALYSIS:' |
| 1416 | - print_analysis(vba_code, show_decoded_strings) | |
| 1420 | + # analyse the whole code, filtered to avoid false positives: | |
| 1421 | + print_analysis(vba_code_filtered, show_decoded_strings) | |
| 1417 | 1422 | else: |
| 1418 | 1423 | print 'No VBA macros found.' |
| 1419 | 1424 | except: #TypeError: |
| ... | ... | @@ -1451,7 +1456,8 @@ def process_file_triage (container, filename, data): |
| 1451 | 1456 | for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): |
| 1452 | 1457 | nb_macros += 1 |
| 1453 | 1458 | if vba_code.strip() != '': |
| 1454 | - scanner = VBA_Scanner(vba_code) | |
| 1459 | + # analyse the whole code, filtered to avoid false positives: | |
| 1460 | + scanner = VBA_Scanner(filter_vba(vba_code)) | |
| 1455 | 1461 | autoexec, suspicious, iocs, hexstrings, base64strings, dridex = scanner.scan_summary() |
| 1456 | 1462 | nb_autoexec += autoexec |
| 1457 | 1463 | nb_suspicious += suspicious |
| ... | ... | @@ -1463,16 +1469,16 @@ def process_file_triage (container, filename, data): |
| 1463 | 1469 | flags = 'OLE:' |
| 1464 | 1470 | else: |
| 1465 | 1471 | flags = 'OpX:' |
| 1466 | - macros = autoexec = suspicious = iocs = hexstrings = base64strings = dridex = '-' | |
| 1472 | + macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = '-' | |
| 1467 | 1473 | if nb_macros: macros = 'M' |
| 1468 | 1474 | if nb_autoexec: autoexec = 'A' |
| 1469 | 1475 | if nb_suspicious: suspicious = 'S' |
| 1470 | 1476 | if nb_iocs: iocs = 'I' |
| 1471 | 1477 | if nb_hexstrings: hexstrings = 'H' |
| 1472 | - if nb_base64strings: base64strings = 'B' | |
| 1478 | + if nb_base64strings: base64obf = 'B' | |
| 1473 | 1479 | if nb_dridexstrings: dridex = 'D' |
| 1474 | 1480 | flags += '%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings, |
| 1475 | - base64strings, dridex) | |
| 1481 | + base64obf, dridex) | |
| 1476 | 1482 | |
| 1477 | 1483 | # macros = autoexec = suspicious = iocs = hexstrings = 'no' |
| 1478 | 1484 | # if nb_macros: macros = 'YES:%d' % nb_macros | ... | ... |