Commit 67f0725b7be892dcda47317ee5e180dce881d410
1 parent
8f37786d
olevba: improved Base64 detection and decoding
Showing 1 changed file with 18 additions and 8 deletions
oletools/olevba.py
| @@ -120,6 +120,7 @@ https://github.com/unixfreak0037/officeparser | @@ -120,6 +120,7 @@ https://github.com/unixfreak0037/officeparser | ||
| 120 | # 2015-02-07 v0.24 PL: - renamed option --hex to --decode, fixed display | 120 | # 2015-02-07 v0.24 PL: - renamed option --hex to --decode, fixed display |
| 121 | # - display exceptions with stack trace | 121 | # - display exceptions with stack trace |
| 122 | # - added several suspicious keywords | 122 | # - added several suspicious keywords |
| 123 | +# - improved Base64 detection and decoding | ||
| 123 | 124 | ||
| 124 | __version__ = '0.24' | 125 | __version__ = '0.24' |
| 125 | 126 | ||
| @@ -127,6 +128,8 @@ __version__ = '0.24' | @@ -127,6 +128,8 @@ __version__ = '0.24' | ||
| 127 | # TODO: | 128 | # TODO: |
| 128 | # + do not use logging, but a provided logger (null logger by default) | 129 | # + do not use logging, but a provided logger (null logger by default) |
| 129 | # + setup logging (common with other oletools) | 130 | # + setup logging (common with other oletools) |
| 131 | +# + add xor bruteforcing like bbharvest | ||
| 132 | +# + add chr() decoding | ||
| 130 | 133 | ||
| 131 | # TODO later: | 134 | # TODO later: |
| 132 | # + performance improvement: instead of searching each keyword separately, | 135 | # + performance improvement: instead of searching each keyword separately, |
| @@ -249,7 +252,7 @@ SUSPICIOUS_KEYWORDS = { | @@ -249,7 +252,7 @@ SUSPICIOUS_KEYWORDS = { | ||
| 249 | ('Lib',), | 252 | ('Lib',), |
| 250 | 'May download files from the Internet': | 253 | 'May download files from the Internet': |
| 251 | #TODO: regex to find urlmon+URLDownloadToFileA on same line | 254 | #TODO: regex to find urlmon+URLDownloadToFileA on same line |
| 252 | - ('URLDownloadToFileA',), | 255 | + ('URLDownloadToFileA', 'Msxml2.XMLHTTP', 'Microsoft.XMLHTTP'), |
| 253 | 'May control another application by simulating user keystrokes': | 256 | 'May control another application by simulating user keystrokes': |
| 254 | ('SendKeys', 'AppActivate'), | 257 | ('SendKeys', 'AppActivate'), |
| 255 | #SendKeys: http://msdn.microsoft.com/en-us/library/office/gg278655%28v=office.15%29.aspx | 258 | #SendKeys: http://msdn.microsoft.com/en-us/library/office/gg278655%28v=office.15%29.aspx |
| @@ -303,8 +306,16 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') | @@ -303,8 +306,16 @@ re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}') | ||
| 303 | 306 | ||
| 304 | # regex to detect strings encoded in base64 | 307 | # regex to detect strings encoded in base64 |
| 305 | #re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"') | 308 | #re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"') |
| 306 | -# alternate version from balbuzard: | ||
| 307 | -re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)"') | 309 | +# better version from balbuzard, fewer false positives: |
| 310 | +re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)?"') | ||
| 311 | +# whitelist of common strings that match the base64 regex but are not base64 strings (all lowercase): | ||
| 312 | +BASE64_WHITELIST = set(['thisdocument']) | ||
| 313 | + | ||
| 314 | +# regex to detect strings encoded with a specific Dridex algorithm | ||
| 315 | +# (see https://github.com/JamesHabben/MalwareStuff) | ||
| 316 | +re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') | ||
| 317 | +# regex to check that it is not just a hex string: | ||
| 318 | +re_dridex_check = re.compile(r'[G-Zg-z]') | ||
| 308 | 319 | ||
| 309 | #--- FUNCTIONS ---------------------------------------------------------------- | 320 | #--- FUNCTIONS ---------------------------------------------------------------- |
| 310 | 321 | ||
| @@ -956,8 +967,10 @@ def detect_base64_strings(vba_code): | @@ -956,8 +967,10 @@ def detect_base64_strings(vba_code): | ||
| 956 | results = [] | 967 | results = [] |
| 957 | found = set() | 968 | found = set() |
| 958 | for match in re_base64_string.finditer(vba_code): | 969 | for match in re_base64_string.finditer(vba_code): |
| 959 | - value = match.group() | ||
| 960 | - if value not in found: | 970 | + # extract the base64 string without quotes: |
| 971 | + value = match.group().strip('"') | ||
| 972 | + # only keep new values and not in the whitelist: | ||
| 973 | + if value not in found and value.lower() not in BASE64_WHITELIST: | ||
| 961 | try: | 974 | try: |
| 962 | decoded = base64.b64decode(value) | 975 | decoded = base64.b64decode(value) |
| 963 | results.append((value, decoded)) | 976 | results.append((value, decoded)) |
| @@ -978,9 +991,6 @@ def detect_dridex_strings(vba_code): | @@ -978,9 +991,6 @@ def detect_dridex_strings(vba_code): | ||
| 978 | from thirdparty.DridexUrlDecoder.DridexUrlDecoder import DridexUrlDecode | 991 | from thirdparty.DridexUrlDecoder.DridexUrlDecoder import DridexUrlDecode |
| 979 | results = [] | 992 | results = [] |
| 980 | found = set() | 993 | found = set() |
| 981 | - re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') | ||
| 982 | - # regex to check that it is not just a hex string: | ||
| 983 | - re_dridex_check = re.compile(r'[G-Zg-z]') | ||
| 984 | for match in re_dridex_string.finditer(vba_code): | 994 | for match in re_dridex_string.finditer(vba_code): |
| 985 | value = match.group()[1:-1] | 995 | value = match.group()[1:-1] |
| 986 | if not re_dridex_check.search(value): | 996 | if not re_dridex_check.search(value): |