Commit 35d65e6bf3f7571e6c6fb1dc9b1e5e0469367be8
1 parent: b038d927
olevba: added bytes2str to decode bytes to unicode on Python 3 only, fixed StrReverse+Hex decoding (issue #106)
Showing 1 changed file with 24 additions and 16 deletions
oletools/olevba.py
| ... | ... | @@ -375,6 +375,23 @@ def unicode2str(unicode_string): |
| 375 | 375 | return unicode_string |
| 376 | 376 | |
| 377 | 377 | |
| 378 | +def bytes2str(bytes_string, encoding='utf8'): | |
| 379 | + """ | |
| 380 | + convert a bytes string to a native str: | |
| 381 | + - on Python 2, it returns the same string (bytes=str) | |
| 382 | + - on Python 3, the string is decoded using the provided encoding | |
| 383 | + (UTF-8 by default) to a unicode str | |
| 384 | + :param bytes_string: bytes string to be converted | |
| 385 | + :param encoding: codec to be used for decoding | |
| 386 | + :return: the string converted to str | |
| 387 | + :rtype: str | |
| 388 | + """ | |
| 389 | + if PYTHON2: | |
| 390 | + return bytes_string | |
| 391 | + else: | |
| 392 | + return bytes_string.decode('utf8', errors='replace') | |
| 393 | + | |
| 394 | + | |
| 378 | 395 | # === LOGGING ================================================================= |
| 379 | 396 | |
| 380 | 397 | def get_logger(name, level=logging.CRITICAL+1): |
| ... | ... | @@ -2128,10 +2145,7 @@ def detect_hex_strings(vba_code): |
| 2128 | 2145 | for match in re_hex_string.finditer(vba_code): |
| 2129 | 2146 | value = match.group() |
| 2130 | 2147 | if value not in found: |
| 2131 | - decoded = binascii.unhexlify(value) | |
| 2132 | - # On python 3, convert it to unicode | |
| 2133 | - if not PYTHON2: | |
| 2134 | - decoded = decoded.decode('utf8', errors='replace') | |
| 2148 | + decoded = bytes2str(binascii.unhexlify(value)) | |
| 2135 | 2149 | results.append((value, decoded)) |
| 2136 | 2150 | found.add(value) |
| 2137 | 2151 | return results |
| ... | ... | @@ -2156,10 +2170,7 @@ def detect_base64_strings(vba_code): |
| 2156 | 2170 | # only keep new values and not in the whitelist: |
| 2157 | 2171 | if value not in found and value.lower() not in BASE64_WHITELIST: |
| 2158 | 2172 | try: |
| 2159 | - decoded = base64.b64decode(value) | |
| 2160 | - # On python 3, convert it to unicode | |
| 2161 | - if not PYTHON2: | |
| 2162 | - decoded = decoded.decode('utf8', errors='replace') | |
| 2173 | + decoded = bytes2str(base64.b64decode(value)) | |
| 2163 | 2174 | results.append((value, decoded)) |
| 2164 | 2175 | found.add(value) |
| 2165 | 2176 | except (TypeError, ValueError) as exc: |
| ... | ... | @@ -2187,10 +2198,7 @@ def detect_dridex_strings(vba_code): |
| 2187 | 2198 | continue |
| 2188 | 2199 | if value not in found: |
| 2189 | 2200 | try: |
| 2190 | - decoded = DridexUrlDecode(value) | |
| 2191 | - # On python 3, convert it to unicode | |
| 2192 | - if not PYTHON2: | |
| 2193 | - decoded = decoded.decode('utf8', errors='replace') | |
| 2201 | + decoded = bytes2str(DridexUrlDecode(value)) | |
| 2194 | 2202 | results.append((value, decoded)) |
| 2195 | 2203 | found.add(value) |
| 2196 | 2204 | except Exception as exc: |
| ... | ... | @@ -2366,7 +2374,7 @@ class VBA_Scanner(object): |
| 2366 | 2374 | # StrReverse after hex decoding: |
| 2367 | 2375 | self.code_hex_rev += '\n' + decoded[::-1] |
| 2368 | 2376 | # StrReverse before hex decoding: |
| 2369 | - self.code_rev_hex += '\n' + binascii.unhexlify(encoded[::-1]) | |
| 2377 | + self.code_rev_hex += '\n' + bytes2str(binascii.unhexlify(encoded[::-1])) | |
| 2370 | 2378 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ |
| 2371 | 2379 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) |
| 2372 | 2380 | # Detect Base64-encoded strings |
| ... | ... | @@ -3494,15 +3502,15 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3494 | 3502 | print('(empty macro)') |
| 3495 | 3503 | else: |
| 3496 | 3504 | # check if the VBA code contains special characters such as backspace (issue #358) |
| 3497 | - if b'\x08' in vba_code_filtered: | |
| 3505 | + if '\x08' in vba_code_filtered: | |
| 3498 | 3506 | log.warning('The VBA code contains special characters such as backspace, that may be used for obfuscation.') |
| 3499 | 3507 | if sys.stdout.isatty(): |
| 3500 | 3508 | # if the standard output is the console, we'll display colors |
| 3501 | 3509 | backspace = colorclass.Color(b'{autored}\\x08{/red}') |
| 3502 | 3510 | else: |
| 3503 | - backspace = b'\x08' | |
| 3511 | + backspace = '\x08' | |
| 3504 | 3512 | # replace backspace by "\x08" for display |
| 3505 | - vba_code_filtered = vba_code_filtered.replace(b'\x08', backspace) | |
| 3513 | + vba_code_filtered = vba_code_filtered.replace('\x08', backspace) | |
| 3506 | 3514 | try: |
| 3507 | 3515 | # Colorize the interesting keywords in the output: |
| 3508 | 3516 | vba_code_filtered = colorclass.Color(self.colorize_keywords(vba_code_filtered)) | ... | ... |