Commit 35d65e6bf3f7571e6c6fb1dc9b1e5e0469367be8

Authored by decalage2
1 parent b038d927

olevba: added bytes2str to decode bytes to unicode on Python 3 only, fixed StrReverse+Hex decoding (issue #106)
Showing 1 changed file with 24 additions and 16 deletions
oletools/olevba.py
... ... @@ -375,6 +375,23 @@ def unicode2str(unicode_string):
375 375 return unicode_string
376 376  
377 377  
  378 +def bytes2str(bytes_string, encoding='utf8'):
  379 + """
  380 + convert a bytes string to a native str:
  381 + - on Python 2, it returns the same string (bytes=str)
  382 + - on Python 3, the string is decoded using the provided encoding
  383 + (UTF-8 by default) to a unicode str
  384 + :param bytes_string: bytes string to be converted
  385 + :param encoding: codec to be used for decoding
  386 + :return: the string converted to str
  387 + :rtype: str
  388 + """
  389 + if PYTHON2:
  390 + return bytes_string
  391 + else:
  392 + return bytes_string.decode('utf8', errors='replace')
  393 +
  394 +
378 395 # === LOGGING =================================================================
379 396  
380 397 def get_logger(name, level=logging.CRITICAL+1):
... ... @@ -2128,10 +2145,7 @@ def detect_hex_strings(vba_code):
2128 2145 for match in re_hex_string.finditer(vba_code):
2129 2146 value = match.group()
2130 2147 if value not in found:
2131   - decoded = binascii.unhexlify(value)
2132   - # On python 3, convert it to unicode
2133   - if not PYTHON2:
2134   - decoded = decoded.decode('utf8', errors='replace')
  2148 + decoded = bytes2str(binascii.unhexlify(value))
2135 2149 results.append((value, decoded))
2136 2150 found.add(value)
2137 2151 return results
... ... @@ -2156,10 +2170,7 @@ def detect_base64_strings(vba_code):
2156 2170 # only keep new values and not in the whitelist:
2157 2171 if value not in found and value.lower() not in BASE64_WHITELIST:
2158 2172 try:
2159   - decoded = base64.b64decode(value)
2160   - # On python 3, convert it to unicode
2161   - if not PYTHON2:
2162   - decoded = decoded.decode('utf8', errors='replace')
  2173 + decoded = bytes2str(base64.b64decode(value))
2163 2174 results.append((value, decoded))
2164 2175 found.add(value)
2165 2176 except (TypeError, ValueError) as exc:
... ... @@ -2187,10 +2198,7 @@ def detect_dridex_strings(vba_code):
2187 2198 continue
2188 2199 if value not in found:
2189 2200 try:
2190   - decoded = DridexUrlDecode(value)
2191   - # On python 3, convert it to unicode
2192   - if not PYTHON2:
2193   - decoded = decoded.decode('utf8', errors='replace')
  2201 + decoded = bytes2str(DridexUrlDecode(value))
2194 2202 results.append((value, decoded))
2195 2203 found.add(value)
2196 2204 except Exception as exc:
... ... @@ -2366,7 +2374,7 @@ class VBA_Scanner(object):
2366 2374 # StrReverse after hex decoding:
2367 2375 self.code_hex_rev += '\n' + decoded[::-1]
2368 2376 # StrReverse before hex decoding:
2369   - self.code_rev_hex += '\n' + binascii.unhexlify(encoded[::-1])
  2377 + self.code_rev_hex += '\n' + bytes2str(binascii.unhexlify(encoded[::-1]))
2370 2378 #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
2371 2379 #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
2372 2380 # Detect Base64-encoded strings
... ... @@ -3494,15 +3502,15 @@ class VBA_Parser_CLI(VBA_Parser):
3494 3502 print('(empty macro)')
3495 3503 else:
3496 3504 # check if the VBA code contains special characters such as backspace (issue #358)
3497   - if b'\x08' in vba_code_filtered:
  3505 + if '\x08' in vba_code_filtered:
3498 3506 log.warning('The VBA code contains special characters such as backspace, that may be used for obfuscation.')
3499 3507 if sys.stdout.isatty():
3500 3508 # if the standard output is the console, we'll display colors
3501 3509 backspace = colorclass.Color(b'{autored}\\x08{/red}')
3502 3510 else:
3503   - backspace = b'\x08'
  3511 + backspace = '\x08'
3504 3512 # replace backspace by "\x08" for display
3505   - vba_code_filtered = vba_code_filtered.replace(b'\x08', backspace)
  3513 + vba_code_filtered = vba_code_filtered.replace('\x08', backspace)
3506 3514 try:
3507 3515 # Colorize the interesting keywords in the output:
3508 3516 vba_code_filtered = colorclass.Color(self.colorize_keywords(vba_code_filtered))
... ...