Commit 35d65e6bf3f7571e6c6fb1dc9b1e5e0469367be8

Authored by decalage2
1 parent b038d927

olevba: added bytes2str to decode bytes to unicode on Python 3 only, fixed StrReverse+Hex decoding (issue #106)
Showing 1 changed file with 24 additions and 16 deletions
oletools/olevba.py
@@ -375,6 +375,23 @@ def unicode2str(unicode_string): @@ -375,6 +375,23 @@ def unicode2str(unicode_string):
375 return unicode_string 375 return unicode_string
376 376
377 377
def bytes2str(bytes_string, encoding='utf8'):
    """
    Convert a bytes string to a native str:
    - on Python 2, it returns the same string (bytes=str)
    - on Python 3, the string is decoded using the provided encoding
      (UTF-8 by default) to a unicode str; bytes that cannot be decoded
      with that codec are replaced instead of raising an exception

    :param bytes_string: bytes string to be converted
    :param encoding: codec to be used for decoding (only used on Python 3)
    :return: the string converted to str
    :rtype: str
    """
    if PYTHON2:
        # bytes and str are the same type on Python 2: nothing to decode
        return bytes_string
    # Honour the caller-supplied codec rather than a fixed 'utf8', so that
    # the encoding argument actually takes effect. errors='replace' keeps
    # the conversion best-effort on undecodable input.
    return bytes_string.decode(encoding, errors='replace')
  393 +
  394 +
378 # === LOGGING ================================================================= 395 # === LOGGING =================================================================
379 396
380 def get_logger(name, level=logging.CRITICAL+1): 397 def get_logger(name, level=logging.CRITICAL+1):
@@ -2128,10 +2145,7 @@ def detect_hex_strings(vba_code): @@ -2128,10 +2145,7 @@ def detect_hex_strings(vba_code):
2128 for match in re_hex_string.finditer(vba_code): 2145 for match in re_hex_string.finditer(vba_code):
2129 value = match.group() 2146 value = match.group()
2130 if value not in found: 2147 if value not in found:
2131 - decoded = binascii.unhexlify(value)  
2132 - # On python 3, convert it to unicode  
2133 - if not PYTHON2:  
2134 - decoded = decoded.decode('utf8', errors='replace') 2148 + decoded = bytes2str(binascii.unhexlify(value))
2135 results.append((value, decoded)) 2149 results.append((value, decoded))
2136 found.add(value) 2150 found.add(value)
2137 return results 2151 return results
@@ -2156,10 +2170,7 @@ def detect_base64_strings(vba_code): @@ -2156,10 +2170,7 @@ def detect_base64_strings(vba_code):
2156 # only keep new values and not in the whitelist: 2170 # only keep new values and not in the whitelist:
2157 if value not in found and value.lower() not in BASE64_WHITELIST: 2171 if value not in found and value.lower() not in BASE64_WHITELIST:
2158 try: 2172 try:
2159 - decoded = base64.b64decode(value)  
2160 - # On python 3, convert it to unicode  
2161 - if not PYTHON2:  
2162 - decoded = decoded.decode('utf8', errors='replace') 2173 + decoded = bytes2str(base64.b64decode(value))
2163 results.append((value, decoded)) 2174 results.append((value, decoded))
2164 found.add(value) 2175 found.add(value)
2165 except (TypeError, ValueError) as exc: 2176 except (TypeError, ValueError) as exc:
@@ -2187,10 +2198,7 @@ def detect_dridex_strings(vba_code): @@ -2187,10 +2198,7 @@ def detect_dridex_strings(vba_code):
2187 continue 2198 continue
2188 if value not in found: 2199 if value not in found:
2189 try: 2200 try:
2190 - decoded = DridexUrlDecode(value)  
2191 - # On python 3, convert it to unicode  
2192 - if not PYTHON2:  
2193 - decoded = decoded.decode('utf8', errors='replace') 2201 + decoded = bytes2str(DridexUrlDecode(value))
2194 results.append((value, decoded)) 2202 results.append((value, decoded))
2195 found.add(value) 2203 found.add(value)
2196 except Exception as exc: 2204 except Exception as exc:
@@ -2366,7 +2374,7 @@ class VBA_Scanner(object): @@ -2366,7 +2374,7 @@ class VBA_Scanner(object):
2366 # StrReverse after hex decoding: 2374 # StrReverse after hex decoding:
2367 self.code_hex_rev += '\n' + decoded[::-1] 2375 self.code_hex_rev += '\n' + decoded[::-1]
2368 # StrReverse before hex decoding: 2376 # StrReverse before hex decoding:
2369 - self.code_rev_hex += '\n' + binascii.unhexlify(encoded[::-1]) 2377 + self.code_rev_hex += '\n' + bytes2str(binascii.unhexlify(encoded[::-1]))
2370 #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ 2378 #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
2371 #TODO: also append the full code reversed if StrReverse? (risk of false positives?) 2379 #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
2372 # Detect Base64-encoded strings 2380 # Detect Base64-encoded strings
@@ -3494,15 +3502,15 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3494,15 +3502,15 @@ class VBA_Parser_CLI(VBA_Parser):
3494 print('(empty macro)') 3502 print('(empty macro)')
3495 else: 3503 else:
3496 # check if the VBA code contains special characters such as backspace (issue #358) 3504 # check if the VBA code contains special characters such as backspace (issue #358)
3497 - if b'\x08' in vba_code_filtered: 3505 + if '\x08' in vba_code_filtered:
3498 log.warning('The VBA code contains special characters such as backspace, that may be used for obfuscation.') 3506 log.warning('The VBA code contains special characters such as backspace, that may be used for obfuscation.')
3499 if sys.stdout.isatty(): 3507 if sys.stdout.isatty():
3500 # if the standard output is the console, we'll display colors 3508 # if the standard output is the console, we'll display colors
3501 backspace = colorclass.Color(b'{autored}\\x08{/red}') 3509 backspace = colorclass.Color(b'{autored}\\x08{/red}')
3502 else: 3510 else:
3503 - backspace = b'\x08' 3511 + backspace = '\x08'
3504 # replace backspace by "\x08" for display 3512 # replace backspace by "\x08" for display
3505 - vba_code_filtered = vba_code_filtered.replace(b'\x08', backspace) 3513 + vba_code_filtered = vba_code_filtered.replace('\x08', backspace)
3506 try: 3514 try:
3507 # Colorize the interesting keywords in the output: 3515 # Colorize the interesting keywords in the output:
3508 vba_code_filtered = colorclass.Color(self.colorize_keywords(vba_code_filtered)) 3516 vba_code_filtered = colorclass.Color(self.colorize_keywords(vba_code_filtered))