Commit b038d9276d05d9a84445d7ff741c422aa65162da

Authored by decalage2
1 parent fc17c53d

olevba: convert bytes to unicode for Python 3, escape keywords for regex search (issue #106)

Showing 1 changed file with 12 additions and 2 deletions
oletools/olevba.py
... ... @@ -2055,7 +2055,7 @@ def detect_autoexec(vba_code, obfuscation=None):
2055 2055 for keyword in keywords:
2056 2056 #TODO: if keyword is already a compiled regex, use it as-is
2057 2057 # search using regex to detect word boundaries:
2058   - match = re.search(r'(?i)\b' + keyword + r'\b', vba_code)
  2058 + match = re.search(r'(?i)\b' + re.escape(keyword) + r'\b', vba_code)
2059 2059 if match:
2060 2060 #if keyword.lower() in vba_code:
2061 2061 found_keyword = match.group()
... ... @@ -2081,7 +2081,8 @@ def detect_suspicious(vba_code, obfuscation=None):
2081 2081 for description, keywords in SUSPICIOUS_KEYWORDS.items():
2082 2082 for keyword in keywords:
2083 2083 # search using regex to detect word boundaries:
2084   - match = re.search(r'(?i)\b' + keyword + r'\b', vba_code)
  2084 + # note: each keyword must be escaped if it contains special chars such as '\'
  2085 + match = re.search(r'(?i)\b' + re.escape(keyword) + r'\b', vba_code)
2085 2086 if match:
2086 2087 #if keyword.lower() in vba_code:
2087 2088 found_keyword = match.group()
... ... @@ -2128,6 +2129,9 @@ def detect_hex_strings(vba_code):
2128 2129 value = match.group()
2129 2130 if value not in found:
2130 2131 decoded = binascii.unhexlify(value)
  2132 + # On python 3, convert it to unicode
  2133 + if not PYTHON2:
  2134 + decoded = decoded.decode('utf8', errors='replace')
2131 2135 results.append((value, decoded))
2132 2136 found.add(value)
2133 2137 return results
... ... @@ -2153,6 +2157,9 @@ def detect_base64_strings(vba_code):
2153 2157 if value not in found and value.lower() not in BASE64_WHITELIST:
2154 2158 try:
2155 2159 decoded = base64.b64decode(value)
  2160 + # On python 3, convert it to unicode
  2161 + if not PYTHON2:
  2162 + decoded = decoded.decode('utf8', errors='replace')
2156 2163 results.append((value, decoded))
2157 2164 found.add(value)
2158 2165 except (TypeError, ValueError) as exc:
... ... @@ -2181,6 +2188,9 @@ def detect_dridex_strings(vba_code):
2181 2188 if value not in found:
2182 2189 try:
2183 2190 decoded = DridexUrlDecode(value)
  2191 + # On python 3, convert it to unicode
  2192 + if not PYTHON2:
  2193 + decoded = decoded.decode('utf8', errors='replace')
2184 2194 results.append((value, decoded))
2185 2195 found.add(value)
2186 2196 except Exception as exc:
... ...