Commit b038d9276d05d9a84445d7ff741c422aa65162da
1 parent: fc17c53d
olevba: convert bytes to unicode for Python 3, escape keywords for regex search (issue #106)
Showing 1 changed file with 12 additions and 2 deletions
oletools/olevba.py
| ... | ... | @@ -2055,7 +2055,7 @@ def detect_autoexec(vba_code, obfuscation=None): |
| 2055 | 2055 | for keyword in keywords: |
| 2056 | 2056 | #TODO: if keyword is already a compiled regex, use it as-is |
| 2057 | 2057 | # search using regex to detect word boundaries: |
| 2058 | - match = re.search(r'(?i)\b' + keyword + r'\b', vba_code) | |
| 2058 | + match = re.search(r'(?i)\b' + re.escape(keyword) + r'\b', vba_code) | |
| 2059 | 2059 | if match: |
| 2060 | 2060 | #if keyword.lower() in vba_code: |
| 2061 | 2061 | found_keyword = match.group() |
| ... | ... | @@ -2081,7 +2081,8 @@ def detect_suspicious(vba_code, obfuscation=None): |
| 2081 | 2081 | for description, keywords in SUSPICIOUS_KEYWORDS.items(): |
| 2082 | 2082 | for keyword in keywords: |
| 2083 | 2083 | # search using regex to detect word boundaries: |
| 2084 | - match = re.search(r'(?i)\b' + keyword + r'\b', vba_code) | |
| 2084 | + # note: each keyword must be escaped if it contains special chars such as '\' | |
| 2085 | + match = re.search(r'(?i)\b' + re.escape(keyword) + r'\b', vba_code) | |
| 2085 | 2086 | if match: |
| 2086 | 2087 | #if keyword.lower() in vba_code: |
| 2087 | 2088 | found_keyword = match.group() |
| ... | ... | @@ -2128,6 +2129,9 @@ def detect_hex_strings(vba_code): |
| 2128 | 2129 | value = match.group() |
| 2129 | 2130 | if value not in found: |
| 2130 | 2131 | decoded = binascii.unhexlify(value) |
| 2132 | + # On python 3, convert it to unicode | |
| 2133 | + if not PYTHON2: | |
| 2134 | + decoded = decoded.decode('utf8', errors='replace') | |
| 2131 | 2135 | results.append((value, decoded)) |
| 2132 | 2136 | found.add(value) |
| 2133 | 2137 | return results |
| ... | ... | @@ -2153,6 +2157,9 @@ def detect_base64_strings(vba_code): |
| 2153 | 2157 | if value not in found and value.lower() not in BASE64_WHITELIST: |
| 2154 | 2158 | try: |
| 2155 | 2159 | decoded = base64.b64decode(value) |
| 2160 | + # On python 3, convert it to unicode | |
| 2161 | + if not PYTHON2: | |
| 2162 | + decoded = decoded.decode('utf8', errors='replace') | |
| 2156 | 2163 | results.append((value, decoded)) |
| 2157 | 2164 | found.add(value) |
| 2158 | 2165 | except (TypeError, ValueError) as exc: |
| ... | ... | @@ -2181,6 +2188,9 @@ def detect_dridex_strings(vba_code): |
| 2181 | 2188 | if value not in found: |
| 2182 | 2189 | try: |
| 2183 | 2190 | decoded = DridexUrlDecode(value) |
| 2191 | + # On python 3, convert it to unicode | |
| 2192 | + if not PYTHON2: | |
| 2193 | + decoded = decoded.decode('utf8', errors='replace') | |
| 2184 | 2194 | results.append((value, decoded)) |
| 2185 | 2195 | found.add(value) |
| 2186 | 2196 | except Exception as exc: |
| ... | ... |