diff --git a/oletools/olevba3.py b/oletools/olevba3.py index 9fb0e76..b122399 100755 --- a/oletools/olevba3.py +++ b/oletools/olevba3.py @@ -244,15 +244,15 @@ except ImportError: + "see http://codespeak.net/lxml " \ + "or http://effbot.org/zone/element-index.htm") -import thirdparty.olefile as olefile -from thirdparty.prettytable import prettytable -from thirdparty.xglob import xglob, PathNotFoundException -from thirdparty.pyparsing.pyparsing import \ +import oletools.thirdparty.olefile as olefile +from oletools.thirdparty.prettytable import prettytable +from oletools.thirdparty.xglob import xglob, PathNotFoundException +from oletools.thirdparty.pyparsing.pyparsing import \ CaselessKeyword, CaselessLiteral, Combine, Forward, Literal, \ Optional, QuotedString,Regex, Suppress, Word, WordStart, \ alphanums, alphas, hexnums,nums, opAssoc, srange, \ infixNotation -import ppt_parser +import oletools.ppt_parser as ppt_parser # monkeypatch email to fix issue #32: # allow header lines without ":" @@ -1774,7 +1774,7 @@ def detect_hex_strings(vba_code): value = match.group() if value not in found: decoded = binascii.unhexlify(value) - results.append((value, decoded)) + results.append((value, decoded.decode('utf-8','replace'))) found.add(value) return results @@ -1799,7 +1799,7 @@ def detect_base64_strings(vba_code): if value not in found and value.lower() not in BASE64_WHITELIST: try: decoded = base64.b64decode(value) - results.append((value, decoded)) + results.append((value, decoded.decode('utf-8','replace'))) found.add(value) except (TypeError, ValueError) as exc: log.debug('Failed to base64-decode (%s)' % exc) @@ -1814,7 +1814,7 @@ def detect_dridex_strings(vba_code): :param vba_code: str, VBA source code :return: list of str tuples (encoded string, decoded string) """ - from thirdparty.DridexUrlDecoder.DridexUrlDecoder import DridexUrlDecode + from oletools.thirdparty.DridexUrlDecoder.DridexUrlDecoder import DridexUrlDecode results = [] found = set() @@ -1959,10 +1959,10 
@@ class VBA_Scanner(object): """ # join long lines ending with " _": self.code = vba_collapse_long_lines(vba_code) - self.code_hex = b'' - self.code_hex_rev = b'' - self.code_rev_hex = b'' - self.code_base64 = b'' + self.code_hex = '' + self.code_hex_rev = '' + self.code_rev_hex = '' + self.code_base64 = '' self.code_dridex = '' self.code_vba = '' self.strReverse = None @@ -1995,19 +1995,19 @@ class VBA_Scanner(object): if 'strreverse' in self.code.lower(): self.strReverse = True # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords: for encoded, decoded in self.hex_strings: - self.code_hex += b'\n' + decoded + self.code_hex += '\n' + decoded # if the code contains "StrReverse", also append the hex strings in reverse order: if self.strReverse: # StrReverse after hex decoding: - self.code_hex_rev += b'\n' + decoded[::-1] + self.code_hex_rev += '\n' + decoded[::-1] # StrReverse before hex decoding: - self.code_rev_hex += b'\n' + binascii.unhexlify(encoded[::-1]) + self.code_rev_hex += '\n' + binascii.unhexlify(encoded[::-1]).decode('utf-8','replace') #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ #TODO: also append the full code reversed if StrReverse? (risk of false positives?) 
# Detect Base64-encoded strings self.base64_strings = detect_base64_strings(self.code) for encoded, decoded in self.base64_strings: - self.code_base64 += b'\n' + decoded + self.code_base64 += '\n' + decoded # Detect Dridex-encoded strings self.dridex_strings = detect_dridex_strings(self.code) for encoded, decoded in self.dridex_strings: @@ -2026,10 +2026,10 @@ class VBA_Scanner(object): for code, obfuscation in ( (self.code, None), - (self.code_hex.decode('utf-8','replace'), 'Hex'), + (self.code_hex, 'Hex'), (self.code_hex_rev, 'Hex+StrReverse'), (self.code_rev_hex, 'StrReverse+Hex'), - (self.code_base64.decode('utf-8', 'replace'), 'Base64'), + (self.code_base64, 'Base64'), (self.code_dridex, 'Dridex'), (self.code_vba, 'VBA expression'), ): diff --git a/oletools/ppt_parser.py b/oletools/ppt_parser.py index e89a037..a36867d 100644 --- a/oletools/ppt_parser.py +++ b/oletools/ppt_parser.py @@ -37,7 +37,7 @@ import struct import traceback import os -import thirdparty.olefile as olefile +import oletools.thirdparty.olefile as olefile import zlib diff --git a/setup.py b/setup.py index 50ca086..8062eee 100755 --- a/setup.py +++ b/setup.py @@ -287,11 +287,11 @@ entry_points = { def main(): # TODO: remove this test once all tools are ported to Python 3 # TODO: warning about Python 2.6 - if sys.version >= '3.0': - s = "Sorry, %s %s requires Python 2.x." - print(s % (name, version)) - sys.exit(1) -## if sys.version < required_python_version: +# if sys.version >= '3.0': +# s = "Sorry, %s %s requires Python 2.x." +# print(s % (name, version)) +# sys.exit(1) +# ## if sys.version < required_python_version: ## s = "I'm sorry, but %s %s requires Python %s or later." ## print(s % (name, version, required_python_version)) ## sys.exit(1)