Commit 1541d5dedc31c06d3adbf382b51c5e4794fb711e
Committed by GitHub
Merge pull request #64 from sebdraven/master
Conversion of all oletools to Python 3.5 (temporarily breaking compatibility with Python 2.7)
Showing 8 changed files with 119 additions and 109 deletions
oletools/mraptor.py
| ... | ... | @@ -233,16 +233,16 @@ def main(): |
| 233 | 233 | |
| 234 | 234 | # Print help if no arguments are passed |
| 235 | 235 | if len(args) == 0: |
| 236 | - print __doc__ | |
| 236 | + print(__doc__) | |
| 237 | 237 | parser.print_help() |
| 238 | - print '\nAn exit code is returned based on the analysis result:' | |
| 238 | + print('\nAn exit code is returned based on the analysis result:') | |
| 239 | 239 | for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK, Result_Error, Result_Suspicious): |
| 240 | - print ' - %d: %s' % (result.exit_code, result.name) | |
| 240 | + print(' - %d: %s' % (result.exit_code, result.name)) | |
| 241 | 241 | sys.exit() |
| 242 | 242 | |
| 243 | 243 | # print banner with version |
| 244 | - print 'MacroRaptor %s - http://decalage.info/python/oletools' % __version__ | |
| 245 | - print 'This is work in progress, please report issues at %s' % URL_ISSUES | |
| 244 | + print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__) | |
| 245 | + print('This is work in progress, please report issues at %s' % URL_ISSUES) | |
| 246 | 246 | |
| 247 | 247 | logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') |
| 248 | 248 | # enable logging in the modules: |
| ... | ... | @@ -292,7 +292,7 @@ def main(): |
| 292 | 292 | vba_code_all_modules = '' |
| 293 | 293 | try: |
| 294 | 294 | for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_all_macros(): |
| 295 | - vba_code_all_modules += vba_code + '\n' | |
| 295 | + vba_code_all_modules += vba_code.decode('utf-8','replace') + '\n' | |
| 296 | 296 | except Exception as e: |
| 297 | 297 | # log.error('Error when parsing VBA macros from file %r' % full_name) |
| 298 | 298 | result = Result_Error |
| ... | ... | @@ -319,9 +319,9 @@ def main(): |
| 319 | 319 | global_result = result |
| 320 | 320 | exitcode = result.exit_code |
| 321 | 321 | |
| 322 | - print '' | |
| 323 | - print 'Flags: A=AutoExec, W=Write, X=Execute' | |
| 324 | - print 'Exit code: %d - %s' % (exitcode, global_result.name) | |
| 322 | + print('') | |
| 323 | + print('Flags: A=AutoExec, W=Write, X=Execute') | |
| 324 | + print('Exit code: %d - %s' % (exitcode, global_result.name)) | |
| 325 | 325 | sys.exit(exitcode) |
| 326 | 326 | |
| 327 | 327 | if __name__ == '__main__': | ... | ... |
oletools/olemap.py
| ... | ... | @@ -90,14 +90,14 @@ FAT_COLORS = { |
| 90 | 90 | |
| 91 | 91 | if __name__ == '__main__': |
| 92 | 92 | # print banner with version |
| 93 | - print 'olemap %s - http://decalage.info/python/oletools' % __version__ | |
| 93 | + print('olemap %s - http://decalage.info/python/oletools' % __version__) | |
| 94 | 94 | |
| 95 | 95 | fname = sys.argv[1] |
| 96 | 96 | ole = olefile.OleFileIO(fname) |
| 97 | 97 | |
| 98 | - print 'FAT:' | |
| 98 | + print('FAT:') | |
| 99 | 99 | t = tablestream.TableStream([8, 12, 8, 8], header_row=['Sector #', 'Type', 'Offset', 'Next #']) |
| 100 | - for i in xrange(ole.nb_sect): | |
| 100 | + for i in range(ole.nb_sect): | |
| 101 | 101 | fat_value = ole.fat[i] |
| 102 | 102 | fat_type = FAT_TYPES.get(fat_value, '<Data>') |
| 103 | 103 | color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default']) |
| ... | ... | @@ -106,15 +106,15 @@ if __name__ == '__main__': |
| 106 | 106 | # print '%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value) |
| 107 | 107 | t.write_row(['%8X' % i, fat_type, '%08X' % offset, '%8X' % fat_value], |
| 108 | 108 | colors=[None, color_type, None, None]) |
| 109 | - print '' | |
| 109 | + print('') | |
| 110 | 110 | |
| 111 | - print 'MiniFAT:' | |
| 111 | + print('MiniFAT:') | |
| 112 | 112 | # load MiniFAT if it wasn't already done: |
| 113 | 113 | ole.loadminifat() |
| 114 | - for i in xrange(len(ole.minifat)): | |
| 114 | + for i in range(len(ole.minifat)): | |
| 115 | 115 | fat_value = ole.minifat[i] |
| 116 | 116 | fat_type = FAT_TYPES.get(fat_value, 'Data') |
| 117 | - print '%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value) | |
| 117 | + print('%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value)) | |
| 118 | 118 | |
| 119 | 119 | ole.close() |
| 120 | 120 | ... | ... |
oletools/oletimes.py
| ... | ... | @@ -94,6 +94,6 @@ for obj in ole.listdir(streams=True, storages=True): |
| 94 | 94 | #print '- %s: mtime=%s ctime=%s' % (repr('/'.join(obj)), ole.getmtime(obj), ole.getctime(obj)) |
| 95 | 95 | t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj)))) |
| 96 | 96 | |
| 97 | -print t | |
| 97 | +print(t) | |
| 98 | 98 | |
| 99 | 99 | ole.close() | ... | ... |
oletools/olevba.py
| ... | ... | @@ -215,7 +215,7 @@ __version__ = '0.50' |
| 215 | 215 | |
| 216 | 216 | import sys, logging |
| 217 | 217 | import struct |
| 218 | -import cStringIO | |
| 218 | +from _io import StringIO,BytesIO | |
| 219 | 219 | import math |
| 220 | 220 | import zipfile |
| 221 | 221 | import re |
| ... | ... | @@ -240,9 +240,9 @@ except ImportError: |
| 240 | 240 | # Python <2.5: standalone ElementTree install |
| 241 | 241 | import elementtree.cElementTree as ET |
| 242 | 242 | except ImportError: |
| 243 | - raise ImportError, "lxml or ElementTree are not installed, " \ | |
| 243 | + raise(ImportError, "lxml or ElementTree are not installed, " \ | |
| 244 | 244 | + "see http://codespeak.net/lxml " \ |
| 245 | - + "or http://effbot.org/zone/element-index.htm" | |
| 245 | + + "or http://effbot.org/zone/element-index.htm") | |
| 246 | 246 | |
| 247 | 247 | import thirdparty.olefile as olefile |
| 248 | 248 | from thirdparty.prettytable import prettytable |
| ... | ... | @@ -421,7 +421,7 @@ TYPE2TAG = { |
| 421 | 421 | |
| 422 | 422 | |
| 423 | 423 | # MSO files ActiveMime header magic |
| 424 | -MSO_ACTIVEMIME_HEADER = 'ActiveMime' | |
| 424 | +MSO_ACTIVEMIME_HEADER = b'ActiveMime' | |
| 425 | 425 | |
| 426 | 426 | MODULE_EXTENSION = "bas" |
| 427 | 427 | CLASS_EXTENSION = "cls" |
| ... | ... | @@ -630,7 +630,7 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') |
| 630 | 630 | re_nothex_check = re.compile(r'[G-Zg-z]') |
| 631 | 631 | |
| 632 | 632 | # regex to extract printable strings (at least 5 chars) from VBA Forms: |
| 633 | -re_printable_string = re.compile(r'[\t\r\n\x20-\xFF]{5,}') | |
| 633 | +re_printable_string = re.compile(rb'[\t\r\n\x20-\xFF]{5,}') | |
| 634 | 634 | |
| 635 | 635 | |
| 636 | 636 | # === PARTIAL VBA GRAMMAR ==================================================== |
| ... | ... | @@ -1060,10 +1060,10 @@ def decompress_stream(compressed_container): |
| 1060 | 1060 | # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the |
| 1061 | 1061 | # DecompressedBuffer (section 2.4.1.1.2). |
| 1062 | 1062 | |
| 1063 | - decompressed_container = '' # result | |
| 1063 | + decompressed_container = b'' # result | |
| 1064 | 1064 | compressed_current = 0 |
| 1065 | 1065 | |
| 1066 | - sig_byte = ord(compressed_container[compressed_current]) | |
| 1066 | + sig_byte = compressed_container[compressed_current] | |
| 1067 | 1067 | if sig_byte != 0x01: |
| 1068 | 1068 | raise ValueError('invalid signature byte {0:02X}'.format(sig_byte)) |
| 1069 | 1069 | |
| ... | ... | @@ -1109,7 +1109,7 @@ def decompress_stream(compressed_container): |
| 1109 | 1109 | # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk |
| 1110 | 1110 | # uncompressed chunk: read the next 4096 bytes as-is |
| 1111 | 1111 | #TODO: check if there are at least 4096 bytes left |
| 1112 | - decompressed_container += compressed_container[compressed_current:compressed_current + 4096] | |
| 1112 | + decompressed_container += bytes([compressed_container[compressed_current:compressed_current + 4096]]) | |
| 1113 | 1113 | compressed_current += 4096 |
| 1114 | 1114 | else: |
| 1115 | 1115 | # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk |
| ... | ... | @@ -1120,9 +1120,9 @@ def decompress_stream(compressed_container): |
| 1120 | 1120 | # log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end)) |
| 1121 | 1121 | # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or |
| 1122 | 1122 | # copy tokens (reference to a previous literal token) |
| 1123 | - flag_byte = ord(compressed_container[compressed_current]) | |
| 1123 | + flag_byte = compressed_container[compressed_current] | |
| 1124 | 1124 | compressed_current += 1 |
| 1125 | - for bit_index in xrange(0, 8): | |
| 1125 | + for bit_index in range(0, 8): | |
| 1126 | 1126 | # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end)) |
| 1127 | 1127 | if compressed_current >= compressed_end: |
| 1128 | 1128 | break |
| ... | ... | @@ -1132,7 +1132,7 @@ def decompress_stream(compressed_container): |
| 1132 | 1132 | #log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit)) |
| 1133 | 1133 | if flag_bit == 0: # LiteralToken |
| 1134 | 1134 | # copy one byte directly to output |
| 1135 | - decompressed_container += compressed_container[compressed_current] | |
| 1135 | + decompressed_container += bytes([compressed_container[compressed_current]]) | |
| 1136 | 1136 | compressed_current += 1 |
| 1137 | 1137 | else: # CopyToken |
| 1138 | 1138 | # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken |
| ... | ... | @@ -1147,8 +1147,8 @@ def decompress_stream(compressed_container): |
| 1147 | 1147 | offset = (temp1 >> temp2) + 1 |
| 1148 | 1148 | #log.debug('offset=%d length=%d' % (offset, length)) |
| 1149 | 1149 | copy_source = len(decompressed_container) - offset |
| 1150 | - for index in xrange(copy_source, copy_source + length): | |
| 1151 | - decompressed_container += decompressed_container[index] | |
| 1150 | + for index in range(copy_source, copy_source + length): | |
| 1151 | + decompressed_container += bytes([decompressed_container[index]]) | |
| 1152 | 1152 | compressed_current += 2 |
| 1153 | 1153 | return decompressed_container |
| 1154 | 1154 | |
| ... | ... | @@ -1191,7 +1191,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1191 | 1191 | code_modules = {} |
| 1192 | 1192 | |
| 1193 | 1193 | for line in project: |
| 1194 | - line = line.strip() | |
| 1194 | + line = line.strip().decode('utf-8','ignore') | |
| 1195 | 1195 | if '=' in line: |
| 1196 | 1196 | # split line at the 1st equal sign: |
| 1197 | 1197 | name, value = line.split('=', 1) |
| ... | ... | @@ -1222,7 +1222,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1222 | 1222 | else: |
| 1223 | 1223 | raise UnexpectedDataError(dir_path, name, expected, value) |
| 1224 | 1224 | |
| 1225 | - dir_stream = cStringIO.StringIO(decompress_stream(dir_compressed)) | |
| 1225 | + dir_stream = BytesIO(decompress_stream(dir_compressed)) | |
| 1226 | 1226 | |
| 1227 | 1227 | # PROJECTSYSKIND Record |
| 1228 | 1228 | projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0] |
| ... | ... | @@ -1484,7 +1484,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1484 | 1484 | uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace') |
| 1485 | 1485 | |
| 1486 | 1486 | log.debug("parsing {0} modules".format(projectmodules_count)) |
| 1487 | - for projectmodule_index in xrange(0, projectmodules_count): | |
| 1487 | + for projectmodule_index in range(0, projectmodules_count): | |
| 1488 | 1488 | try: |
| 1489 | 1489 | modulename_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1490 | 1490 | check_value('MODULENAME_Id', 0x0019, modulename_id) |
| ... | ... | @@ -1881,19 +1881,19 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): |
| 1881 | 1881 | pass |
| 1882 | 1882 | elif isinstance(json_obj, str): |
| 1883 | 1883 | # de-code and re-encode |
| 1884 | - dencoded = json_obj.decode(encoding, errors).encode(encoding, errors) | |
| 1884 | + dencoded = json_obj | |
| 1885 | 1885 | if dencoded != json_obj: |
| 1886 | 1886 | log.debug('json2ascii: replaced: {0} (len {1})' |
| 1887 | 1887 | .format(json_obj, len(json_obj))) |
| 1888 | 1888 | log.debug('json2ascii: with: {0} (len {1})' |
| 1889 | 1889 | .format(dencoded, len(dencoded))) |
| 1890 | 1890 | return dencoded |
| 1891 | - elif isinstance(json_obj, unicode): | |
| 1891 | + elif isinstance(json_obj, bytes): | |
| 1892 | 1892 | log.debug('json2ascii: encode unicode: {0}' |
| 1893 | - .format(json_obj.encode(encoding, errors))) | |
| 1893 | + .format(json_obj.decode(encoding, errors))) | |
| 1894 | 1894 | # cannot put original into logger |
| 1895 | 1895 | # print 'original: ' json_obj |
| 1896 | - return json_obj.encode(encoding, errors) | |
| 1896 | + return json_obj.decode(encoding, errors) | |
| 1897 | 1897 | elif isinstance(json_obj, dict): |
| 1898 | 1898 | for key in json_obj: |
| 1899 | 1899 | json_obj[key] = json2ascii(json_obj[key]) |
| ... | ... | @@ -1931,18 +1931,18 @@ def print_json(json_dict=None, _json_is_last=False, **json_parts): |
| 1931 | 1931 | json_dict = json_parts |
| 1932 | 1932 | |
| 1933 | 1933 | if not _have_printed_json_start: |
| 1934 | - print '[' | |
| 1934 | + print('[') | |
| 1935 | 1935 | _have_printed_json_start = True |
| 1936 | 1936 | |
| 1937 | 1937 | lines = json.dumps(json2ascii(json_dict), check_circular=False, |
| 1938 | 1938 | indent=4, ensure_ascii=False).splitlines() |
| 1939 | 1939 | for line in lines[:-1]: |
| 1940 | - print ' {0}'.format(line) | |
| 1940 | + print(' {0}'.format(line)) | |
| 1941 | 1941 | if _json_is_last: |
| 1942 | - print ' {0}'.format(lines[-1]) # print last line without comma | |
| 1943 | - print ']' | |
| 1942 | + print(' {0}'.format(lines[-1])) # print last line without comma | |
| 1943 | + print(']') | |
| 1944 | 1944 | else: |
| 1945 | - print ' {0},'.format(lines[-1]) # print last line with comma | |
| 1945 | + print(' {0},'.format(lines[-1])) # print last line with comma | |
| 1946 | 1946 | |
| 1947 | 1947 | |
| 1948 | 1948 | class VBA_Scanner(object): |
| ... | ... | @@ -1959,10 +1959,10 @@ class VBA_Scanner(object): |
| 1959 | 1959 | """ |
| 1960 | 1960 | # join long lines ending with " _": |
| 1961 | 1961 | self.code = vba_collapse_long_lines(vba_code) |
| 1962 | - self.code_hex = '' | |
| 1963 | - self.code_hex_rev = '' | |
| 1964 | - self.code_rev_hex = '' | |
| 1965 | - self.code_base64 = '' | |
| 1962 | + self.code_hex = b'' | |
| 1963 | + self.code_hex_rev = b'' | |
| 1964 | + self.code_rev_hex = b'' | |
| 1965 | + self.code_base64 = b'' | |
| 1966 | 1966 | self.code_dridex = '' |
| 1967 | 1967 | self.code_vba = '' |
| 1968 | 1968 | self.strReverse = None |
| ... | ... | @@ -1995,19 +1995,19 @@ class VBA_Scanner(object): |
| 1995 | 1995 | if 'strreverse' in self.code.lower(): self.strReverse = True |
| 1996 | 1996 | # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords: |
| 1997 | 1997 | for encoded, decoded in self.hex_strings: |
| 1998 | - self.code_hex += '\n' + decoded | |
| 1998 | + self.code_hex += b'\n' + decoded | |
| 1999 | 1999 | # if the code contains "StrReverse", also append the hex strings in reverse order: |
| 2000 | 2000 | if self.strReverse: |
| 2001 | 2001 | # StrReverse after hex decoding: |
| 2002 | - self.code_hex_rev += '\n' + decoded[::-1] | |
| 2002 | + self.code_hex_rev += b'\n' + decoded[::-1] | |
| 2003 | 2003 | # StrReverse before hex decoding: |
| 2004 | - self.code_rev_hex += '\n' + binascii.unhexlify(encoded[::-1]) | |
| 2004 | + self.code_rev_hex += b'\n' + binascii.unhexlify(encoded[::-1]) | |
| 2005 | 2005 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ |
| 2006 | 2006 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) |
| 2007 | 2007 | # Detect Base64-encoded strings |
| 2008 | 2008 | self.base64_strings = detect_base64_strings(self.code) |
| 2009 | 2009 | for encoded, decoded in self.base64_strings: |
| 2010 | - self.code_base64 += '\n' + decoded | |
| 2010 | + self.code_base64 += b'\n' + decoded | |
| 2011 | 2011 | # Detect Dridex-encoded strings |
| 2012 | 2012 | self.dridex_strings = detect_dridex_strings(self.code) |
| 2013 | 2013 | for encoded, decoded in self.dridex_strings: |
| ... | ... | @@ -2026,13 +2026,15 @@ class VBA_Scanner(object): |
| 2026 | 2026 | |
| 2027 | 2027 | for code, obfuscation in ( |
| 2028 | 2028 | (self.code, None), |
| 2029 | - (self.code_hex, 'Hex'), | |
| 2029 | + (self.code_hex.decode('utf-8','replace'), 'Hex'), | |
| 2030 | 2030 | (self.code_hex_rev, 'Hex+StrReverse'), |
| 2031 | 2031 | (self.code_rev_hex, 'StrReverse+Hex'), |
| 2032 | - (self.code_base64, 'Base64'), | |
| 2032 | + (self.code_base64.decode('utf-8', 'replace'), 'Base64'), | |
| 2033 | 2033 | (self.code_dridex, 'Dridex'), |
| 2034 | 2034 | (self.code_vba, 'VBA expression'), |
| 2035 | 2035 | ): |
| 2036 | + if isinstance(code,bytes): | |
| 2037 | + code=code.decode('utf-8','replace') | |
| 2036 | 2038 | self.autoexec_keywords += detect_autoexec(code, obfuscation) |
| 2037 | 2039 | self.suspicious_keywords += detect_suspicious(code, obfuscation) |
| 2038 | 2040 | self.iocs += detect_patterns(code, obfuscation) |
| ... | ... | @@ -2158,7 +2160,7 @@ class VBA_Parser(object): |
| 2158 | 2160 | _file = filename |
| 2159 | 2161 | else: |
| 2160 | 2162 | # file already read in memory, make it a file-like object for zipfile: |
| 2161 | - _file = cStringIO.StringIO(data) | |
| 2163 | + _file = BytesIO(data) | |
| 2162 | 2164 | #self.file = _file |
| 2163 | 2165 | self.ole_file = None |
| 2164 | 2166 | self.ole_subfiles = [] |
| ... | ... | @@ -2207,7 +2209,7 @@ class VBA_Parser(object): |
| 2207 | 2209 | if data is None: |
| 2208 | 2210 | data = open(filename, 'rb').read() |
| 2209 | 2211 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace |
| 2210 | - if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: | |
| 2212 | + if b'http://schemas.microsoft.com/office/word/2003/wordml' in data: | |
| 2211 | 2213 | self.open_word2003xml(data) |
| 2212 | 2214 | # store a lowercase version for the next tests: |
| 2213 | 2215 | data_lowercase = data.lower() |
| ... | ... | @@ -2217,14 +2219,14 @@ class VBA_Parser(object): |
| 2217 | 2219 | # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored. |
| 2218 | 2220 | # And the line is case insensitive. |
| 2219 | 2221 | # so we'll just check the presence of mime, version and multipart anywhere: |
| 2220 | - if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase \ | |
| 2221 | - and 'multipart' in data_lowercase: | |
| 2222 | + if self.type is None and b'mime' in data_lowercase and b'version' in data_lowercase \ | |
| 2223 | + and b'multipart' in data_lowercase: | |
| 2222 | 2224 | self.open_mht(data) |
| 2223 | 2225 | #TODO: handle exceptions |
| 2224 | 2226 | #TODO: Excel 2003 XML |
| 2225 | 2227 | # Check if this is a plain text VBA or VBScript file: |
| 2226 | 2228 | # To avoid scanning binary files, we simply check for some control chars: |
| 2227 | - if self.type is None and '\x00' not in data: | |
| 2229 | + if self.type is None and b'\x00' not in data: | |
| 2228 | 2230 | self.open_text(data) |
| 2229 | 2231 | if self.type is None: |
| 2230 | 2232 | # At this stage, could not match a known format: |
| ... | ... | @@ -2358,6 +2360,8 @@ class VBA_Parser(object): |
| 2358 | 2360 | """ |
| 2359 | 2361 | log.info('Opening MHTML file %s' % self.filename) |
| 2360 | 2362 | try: |
| 2363 | + if isinstance(data,bytes): | |
| 2364 | + data = data.decode('utf8', 'replace') | |
| 2361 | 2365 | # parse the MIME content |
| 2362 | 2366 | # remove any leading whitespace or newline (workaround for issue in email package) |
| 2363 | 2367 | stripped_data = data.lstrip('\r\n\t ') |
| ... | ... | @@ -2387,7 +2391,8 @@ class VBA_Parser(object): |
| 2387 | 2391 | # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. |
| 2388 | 2392 | # decompress the zlib data starting at offset 0x32, which is the OLE container: |
| 2389 | 2393 | # check ActiveMime header: |
| 2390 | - if isinstance(part_data, str) and is_mso_file(part_data): | |
| 2394 | + | |
| 2395 | + if (isinstance(part_data, str) or isinstance(part_data, bytes)) and is_mso_file(part_data): | |
| 2391 | 2396 | log.debug('Found ActiveMime header, decompressing MSO container') |
| 2392 | 2397 | try: |
| 2393 | 2398 | ole_data = mso_file_extract(part_data) |
| ... | ... | @@ -2458,6 +2463,8 @@ class VBA_Parser(object): |
| 2458 | 2463 | """ |
| 2459 | 2464 | log.info('Opening text file %s' % self.filename) |
| 2460 | 2465 | # directly store the source code: |
| 2466 | + if isinstance(data,bytes): | |
| 2467 | + data=data.decode('utf8','replace') | |
| 2461 | 2468 | self.vba_code_all_modules = data |
| 2462 | 2469 | self.contains_macros = True |
| 2463 | 2470 | # set type only if parsing succeeds |
| ... | ... | @@ -2596,7 +2603,7 @@ class VBA_Parser(object): |
| 2596 | 2603 | # Also look for VBA code in any stream including orphans |
| 2597 | 2604 | # (happens in some malformed files) |
| 2598 | 2605 | ole = self.ole_file |
| 2599 | - for sid in xrange(len(ole.direntries)): | |
| 2606 | + for sid in range(len(ole.direntries)): | |
| 2600 | 2607 | # check if id is already done above: |
| 2601 | 2608 | log.debug('Checking DirEntry #%d' % sid) |
| 2602 | 2609 | d = ole.direntries[sid] |
| ... | ... | @@ -2614,7 +2621,7 @@ class VBA_Parser(object): |
| 2614 | 2621 | log.debug('%r...[much more data]...%r' % (data[:100], data[-50:])) |
| 2615 | 2622 | else: |
| 2616 | 2623 | log.debug(repr(data)) |
| 2617 | - if 'Attribut' in data: | |
| 2624 | + if 'Attribut' in data.decode('utf-8','ignore'): | |
| 2618 | 2625 | log.debug('Found VBA compressed code') |
| 2619 | 2626 | self.contains_macros = True |
| 2620 | 2627 | except IOError as exc: |
| ... | ... | @@ -2662,7 +2669,7 @@ class VBA_Parser(object): |
| 2662 | 2669 | # Also look for VBA code in any stream including orphans |
| 2663 | 2670 | # (happens in some malformed files) |
| 2664 | 2671 | ole = self.ole_file |
| 2665 | - for sid in xrange(len(ole.direntries)): | |
| 2672 | + for sid in range(len(ole.direntries)): | |
| 2666 | 2673 | # check if id is already done above: |
| 2667 | 2674 | log.debug('Checking DirEntry #%d' % sid) |
| 2668 | 2675 | if sid in vba_stream_ids: |
| ... | ... | @@ -2677,7 +2684,7 @@ class VBA_Parser(object): |
| 2677 | 2684 | # read data |
| 2678 | 2685 | log.debug('Reading data from stream %r' % d.name) |
| 2679 | 2686 | data = ole._open(d.isectStart, d.size).read() |
| 2680 | - for match in re.finditer(r'\x00Attribut[^e]', data, flags=re.IGNORECASE): | |
| 2687 | + for match in re.finditer(rb'\x00Attribut[^e]', data, flags=re.IGNORECASE): | |
| 2681 | 2688 | start = match.start() - 3 |
| 2682 | 2689 | log.debug('Found VBA compressed code at index %X' % start) |
| 2683 | 2690 | compressed_code = data[start:] |
| ... | ... | @@ -2720,9 +2727,9 @@ class VBA_Parser(object): |
| 2720 | 2727 | self.vba_code_all_modules = '' |
| 2721 | 2728 | for (_, _, _, vba_code) in self.extract_all_macros(): |
| 2722 | 2729 | #TODO: filter code? (each module) |
| 2723 | - self.vba_code_all_modules += vba_code + '\n' | |
| 2730 | + self.vba_code_all_modules += vba_code.decode('utf-8', 'ignore') + '\n' | |
| 2724 | 2731 | for (_, _, form_string) in self.extract_form_strings(): |
| 2725 | - self.vba_code_all_modules += form_string + '\n' | |
| 2732 | + self.vba_code_all_modules += form_string.decode('utf-8', 'ignore') + '\n' | |
| 2726 | 2733 | # Analyze the whole code at once: |
| 2727 | 2734 | scanner = VBA_Scanner(self.vba_code_all_modules) |
| 2728 | 2735 | self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate) |
| ... | ... | @@ -2897,7 +2904,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2897 | 2904 | """ |
| 2898 | 2905 | # print a waiting message only if the output is not redirected to a file: |
| 2899 | 2906 | if sys.stdout.isatty(): |
| 2900 | - print 'Analysis...\r', | |
| 2907 | + print('Analysis...\r') | |
| 2901 | 2908 | sys.stdout.flush() |
| 2902 | 2909 | results = self.analyze_macros(show_decoded_strings, deobfuscate) |
| 2903 | 2910 | if results: |
| ... | ... | @@ -2913,9 +2920,9 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2913 | 2920 | if not is_printable(description): |
| 2914 | 2921 | description = repr(description) |
| 2915 | 2922 | t.add_row((kw_type, keyword, description)) |
| 2916 | - print t | |
| 2923 | + print(t) | |
| 2917 | 2924 | else: |
| 2918 | - print 'No suspicious keyword or IOC found.' | |
| 2925 | + print('No suspicious keyword or IOC found.') | |
| 2919 | 2926 | |
| 2920 | 2927 | def print_analysis_json(self, show_decoded_strings=False, deobfuscate=False): |
| 2921 | 2928 | """ |
| ... | ... | @@ -2929,7 +2936,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2929 | 2936 | """ |
| 2930 | 2937 | # print a waiting message only if the output is not redirected to a file: |
| 2931 | 2938 | if sys.stdout.isatty(): |
| 2932 | - print 'Analysis...\r', | |
| 2939 | + print('Analysis...\r') | |
| 2933 | 2940 | sys.stdout.flush() |
| 2934 | 2941 | return [dict(type=kw_type, keyword=keyword, description=description) |
| 2935 | 2942 | for kw_type, keyword, description in self.analyze_macros(show_decoded_strings, deobfuscate)] |
| ... | ... | @@ -2958,42 +2965,44 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2958 | 2965 | display_filename = '%s in %s' % (self.filename, self.container) |
| 2959 | 2966 | else: |
| 2960 | 2967 | display_filename = self.filename |
| 2961 | - print '=' * 79 | |
| 2962 | - print 'FILE:', display_filename | |
| 2968 | + print('=' * 79) | |
| 2969 | + print('FILE:', display_filename) | |
| 2963 | 2970 | try: |
| 2964 | 2971 | #TODO: handle olefile errors, when an OLE file is malformed |
| 2965 | - print 'Type:', self.type | |
| 2972 | + print('Type: %s' % self.type) | |
| 2966 | 2973 | if self.detect_vba_macros(): |
| 2967 | 2974 | #print 'Contains VBA Macros:' |
| 2968 | 2975 | for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros(): |
| 2969 | 2976 | if hide_attributes: |
| 2970 | 2977 | # hide attribute lines: |
| 2978 | + if isinstance(vba_code,bytes): | |
| 2979 | + vba_code =vba_code.decode('utf-8','replace') | |
| 2971 | 2980 | vba_code_filtered = filter_vba(vba_code) |
| 2972 | 2981 | else: |
| 2973 | 2982 | vba_code_filtered = vba_code |
| 2974 | - print '-' * 79 | |
| 2975 | - print 'VBA MACRO %s ' % vba_filename | |
| 2976 | - print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)) | |
| 2983 | + print('-' * 79) | |
| 2984 | + print('VBA MACRO %s ' % vba_filename) | |
| 2985 | + print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))) | |
| 2977 | 2986 | if display_code: |
| 2978 | - print '- ' * 39 | |
| 2987 | + print('- ' * 39) | |
| 2979 | 2988 | # detect empty macros: |
| 2980 | 2989 | if vba_code_filtered.strip() == '': |
| 2981 | - print '(empty macro)' | |
| 2990 | + print('(empty macro)') | |
| 2982 | 2991 | else: |
| 2983 | - print vba_code_filtered | |
| 2992 | + print(vba_code_filtered) | |
| 2984 | 2993 | for (subfilename, stream_path, form_string) in self.extract_form_strings(): |
| 2985 | - print '-' * 79 | |
| 2986 | - print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path) | |
| 2987 | - print '- ' * 39 | |
| 2988 | - print form_string | |
| 2994 | + print('-' * 79) | |
| 2995 | + print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)) | |
| 2996 | + print('- ' * 39) | |
| 2997 | + print(form_string.decode('utf-8', 'ignore')) | |
| 2989 | 2998 | if not vba_code_only: |
| 2990 | 2999 | # analyse the code from all modules at once: |
| 2991 | 3000 | self.print_analysis(show_decoded_strings, deobfuscate) |
| 2992 | 3001 | if show_deobfuscated_code: |
| 2993 | - print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n' | |
| 2994 | - print self.reveal() | |
| 3002 | + print('MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n') | |
| 3003 | + print(self.reveal()) | |
| 2995 | 3004 | else: |
| 2996 | - print 'No VBA macros found.' | |
| 3005 | + print('No VBA macros found.') | |
| 2997 | 3006 | except OlevbaBaseException: |
| 2998 | 3007 | raise |
| 2999 | 3008 | except Exception as exc: |
| ... | ... | @@ -3001,7 +3010,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3001 | 3010 | log.info('Error processing file %s (%s)' % (self.filename, exc)) |
| 3002 | 3011 | log.debug('Traceback:', exc_info=True) |
| 3003 | 3012 | raise ProcessingError(self.filename, exc) |
| 3004 | - print '' | |
| 3013 | + print('') | |
| 3005 | 3014 | |
| 3006 | 3015 | |
| 3007 | 3016 | def process_file_json(self, show_decoded_strings=False, |
| ... | ... | @@ -3048,7 +3057,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3048 | 3057 | curr_macro = {} |
| 3049 | 3058 | if hide_attributes: |
| 3050 | 3059 | # hide attribute lines: |
| 3051 | - vba_code_filtered = filter_vba(vba_code) | |
| 3060 | + vba_code_filtered = filter_vba(vba_code.decode('utf-8','replace')) | |
| 3052 | 3061 | else: |
| 3053 | 3062 | vba_code_filtered = vba_code |
| 3054 | 3063 | |
| ... | ... | @@ -3087,7 +3096,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3087 | 3096 | if self.detect_vba_macros(): |
| 3088 | 3097 | # print a waiting message only if the output is not redirected to a file: |
| 3089 | 3098 | if sys.stdout.isatty(): |
| 3090 | - print 'Analysis...\r', | |
| 3099 | + print('Analysis...\r') | |
| 3091 | 3100 | sys.stdout.flush() |
| 3092 | 3101 | self.analyze_macros(show_decoded_strings=show_decoded_strings, |
| 3093 | 3102 | deobfuscate=deobfuscate) |
| ... | ... | @@ -3105,7 +3114,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3105 | 3114 | base64obf, dridex, vba_obf) |
| 3106 | 3115 | |
| 3107 | 3116 | line = '%-12s %s' % (flags, self.filename) |
| 3108 | - print line | |
| 3117 | + print(line) | |
| 3109 | 3118 | |
| 3110 | 3119 | # old table display: |
| 3111 | 3120 | # macros = autoexec = suspicious = iocs = hexstrings = 'no' |
| ... | ... | @@ -3198,7 +3207,7 @@ def main(): |
| 3198 | 3207 | |
| 3199 | 3208 | # Print help if no arguments are passed |
| 3200 | 3209 | if len(args) == 0: |
| 3201 | - print __doc__ | |
| 3210 | + print(__doc__) | |
| 3202 | 3211 | parser.print_help() |
| 3203 | 3212 | sys.exit(RETURN_WRONG_ARGS) |
| 3204 | 3213 | |
| ... | ... | @@ -3209,7 +3218,7 @@ def main(): |
| 3209 | 3218 | url='http://decalage.info/python/oletools', |
| 3210 | 3219 | type='MetaInformation') |
| 3211 | 3220 | else: |
| 3212 | - print 'olevba %s - http://decalage.info/python/oletools' % __version__ | |
| 3221 | + print('olevba %s - http://decalage.info/python/oletools' % __version__) | |
| 3213 | 3222 | |
| 3214 | 3223 | logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') |
| 3215 | 3224 | # enable logging in the modules: |
| ... | ... | @@ -3229,8 +3238,8 @@ def main(): |
| 3229 | 3238 | # Column headers (do not know how many files there will be yet, so if no output_mode |
| 3230 | 3239 | # was specified, we will print triage for first file --> need these headers) |
| 3231 | 3240 | if options.output_mode in ('triage', 'unspecified'): |
| 3232 | - print '%-12s %-65s' % ('Flags', 'Filename') | |
| 3233 | - print '%-12s %-65s' % ('-' * 11, '-' * 65) | |
| 3241 | + print('%-12s %-65s' % ('Flags', 'Filename')) | |
| 3242 | + print('%-12s %-65s' % ('-' * 11, '-' * 65)) | |
| 3234 | 3243 | |
| 3235 | 3244 | previous_container = None |
| 3236 | 3245 | count = 0 |
| ... | ... | @@ -3248,14 +3257,14 @@ def main(): |
| 3248 | 3257 | if isinstance(data, Exception): |
| 3249 | 3258 | if isinstance(data, PathNotFoundException): |
| 3250 | 3259 | if options.output_mode in ('triage', 'unspecified'): |
| 3251 | - print '%-12s %s - File not found' % ('?', filename) | |
| 3260 | + print('%-12s %s - File not found' % ('?', filename)) | |
| 3252 | 3261 | elif options.output_mode != 'json': |
| 3253 | 3262 | log.error('Given path %r does not exist!' % filename) |
| 3254 | 3263 | return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \ |
| 3255 | 3264 | else RETURN_SEVERAL_ERRS |
| 3256 | 3265 | else: |
| 3257 | 3266 | if options.output_mode in ('triage', 'unspecified'): |
| 3258 | - print '%-12s %s - Failed to read from zip file %s' % ('?', filename, container) | |
| 3267 | + print('%-12s %s - Failed to read from zip file %s' % ('?', filename, container)) | |
| 3259 | 3268 | elif options.output_mode != 'json': |
| 3260 | 3269 | log.error('Exception opening/reading %r from zip file %r: %s' |
| 3261 | 3270 | % (filename, container, data)) |
| ... | ... | @@ -3282,7 +3291,7 @@ def main(): |
| 3282 | 3291 | # print container name when it changes: |
| 3283 | 3292 | if container != previous_container: |
| 3284 | 3293 | if container is not None: |
| 3285 | - print '\nFiles in %s:' % container | |
| 3294 | + print('\nFiles in %s:' % container) | |
| 3286 | 3295 | previous_container = container |
| 3287 | 3296 | # summarized output for triage: |
| 3288 | 3297 | vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, |
| ... | ... | @@ -3300,8 +3309,8 @@ def main(): |
| 3300 | 3309 | |
| 3301 | 3310 | except (SubstreamOpenError, UnexpectedDataError) as exc: |
| 3302 | 3311 | if options.output_mode in ('triage', 'unspecified'): |
| 3303 | - print '%-12s %s - Error opening substream or uenxpected ' \ | |
| 3304 | - 'content' % ('?', filename) | |
| 3312 | + print('%-12s %s - Error opening substream or uenxpected ' \ | |
| 3313 | + 'content' % ('?', filename)) | |
| 3305 | 3314 | elif options.output_mode == 'json': |
| 3306 | 3315 | print_json(file=filename, type='error', |
| 3307 | 3316 | error=type(exc).__name__, message=str(exc)) |
| ... | ... | @@ -3312,7 +3321,7 @@ def main(): |
| 3312 | 3321 | else RETURN_SEVERAL_ERRS |
| 3313 | 3322 | except FileOpenError as exc: |
| 3314 | 3323 | if options.output_mode in ('triage', 'unspecified'): |
| 3315 | - print '%-12s %s - File format not supported' % ('?', filename) | |
| 3324 | + print('%-12s %s - File format not supported' % ('?', filename)) | |
| 3316 | 3325 | elif options.output_mode == 'json': |
| 3317 | 3326 | print_json(file=filename, type='error', |
| 3318 | 3327 | error=type(exc).__name__, message=str(exc)) |
| ... | ... | @@ -3322,7 +3331,7 @@ def main(): |
| 3322 | 3331 | else RETURN_SEVERAL_ERRS |
| 3323 | 3332 | except ProcessingError as exc: |
| 3324 | 3333 | if options.output_mode in ('triage', 'unspecified'): |
| 3325 | - print '%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc) | |
| 3334 | + print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc)) | |
| 3326 | 3335 | elif options.output_mode == 'json': |
| 3327 | 3336 | print_json(file=filename, type='error', |
| 3328 | 3337 | error=type(exc).__name__, |
| ... | ... | @@ -3337,9 +3346,9 @@ def main(): |
| 3337 | 3346 | vba_parser.close() |
| 3338 | 3347 | |
| 3339 | 3348 | if options.output_mode == 'triage': |
| 3340 | - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ | |
| 3349 | + print('\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ | |
| 3341 | 3350 | 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ |
| 3342 | - 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' | |
| 3351 | + 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n') | |
| 3343 | 3352 | |
| 3344 | 3353 | if count == 1 and options.output_mode == 'unspecified': |
| 3345 | 3354 | # if options -t, -d and -j were not specified and it's a single file, print details: | ... | ... |
oletools/ppt_parser.py
oletools/thirdparty/olefile/olefile.py
| ... | ... | @@ -1030,10 +1030,11 @@ class OleDirectoryEntry: |
| 1030 | 1030 | #[PL] this method was added to use simple recursion instead of a complex |
| 1031 | 1031 | # algorithm. |
| 1032 | 1032 | # if this is not a storage or a leaf of the tree, nothing to do: |
| 1033 | + | |
| 1033 | 1034 | if child_sid == NOSTREAM: |
| 1034 | 1035 | return |
| 1035 | 1036 | # check if child SID is in the proper range: |
| 1036 | - if child_sid<0 or child_sid>=len(self.olefile.direntries): | |
| 1037 | + if child_sid <= 0 or child_sid >= len(self.olefile.direntries): | |
| 1037 | 1038 | self.olefile._raise_defect(DEFECT_INCORRECT, 'OLE DirEntry index out of range') |
| 1038 | 1039 | else: |
| 1039 | 1040 | # get child direntry: | ... | ... |
oletools/thirdparty/olefile/olefile2.py
| ... | ... | @@ -1004,7 +1004,7 @@ class OleFileIO: |
| 1004 | 1004 | TIFF files). |
| 1005 | 1005 | """ |
| 1006 | 1006 | |
| 1007 | - def __init__(self, filename = None, raise_defects=DEFECT_FATAL): | |
| 1007 | + def __init__(self, filename = None, raise_defects=DEFECT_FATAL): | |
| 1008 | 1008 | """ |
| 1009 | 1009 | Constructor for OleFileIO class. |
| 1010 | 1010 | ... | ... |
oletools/thirdparty/tablestream/tablestream.py
| ... | ... | @@ -236,7 +236,7 @@ class TableStream(object): |
| 236 | 236 | assert len(row) == self.num_columns |
| 237 | 237 | columns = [] |
| 238 | 238 | max_lines = 0 |
| 239 | - for i in xrange(self.num_columns): | |
| 239 | + for i in range(self.num_columns): | |
| 240 | 240 | cell = row[i] |
| 241 | 241 | # Convert to string: |
| 242 | 242 | # TODO: handle unicode properly |
| ... | ... | @@ -245,7 +245,7 @@ class TableStream(object): |
| 245 | 245 | # encode to UTF8, avoiding errors |
| 246 | 246 | cell = cell.decode('utf-8', errors='replace') |
| 247 | 247 | else: |
| 248 | - cell = unicode(cell) | |
| 248 | + cell = cell | |
| 249 | 249 | # Wrap cell text according to the column width |
| 250 | 250 | # TODO: use a TextWrapper object for each column instead |
| 251 | 251 | # split the string if it contains newline characters, otherwise |
| ... | ... | @@ -257,16 +257,16 @@ class TableStream(object): |
| 257 | 257 | if colors is not None and self.outfile.isatty(): |
| 258 | 258 | color = colors[i] |
| 259 | 259 | if color: |
| 260 | - for j in xrange(len(column)): | |
| 260 | + for j in range(len(column)): | |
| 261 | 261 | # print '%r: %s' % (column[j], type(column[j])) |
| 262 | 262 | column[j] = colorclass.Color('{auto%s}%s{/%s}' % (color, column[j], color)) |
| 263 | 263 | columns.append(column) |
| 264 | 264 | # determine which column has the highest number of lines |
| 265 | 265 | max_lines = max(len(columns[i]), max_lines) |
| 266 | 266 | # transpose: write output line by line |
| 267 | - for j in xrange(max_lines): | |
| 267 | + for j in range(max_lines): | |
| 268 | 268 | self.write(self.style.vertical_left) |
| 269 | - for i in xrange(self.num_columns): | |
| 269 | + for i in range(self.num_columns): | |
| 270 | 270 | column = columns[i] |
| 271 | 271 | if j<len(column): |
| 272 | 272 | # text to be written | ... | ... |