Commit 1541d5dedc31c06d3adbf382b51c5e4794fb711e
Committed by GitHub
Merge pull request #64 from sebdraven/master
Conversion of all oletools to Python 3.5 (temporarily breaking compatibility with Python 2.7)
Showing 8 changed files with 119 additions and 109 deletions
oletools/mraptor.py
| @@ -233,16 +233,16 @@ def main(): | @@ -233,16 +233,16 @@ def main(): | ||
| 233 | 233 | ||
| 234 | # Print help if no arguments are passed | 234 | # Print help if no arguments are passed |
| 235 | if len(args) == 0: | 235 | if len(args) == 0: |
| 236 | - print __doc__ | 236 | + print(__doc__) |
| 237 | parser.print_help() | 237 | parser.print_help() |
| 238 | - print '\nAn exit code is returned based on the analysis result:' | 238 | + print('\nAn exit code is returned based on the analysis result:') |
| 239 | for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK, Result_Error, Result_Suspicious): | 239 | for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK, Result_Error, Result_Suspicious): |
| 240 | - print ' - %d: %s' % (result.exit_code, result.name) | 240 | + print(' - %d: %s' % (result.exit_code, result.name)) |
| 241 | sys.exit() | 241 | sys.exit() |
| 242 | 242 | ||
| 243 | # print banner with version | 243 | # print banner with version |
| 244 | - print 'MacroRaptor %s - http://decalage.info/python/oletools' % __version__ | ||
| 245 | - print 'This is work in progress, please report issues at %s' % URL_ISSUES | 244 | + print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__) |
| 245 | + print('This is work in progress, please report issues at %s' % URL_ISSUES) | ||
| 246 | 246 | ||
| 247 | logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') | 247 | logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') |
| 248 | # enable logging in the modules: | 248 | # enable logging in the modules: |
| @@ -292,7 +292,7 @@ def main(): | @@ -292,7 +292,7 @@ def main(): | ||
| 292 | vba_code_all_modules = '' | 292 | vba_code_all_modules = '' |
| 293 | try: | 293 | try: |
| 294 | for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_all_macros(): | 294 | for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_all_macros(): |
| 295 | - vba_code_all_modules += vba_code + '\n' | 295 | + vba_code_all_modules += vba_code.decode('utf-8','replace') + '\n' |
| 296 | except Exception as e: | 296 | except Exception as e: |
| 297 | # log.error('Error when parsing VBA macros from file %r' % full_name) | 297 | # log.error('Error when parsing VBA macros from file %r' % full_name) |
| 298 | result = Result_Error | 298 | result = Result_Error |
| @@ -319,9 +319,9 @@ def main(): | @@ -319,9 +319,9 @@ def main(): | ||
| 319 | global_result = result | 319 | global_result = result |
| 320 | exitcode = result.exit_code | 320 | exitcode = result.exit_code |
| 321 | 321 | ||
| 322 | - print '' | ||
| 323 | - print 'Flags: A=AutoExec, W=Write, X=Execute' | ||
| 324 | - print 'Exit code: %d - %s' % (exitcode, global_result.name) | 322 | + print('') |
| 323 | + print('Flags: A=AutoExec, W=Write, X=Execute') | ||
| 324 | + print('Exit code: %d - %s' % (exitcode, global_result.name)) | ||
| 325 | sys.exit(exitcode) | 325 | sys.exit(exitcode) |
| 326 | 326 | ||
| 327 | if __name__ == '__main__': | 327 | if __name__ == '__main__': |
oletools/olemap.py
| @@ -90,14 +90,14 @@ FAT_COLORS = { | @@ -90,14 +90,14 @@ FAT_COLORS = { | ||
| 90 | 90 | ||
| 91 | if __name__ == '__main__': | 91 | if __name__ == '__main__': |
| 92 | # print banner with version | 92 | # print banner with version |
| 93 | - print 'olemap %s - http://decalage.info/python/oletools' % __version__ | 93 | + print('olemap %s - http://decalage.info/python/oletools' % __version__) |
| 94 | 94 | ||
| 95 | fname = sys.argv[1] | 95 | fname = sys.argv[1] |
| 96 | ole = olefile.OleFileIO(fname) | 96 | ole = olefile.OleFileIO(fname) |
| 97 | 97 | ||
| 98 | - print 'FAT:' | 98 | + print('FAT:') |
| 99 | t = tablestream.TableStream([8, 12, 8, 8], header_row=['Sector #', 'Type', 'Offset', 'Next #']) | 99 | t = tablestream.TableStream([8, 12, 8, 8], header_row=['Sector #', 'Type', 'Offset', 'Next #']) |
| 100 | - for i in xrange(ole.nb_sect): | 100 | + for i in range(ole.nb_sect): |
| 101 | fat_value = ole.fat[i] | 101 | fat_value = ole.fat[i] |
| 102 | fat_type = FAT_TYPES.get(fat_value, '<Data>') | 102 | fat_type = FAT_TYPES.get(fat_value, '<Data>') |
| 103 | color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default']) | 103 | color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default']) |
| @@ -106,15 +106,15 @@ if __name__ == '__main__': | @@ -106,15 +106,15 @@ if __name__ == '__main__': | ||
| 106 | # print '%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value) | 106 | # print '%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value) |
| 107 | t.write_row(['%8X' % i, fat_type, '%08X' % offset, '%8X' % fat_value], | 107 | t.write_row(['%8X' % i, fat_type, '%08X' % offset, '%8X' % fat_value], |
| 108 | colors=[None, color_type, None, None]) | 108 | colors=[None, color_type, None, None]) |
| 109 | - print '' | 109 | + print('') |
| 110 | 110 | ||
| 111 | - print 'MiniFAT:' | 111 | + print('MiniFAT:') |
| 112 | # load MiniFAT if it wasn't already done: | 112 | # load MiniFAT if it wasn't already done: |
| 113 | ole.loadminifat() | 113 | ole.loadminifat() |
| 114 | - for i in xrange(len(ole.minifat)): | 114 | + for i in range(len(ole.minifat)): |
| 115 | fat_value = ole.minifat[i] | 115 | fat_value = ole.minifat[i] |
| 116 | fat_type = FAT_TYPES.get(fat_value, 'Data') | 116 | fat_type = FAT_TYPES.get(fat_value, 'Data') |
| 117 | - print '%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value) | 117 | + print('%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value)) |
| 118 | 118 | ||
| 119 | ole.close() | 119 | ole.close() |
| 120 | 120 |
oletools/oletimes.py
| @@ -94,6 +94,6 @@ for obj in ole.listdir(streams=True, storages=True): | @@ -94,6 +94,6 @@ for obj in ole.listdir(streams=True, storages=True): | ||
| 94 | #print '- %s: mtime=%s ctime=%s' % (repr('/'.join(obj)), ole.getmtime(obj), ole.getctime(obj)) | 94 | #print '- %s: mtime=%s ctime=%s' % (repr('/'.join(obj)), ole.getmtime(obj), ole.getctime(obj)) |
| 95 | t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj)))) | 95 | t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj)))) |
| 96 | 96 | ||
| 97 | -print t | 97 | +print(t) |
| 98 | 98 | ||
| 99 | ole.close() | 99 | ole.close() |
oletools/olevba.py
| @@ -215,7 +215,7 @@ __version__ = '0.50' | @@ -215,7 +215,7 @@ __version__ = '0.50' | ||
| 215 | 215 | ||
| 216 | import sys, logging | 216 | import sys, logging |
| 217 | import struct | 217 | import struct |
| 218 | -import cStringIO | 218 | +from _io import StringIO,BytesIO |
| 219 | import math | 219 | import math |
| 220 | import zipfile | 220 | import zipfile |
| 221 | import re | 221 | import re |
| @@ -240,9 +240,9 @@ except ImportError: | @@ -240,9 +240,9 @@ except ImportError: | ||
| 240 | # Python <2.5: standalone ElementTree install | 240 | # Python <2.5: standalone ElementTree install |
| 241 | import elementtree.cElementTree as ET | 241 | import elementtree.cElementTree as ET |
| 242 | except ImportError: | 242 | except ImportError: |
| 243 | - raise ImportError, "lxml or ElementTree are not installed, " \ | 243 | + raise(ImportError, "lxml or ElementTree are not installed, " \ |
| 244 | + "see http://codespeak.net/lxml " \ | 244 | + "see http://codespeak.net/lxml " \ |
| 245 | - + "or http://effbot.org/zone/element-index.htm" | 245 | + + "or http://effbot.org/zone/element-index.htm") |
| 246 | 246 | ||
| 247 | import thirdparty.olefile as olefile | 247 | import thirdparty.olefile as olefile |
| 248 | from thirdparty.prettytable import prettytable | 248 | from thirdparty.prettytable import prettytable |
| @@ -421,7 +421,7 @@ TYPE2TAG = { | @@ -421,7 +421,7 @@ TYPE2TAG = { | ||
| 421 | 421 | ||
| 422 | 422 | ||
| 423 | # MSO files ActiveMime header magic | 423 | # MSO files ActiveMime header magic |
| 424 | -MSO_ACTIVEMIME_HEADER = 'ActiveMime' | 424 | +MSO_ACTIVEMIME_HEADER = b'ActiveMime' |
| 425 | 425 | ||
| 426 | MODULE_EXTENSION = "bas" | 426 | MODULE_EXTENSION = "bas" |
| 427 | CLASS_EXTENSION = "cls" | 427 | CLASS_EXTENSION = "cls" |
| @@ -630,7 +630,7 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') | @@ -630,7 +630,7 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') | ||
| 630 | re_nothex_check = re.compile(r'[G-Zg-z]') | 630 | re_nothex_check = re.compile(r'[G-Zg-z]') |
| 631 | 631 | ||
| 632 | # regex to extract printable strings (at least 5 chars) from VBA Forms: | 632 | # regex to extract printable strings (at least 5 chars) from VBA Forms: |
| 633 | -re_printable_string = re.compile(r'[\t\r\n\x20-\xFF]{5,}') | 633 | +re_printable_string = re.compile(rb'[\t\r\n\x20-\xFF]{5,}') |
| 634 | 634 | ||
| 635 | 635 | ||
| 636 | # === PARTIAL VBA GRAMMAR ==================================================== | 636 | # === PARTIAL VBA GRAMMAR ==================================================== |
| @@ -1060,10 +1060,10 @@ def decompress_stream(compressed_container): | @@ -1060,10 +1060,10 @@ def decompress_stream(compressed_container): | ||
| 1060 | # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the | 1060 | # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the |
| 1061 | # DecompressedBuffer (section 2.4.1.1.2). | 1061 | # DecompressedBuffer (section 2.4.1.1.2). |
| 1062 | 1062 | ||
| 1063 | - decompressed_container = '' # result | 1063 | + decompressed_container = b'' # result |
| 1064 | compressed_current = 0 | 1064 | compressed_current = 0 |
| 1065 | 1065 | ||
| 1066 | - sig_byte = ord(compressed_container[compressed_current]) | 1066 | + sig_byte = compressed_container[compressed_current] |
| 1067 | if sig_byte != 0x01: | 1067 | if sig_byte != 0x01: |
| 1068 | raise ValueError('invalid signature byte {0:02X}'.format(sig_byte)) | 1068 | raise ValueError('invalid signature byte {0:02X}'.format(sig_byte)) |
| 1069 | 1069 | ||
| @@ -1109,7 +1109,7 @@ def decompress_stream(compressed_container): | @@ -1109,7 +1109,7 @@ def decompress_stream(compressed_container): | ||
| 1109 | # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk | 1109 | # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk |
| 1110 | # uncompressed chunk: read the next 4096 bytes as-is | 1110 | # uncompressed chunk: read the next 4096 bytes as-is |
| 1111 | #TODO: check if there are at least 4096 bytes left | 1111 | #TODO: check if there are at least 4096 bytes left |
| 1112 | - decompressed_container += compressed_container[compressed_current:compressed_current + 4096] | 1112 | + decompressed_container += bytes([compressed_container[compressed_current:compressed_current + 4096]]) |
| 1113 | compressed_current += 4096 | 1113 | compressed_current += 4096 |
| 1114 | else: | 1114 | else: |
| 1115 | # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk | 1115 | # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk |
| @@ -1120,9 +1120,9 @@ def decompress_stream(compressed_container): | @@ -1120,9 +1120,9 @@ def decompress_stream(compressed_container): | ||
| 1120 | # log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end)) | 1120 | # log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end)) |
| 1121 | # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or | 1121 | # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or |
| 1122 | # copy tokens (reference to a previous literal token) | 1122 | # copy tokens (reference to a previous literal token) |
| 1123 | - flag_byte = ord(compressed_container[compressed_current]) | 1123 | + flag_byte = compressed_container[compressed_current] |
| 1124 | compressed_current += 1 | 1124 | compressed_current += 1 |
| 1125 | - for bit_index in xrange(0, 8): | 1125 | + for bit_index in range(0, 8): |
| 1126 | # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end)) | 1126 | # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end)) |
| 1127 | if compressed_current >= compressed_end: | 1127 | if compressed_current >= compressed_end: |
| 1128 | break | 1128 | break |
| @@ -1132,7 +1132,7 @@ def decompress_stream(compressed_container): | @@ -1132,7 +1132,7 @@ def decompress_stream(compressed_container): | ||
| 1132 | #log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit)) | 1132 | #log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit)) |
| 1133 | if flag_bit == 0: # LiteralToken | 1133 | if flag_bit == 0: # LiteralToken |
| 1134 | # copy one byte directly to output | 1134 | # copy one byte directly to output |
| 1135 | - decompressed_container += compressed_container[compressed_current] | 1135 | + decompressed_container += bytes([compressed_container[compressed_current]]) |
| 1136 | compressed_current += 1 | 1136 | compressed_current += 1 |
| 1137 | else: # CopyToken | 1137 | else: # CopyToken |
| 1138 | # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken | 1138 | # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken |
| @@ -1147,8 +1147,8 @@ def decompress_stream(compressed_container): | @@ -1147,8 +1147,8 @@ def decompress_stream(compressed_container): | ||
| 1147 | offset = (temp1 >> temp2) + 1 | 1147 | offset = (temp1 >> temp2) + 1 |
| 1148 | #log.debug('offset=%d length=%d' % (offset, length)) | 1148 | #log.debug('offset=%d length=%d' % (offset, length)) |
| 1149 | copy_source = len(decompressed_container) - offset | 1149 | copy_source = len(decompressed_container) - offset |
| 1150 | - for index in xrange(copy_source, copy_source + length): | ||
| 1151 | - decompressed_container += decompressed_container[index] | 1150 | + for index in range(copy_source, copy_source + length): |
| 1151 | + decompressed_container += bytes([decompressed_container[index]]) | ||
| 1152 | compressed_current += 2 | 1152 | compressed_current += 2 |
| 1153 | return decompressed_container | 1153 | return decompressed_container |
| 1154 | 1154 | ||
| @@ -1191,7 +1191,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | @@ -1191,7 +1191,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | ||
| 1191 | code_modules = {} | 1191 | code_modules = {} |
| 1192 | 1192 | ||
| 1193 | for line in project: | 1193 | for line in project: |
| 1194 | - line = line.strip() | 1194 | + line = line.strip().decode('utf-8','ignore') |
| 1195 | if '=' in line: | 1195 | if '=' in line: |
| 1196 | # split line at the 1st equal sign: | 1196 | # split line at the 1st equal sign: |
| 1197 | name, value = line.split('=', 1) | 1197 | name, value = line.split('=', 1) |
| @@ -1222,7 +1222,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | @@ -1222,7 +1222,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | ||
| 1222 | else: | 1222 | else: |
| 1223 | raise UnexpectedDataError(dir_path, name, expected, value) | 1223 | raise UnexpectedDataError(dir_path, name, expected, value) |
| 1224 | 1224 | ||
| 1225 | - dir_stream = cStringIO.StringIO(decompress_stream(dir_compressed)) | 1225 | + dir_stream = BytesIO(decompress_stream(dir_compressed)) |
| 1226 | 1226 | ||
| 1227 | # PROJECTSYSKIND Record | 1227 | # PROJECTSYSKIND Record |
| 1228 | projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0] | 1228 | projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0] |
| @@ -1484,7 +1484,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | @@ -1484,7 +1484,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | ||
| 1484 | uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace') | 1484 | uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace') |
| 1485 | 1485 | ||
| 1486 | log.debug("parsing {0} modules".format(projectmodules_count)) | 1486 | log.debug("parsing {0} modules".format(projectmodules_count)) |
| 1487 | - for projectmodule_index in xrange(0, projectmodules_count): | 1487 | + for projectmodule_index in range(0, projectmodules_count): |
| 1488 | try: | 1488 | try: |
| 1489 | modulename_id = struct.unpack("<H", dir_stream.read(2))[0] | 1489 | modulename_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1490 | check_value('MODULENAME_Id', 0x0019, modulename_id) | 1490 | check_value('MODULENAME_Id', 0x0019, modulename_id) |
| @@ -1881,19 +1881,19 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): | @@ -1881,19 +1881,19 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): | ||
| 1881 | pass | 1881 | pass |
| 1882 | elif isinstance(json_obj, str): | 1882 | elif isinstance(json_obj, str): |
| 1883 | # de-code and re-encode | 1883 | # de-code and re-encode |
| 1884 | - dencoded = json_obj.decode(encoding, errors).encode(encoding, errors) | 1884 | + dencoded = json_obj |
| 1885 | if dencoded != json_obj: | 1885 | if dencoded != json_obj: |
| 1886 | log.debug('json2ascii: replaced: {0} (len {1})' | 1886 | log.debug('json2ascii: replaced: {0} (len {1})' |
| 1887 | .format(json_obj, len(json_obj))) | 1887 | .format(json_obj, len(json_obj))) |
| 1888 | log.debug('json2ascii: with: {0} (len {1})' | 1888 | log.debug('json2ascii: with: {0} (len {1})' |
| 1889 | .format(dencoded, len(dencoded))) | 1889 | .format(dencoded, len(dencoded))) |
| 1890 | return dencoded | 1890 | return dencoded |
| 1891 | - elif isinstance(json_obj, unicode): | 1891 | + elif isinstance(json_obj, bytes): |
| 1892 | log.debug('json2ascii: encode unicode: {0}' | 1892 | log.debug('json2ascii: encode unicode: {0}' |
| 1893 | - .format(json_obj.encode(encoding, errors))) | 1893 | + .format(json_obj.decode(encoding, errors))) |
| 1894 | # cannot put original into logger | 1894 | # cannot put original into logger |
| 1895 | # print 'original: ' json_obj | 1895 | # print 'original: ' json_obj |
| 1896 | - return json_obj.encode(encoding, errors) | 1896 | + return json_obj.decode(encoding, errors) |
| 1897 | elif isinstance(json_obj, dict): | 1897 | elif isinstance(json_obj, dict): |
| 1898 | for key in json_obj: | 1898 | for key in json_obj: |
| 1899 | json_obj[key] = json2ascii(json_obj[key]) | 1899 | json_obj[key] = json2ascii(json_obj[key]) |
| @@ -1931,18 +1931,18 @@ def print_json(json_dict=None, _json_is_last=False, **json_parts): | @@ -1931,18 +1931,18 @@ def print_json(json_dict=None, _json_is_last=False, **json_parts): | ||
| 1931 | json_dict = json_parts | 1931 | json_dict = json_parts |
| 1932 | 1932 | ||
| 1933 | if not _have_printed_json_start: | 1933 | if not _have_printed_json_start: |
| 1934 | - print '[' | 1934 | + print('[') |
| 1935 | _have_printed_json_start = True | 1935 | _have_printed_json_start = True |
| 1936 | 1936 | ||
| 1937 | lines = json.dumps(json2ascii(json_dict), check_circular=False, | 1937 | lines = json.dumps(json2ascii(json_dict), check_circular=False, |
| 1938 | indent=4, ensure_ascii=False).splitlines() | 1938 | indent=4, ensure_ascii=False).splitlines() |
| 1939 | for line in lines[:-1]: | 1939 | for line in lines[:-1]: |
| 1940 | - print ' {0}'.format(line) | 1940 | + print(' {0}'.format(line)) |
| 1941 | if _json_is_last: | 1941 | if _json_is_last: |
| 1942 | - print ' {0}'.format(lines[-1]) # print last line without comma | ||
| 1943 | - print ']' | 1942 | + print(' {0}'.format(lines[-1])) # print last line without comma |
| 1943 | + print(']') | ||
| 1944 | else: | 1944 | else: |
| 1945 | - print ' {0},'.format(lines[-1]) # print last line with comma | 1945 | + print(' {0},'.format(lines[-1])) # print last line with comma |
| 1946 | 1946 | ||
| 1947 | 1947 | ||
| 1948 | class VBA_Scanner(object): | 1948 | class VBA_Scanner(object): |
| @@ -1959,10 +1959,10 @@ class VBA_Scanner(object): | @@ -1959,10 +1959,10 @@ class VBA_Scanner(object): | ||
| 1959 | """ | 1959 | """ |
| 1960 | # join long lines ending with " _": | 1960 | # join long lines ending with " _": |
| 1961 | self.code = vba_collapse_long_lines(vba_code) | 1961 | self.code = vba_collapse_long_lines(vba_code) |
| 1962 | - self.code_hex = '' | ||
| 1963 | - self.code_hex_rev = '' | ||
| 1964 | - self.code_rev_hex = '' | ||
| 1965 | - self.code_base64 = '' | 1962 | + self.code_hex = b'' |
| 1963 | + self.code_hex_rev = b'' | ||
| 1964 | + self.code_rev_hex = b'' | ||
| 1965 | + self.code_base64 = b'' | ||
| 1966 | self.code_dridex = '' | 1966 | self.code_dridex = '' |
| 1967 | self.code_vba = '' | 1967 | self.code_vba = '' |
| 1968 | self.strReverse = None | 1968 | self.strReverse = None |
| @@ -1995,19 +1995,19 @@ class VBA_Scanner(object): | @@ -1995,19 +1995,19 @@ class VBA_Scanner(object): | ||
| 1995 | if 'strreverse' in self.code.lower(): self.strReverse = True | 1995 | if 'strreverse' in self.code.lower(): self.strReverse = True |
| 1996 | # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords: | 1996 | # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords: |
| 1997 | for encoded, decoded in self.hex_strings: | 1997 | for encoded, decoded in self.hex_strings: |
| 1998 | - self.code_hex += '\n' + decoded | 1998 | + self.code_hex += b'\n' + decoded |
| 1999 | # if the code contains "StrReverse", also append the hex strings in reverse order: | 1999 | # if the code contains "StrReverse", also append the hex strings in reverse order: |
| 2000 | if self.strReverse: | 2000 | if self.strReverse: |
| 2001 | # StrReverse after hex decoding: | 2001 | # StrReverse after hex decoding: |
| 2002 | - self.code_hex_rev += '\n' + decoded[::-1] | 2002 | + self.code_hex_rev += b'\n' + decoded[::-1] |
| 2003 | # StrReverse before hex decoding: | 2003 | # StrReverse before hex decoding: |
| 2004 | - self.code_rev_hex += '\n' + binascii.unhexlify(encoded[::-1]) | 2004 | + self.code_rev_hex += b'\n' + binascii.unhexlify(encoded[::-1]) |
| 2005 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ | 2005 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ |
| 2006 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) | 2006 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) |
| 2007 | # Detect Base64-encoded strings | 2007 | # Detect Base64-encoded strings |
| 2008 | self.base64_strings = detect_base64_strings(self.code) | 2008 | self.base64_strings = detect_base64_strings(self.code) |
| 2009 | for encoded, decoded in self.base64_strings: | 2009 | for encoded, decoded in self.base64_strings: |
| 2010 | - self.code_base64 += '\n' + decoded | 2010 | + self.code_base64 += b'\n' + decoded |
| 2011 | # Detect Dridex-encoded strings | 2011 | # Detect Dridex-encoded strings |
| 2012 | self.dridex_strings = detect_dridex_strings(self.code) | 2012 | self.dridex_strings = detect_dridex_strings(self.code) |
| 2013 | for encoded, decoded in self.dridex_strings: | 2013 | for encoded, decoded in self.dridex_strings: |
| @@ -2026,13 +2026,15 @@ class VBA_Scanner(object): | @@ -2026,13 +2026,15 @@ class VBA_Scanner(object): | ||
| 2026 | 2026 | ||
| 2027 | for code, obfuscation in ( | 2027 | for code, obfuscation in ( |
| 2028 | (self.code, None), | 2028 | (self.code, None), |
| 2029 | - (self.code_hex, 'Hex'), | 2029 | + (self.code_hex.decode('utf-8','replace'), 'Hex'), |
| 2030 | (self.code_hex_rev, 'Hex+StrReverse'), | 2030 | (self.code_hex_rev, 'Hex+StrReverse'), |
| 2031 | (self.code_rev_hex, 'StrReverse+Hex'), | 2031 | (self.code_rev_hex, 'StrReverse+Hex'), |
| 2032 | - (self.code_base64, 'Base64'), | 2032 | + (self.code_base64.decode('utf-8', 'replace'), 'Base64'), |
| 2033 | (self.code_dridex, 'Dridex'), | 2033 | (self.code_dridex, 'Dridex'), |
| 2034 | (self.code_vba, 'VBA expression'), | 2034 | (self.code_vba, 'VBA expression'), |
| 2035 | ): | 2035 | ): |
| 2036 | + if isinstance(code,bytes): | ||
| 2037 | + code=code.decode('utf-8','replace') | ||
| 2036 | self.autoexec_keywords += detect_autoexec(code, obfuscation) | 2038 | self.autoexec_keywords += detect_autoexec(code, obfuscation) |
| 2037 | self.suspicious_keywords += detect_suspicious(code, obfuscation) | 2039 | self.suspicious_keywords += detect_suspicious(code, obfuscation) |
| 2038 | self.iocs += detect_patterns(code, obfuscation) | 2040 | self.iocs += detect_patterns(code, obfuscation) |
| @@ -2158,7 +2160,7 @@ class VBA_Parser(object): | @@ -2158,7 +2160,7 @@ class VBA_Parser(object): | ||
| 2158 | _file = filename | 2160 | _file = filename |
| 2159 | else: | 2161 | else: |
| 2160 | # file already read in memory, make it a file-like object for zipfile: | 2162 | # file already read in memory, make it a file-like object for zipfile: |
| 2161 | - _file = cStringIO.StringIO(data) | 2163 | + _file = BytesIO(data) |
| 2162 | #self.file = _file | 2164 | #self.file = _file |
| 2163 | self.ole_file = None | 2165 | self.ole_file = None |
| 2164 | self.ole_subfiles = [] | 2166 | self.ole_subfiles = [] |
| @@ -2207,7 +2209,7 @@ class VBA_Parser(object): | @@ -2207,7 +2209,7 @@ class VBA_Parser(object): | ||
| 2207 | if data is None: | 2209 | if data is None: |
| 2208 | data = open(filename, 'rb').read() | 2210 | data = open(filename, 'rb').read() |
| 2209 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace | 2211 | # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace |
| 2210 | - if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: | 2212 | + if b'http://schemas.microsoft.com/office/word/2003/wordml' in data: |
| 2211 | self.open_word2003xml(data) | 2213 | self.open_word2003xml(data) |
| 2212 | # store a lowercase version for the next tests: | 2214 | # store a lowercase version for the next tests: |
| 2213 | data_lowercase = data.lower() | 2215 | data_lowercase = data.lower() |
| @@ -2217,14 +2219,14 @@ class VBA_Parser(object): | @@ -2217,14 +2219,14 @@ class VBA_Parser(object): | ||
| 2217 | # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored. | 2219 | # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored. |
| 2218 | # And the line is case insensitive. | 2220 | # And the line is case insensitive. |
| 2219 | # so we'll just check the presence of mime, version and multipart anywhere: | 2221 | # so we'll just check the presence of mime, version and multipart anywhere: |
| 2220 | - if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase \ | ||
| 2221 | - and 'multipart' in data_lowercase: | 2222 | + if self.type is None and b'mime' in data_lowercase and b'version' in data_lowercase \ |
| 2223 | + and b'multipart' in data_lowercase: | ||
| 2222 | self.open_mht(data) | 2224 | self.open_mht(data) |
| 2223 | #TODO: handle exceptions | 2225 | #TODO: handle exceptions |
| 2224 | #TODO: Excel 2003 XML | 2226 | #TODO: Excel 2003 XML |
| 2225 | # Check if this is a plain text VBA or VBScript file: | 2227 | # Check if this is a plain text VBA or VBScript file: |
| 2226 | # To avoid scanning binary files, we simply check for some control chars: | 2228 | # To avoid scanning binary files, we simply check for some control chars: |
| 2227 | - if self.type is None and '\x00' not in data: | 2229 | + if self.type is None and b'\x00' not in data: |
| 2228 | self.open_text(data) | 2230 | self.open_text(data) |
| 2229 | if self.type is None: | 2231 | if self.type is None: |
| 2230 | # At this stage, could not match a known format: | 2232 | # At this stage, could not match a known format: |
| @@ -2358,6 +2360,8 @@ class VBA_Parser(object): | @@ -2358,6 +2360,8 @@ class VBA_Parser(object): | ||
| 2358 | """ | 2360 | """ |
| 2359 | log.info('Opening MHTML file %s' % self.filename) | 2361 | log.info('Opening MHTML file %s' % self.filename) |
| 2360 | try: | 2362 | try: |
| 2363 | + if isinstance(data,bytes): | ||
| 2364 | + data = data.decode('utf8', 'replace') | ||
| 2361 | # parse the MIME content | 2365 | # parse the MIME content |
| 2362 | # remove any leading whitespace or newline (workaround for issue in email package) | 2366 | # remove any leading whitespace or newline (workaround for issue in email package) |
| 2363 | stripped_data = data.lstrip('\r\n\t ') | 2367 | stripped_data = data.lstrip('\r\n\t ') |
| @@ -2387,7 +2391,8 @@ class VBA_Parser(object): | @@ -2387,7 +2391,8 @@ class VBA_Parser(object): | ||
| 2387 | # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. | 2391 | # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. |
| 2388 | # decompress the zlib data starting at offset 0x32, which is the OLE container: | 2392 | # decompress the zlib data starting at offset 0x32, which is the OLE container: |
| 2389 | # check ActiveMime header: | 2393 | # check ActiveMime header: |
| 2390 | - if isinstance(part_data, str) and is_mso_file(part_data): | 2394 | + |
| 2395 | + if (isinstance(part_data, str) or isinstance(part_data, bytes)) and is_mso_file(part_data): | ||
| 2391 | log.debug('Found ActiveMime header, decompressing MSO container') | 2396 | log.debug('Found ActiveMime header, decompressing MSO container') |
| 2392 | try: | 2397 | try: |
| 2393 | ole_data = mso_file_extract(part_data) | 2398 | ole_data = mso_file_extract(part_data) |
| @@ -2458,6 +2463,8 @@ class VBA_Parser(object): | @@ -2458,6 +2463,8 @@ class VBA_Parser(object): | ||
| 2458 | """ | 2463 | """ |
| 2459 | log.info('Opening text file %s' % self.filename) | 2464 | log.info('Opening text file %s' % self.filename) |
| 2460 | # directly store the source code: | 2465 | # directly store the source code: |
| 2466 | + if isinstance(data,bytes): | ||
| 2467 | + data=data.decode('utf8','replace') | ||
| 2461 | self.vba_code_all_modules = data | 2468 | self.vba_code_all_modules = data |
| 2462 | self.contains_macros = True | 2469 | self.contains_macros = True |
| 2463 | # set type only if parsing succeeds | 2470 | # set type only if parsing succeeds |
| @@ -2596,7 +2603,7 @@ class VBA_Parser(object): | @@ -2596,7 +2603,7 @@ class VBA_Parser(object): | ||
| 2596 | # Also look for VBA code in any stream including orphans | 2603 | # Also look for VBA code in any stream including orphans |
| 2597 | # (happens in some malformed files) | 2604 | # (happens in some malformed files) |
| 2598 | ole = self.ole_file | 2605 | ole = self.ole_file |
| 2599 | - for sid in xrange(len(ole.direntries)): | 2606 | + for sid in range(len(ole.direntries)): |
| 2600 | # check if id is already done above: | 2607 | # check if id is already done above: |
| 2601 | log.debug('Checking DirEntry #%d' % sid) | 2608 | log.debug('Checking DirEntry #%d' % sid) |
| 2602 | d = ole.direntries[sid] | 2609 | d = ole.direntries[sid] |
| @@ -2614,7 +2621,7 @@ class VBA_Parser(object): | @@ -2614,7 +2621,7 @@ class VBA_Parser(object): | ||
| 2614 | log.debug('%r...[much more data]...%r' % (data[:100], data[-50:])) | 2621 | log.debug('%r...[much more data]...%r' % (data[:100], data[-50:])) |
| 2615 | else: | 2622 | else: |
| 2616 | log.debug(repr(data)) | 2623 | log.debug(repr(data)) |
| 2617 | - if 'Attribut' in data: | 2624 | + if 'Attribut' in data.decode('utf-8','ignore'): |
| 2618 | log.debug('Found VBA compressed code') | 2625 | log.debug('Found VBA compressed code') |
| 2619 | self.contains_macros = True | 2626 | self.contains_macros = True |
| 2620 | except IOError as exc: | 2627 | except IOError as exc: |
| @@ -2662,7 +2669,7 @@ class VBA_Parser(object): | @@ -2662,7 +2669,7 @@ class VBA_Parser(object): | ||
| 2662 | # Also look for VBA code in any stream including orphans | 2669 | # Also look for VBA code in any stream including orphans |
| 2663 | # (happens in some malformed files) | 2670 | # (happens in some malformed files) |
| 2664 | ole = self.ole_file | 2671 | ole = self.ole_file |
| 2665 | - for sid in xrange(len(ole.direntries)): | 2672 | + for sid in range(len(ole.direntries)): |
| 2666 | # check if id is already done above: | 2673 | # check if id is already done above: |
| 2667 | log.debug('Checking DirEntry #%d' % sid) | 2674 | log.debug('Checking DirEntry #%d' % sid) |
| 2668 | if sid in vba_stream_ids: | 2675 | if sid in vba_stream_ids: |
| @@ -2677,7 +2684,7 @@ class VBA_Parser(object): | @@ -2677,7 +2684,7 @@ class VBA_Parser(object): | ||
| 2677 | # read data | 2684 | # read data |
| 2678 | log.debug('Reading data from stream %r' % d.name) | 2685 | log.debug('Reading data from stream %r' % d.name) |
| 2679 | data = ole._open(d.isectStart, d.size).read() | 2686 | data = ole._open(d.isectStart, d.size).read() |
| 2680 | - for match in re.finditer(r'\x00Attribut[^e]', data, flags=re.IGNORECASE): | 2687 | + for match in re.finditer(rb'\x00Attribut[^e]', data, flags=re.IGNORECASE): |
| 2681 | start = match.start() - 3 | 2688 | start = match.start() - 3 |
| 2682 | log.debug('Found VBA compressed code at index %X' % start) | 2689 | log.debug('Found VBA compressed code at index %X' % start) |
| 2683 | compressed_code = data[start:] | 2690 | compressed_code = data[start:] |
| @@ -2720,9 +2727,9 @@ class VBA_Parser(object): | @@ -2720,9 +2727,9 @@ class VBA_Parser(object): | ||
| 2720 | self.vba_code_all_modules = '' | 2727 | self.vba_code_all_modules = '' |
| 2721 | for (_, _, _, vba_code) in self.extract_all_macros(): | 2728 | for (_, _, _, vba_code) in self.extract_all_macros(): |
| 2722 | #TODO: filter code? (each module) | 2729 | #TODO: filter code? (each module) |
| 2723 | - self.vba_code_all_modules += vba_code + '\n' | 2730 | + self.vba_code_all_modules += vba_code.decode('utf-8', 'ignore') + '\n' |
| 2724 | for (_, _, form_string) in self.extract_form_strings(): | 2731 | for (_, _, form_string) in self.extract_form_strings(): |
| 2725 | - self.vba_code_all_modules += form_string + '\n' | 2732 | + self.vba_code_all_modules += form_string.decode('utf-8', 'ignore') + '\n' |
| 2726 | # Analyze the whole code at once: | 2733 | # Analyze the whole code at once: |
| 2727 | scanner = VBA_Scanner(self.vba_code_all_modules) | 2734 | scanner = VBA_Scanner(self.vba_code_all_modules) |
| 2728 | self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate) | 2735 | self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate) |
| @@ -2897,7 +2904,7 @@ class VBA_Parser_CLI(VBA_Parser): | @@ -2897,7 +2904,7 @@ class VBA_Parser_CLI(VBA_Parser): | ||
| 2897 | """ | 2904 | """ |
| 2898 | # print a waiting message only if the output is not redirected to a file: | 2905 | # print a waiting message only if the output is not redirected to a file: |
| 2899 | if sys.stdout.isatty(): | 2906 | if sys.stdout.isatty(): |
| 2900 | - print 'Analysis...\r', | 2907 | + print('Analysis...\r') |
| 2901 | sys.stdout.flush() | 2908 | sys.stdout.flush() |
| 2902 | results = self.analyze_macros(show_decoded_strings, deobfuscate) | 2909 | results = self.analyze_macros(show_decoded_strings, deobfuscate) |
| 2903 | if results: | 2910 | if results: |
| @@ -2913,9 +2920,9 @@ class VBA_Parser_CLI(VBA_Parser): | @@ -2913,9 +2920,9 @@ class VBA_Parser_CLI(VBA_Parser): | ||
| 2913 | if not is_printable(description): | 2920 | if not is_printable(description): |
| 2914 | description = repr(description) | 2921 | description = repr(description) |
| 2915 | t.add_row((kw_type, keyword, description)) | 2922 | t.add_row((kw_type, keyword, description)) |
| 2916 | - print t | 2923 | + print(t) |
| 2917 | else: | 2924 | else: |
| 2918 | - print 'No suspicious keyword or IOC found.' | 2925 | + print('No suspicious keyword or IOC found.') |
| 2919 | 2926 | ||
| 2920 | def print_analysis_json(self, show_decoded_strings=False, deobfuscate=False): | 2927 | def print_analysis_json(self, show_decoded_strings=False, deobfuscate=False): |
| 2921 | """ | 2928 | """ |
| @@ -2929,7 +2936,7 @@ class VBA_Parser_CLI(VBA_Parser): | @@ -2929,7 +2936,7 @@ class VBA_Parser_CLI(VBA_Parser): | ||
| 2929 | """ | 2936 | """ |
| 2930 | # print a waiting message only if the output is not redirected to a file: | 2937 | # print a waiting message only if the output is not redirected to a file: |
| 2931 | if sys.stdout.isatty(): | 2938 | if sys.stdout.isatty(): |
| 2932 | - print 'Analysis...\r', | 2939 | + print('Analysis...\r') |
| 2933 | sys.stdout.flush() | 2940 | sys.stdout.flush() |
| 2934 | return [dict(type=kw_type, keyword=keyword, description=description) | 2941 | return [dict(type=kw_type, keyword=keyword, description=description) |
| 2935 | for kw_type, keyword, description in self.analyze_macros(show_decoded_strings, deobfuscate)] | 2942 | for kw_type, keyword, description in self.analyze_macros(show_decoded_strings, deobfuscate)] |
| @@ -2958,42 +2965,44 @@ class VBA_Parser_CLI(VBA_Parser): | @@ -2958,42 +2965,44 @@ class VBA_Parser_CLI(VBA_Parser): | ||
| 2958 | display_filename = '%s in %s' % (self.filename, self.container) | 2965 | display_filename = '%s in %s' % (self.filename, self.container) |
| 2959 | else: | 2966 | else: |
| 2960 | display_filename = self.filename | 2967 | display_filename = self.filename |
| 2961 | - print '=' * 79 | ||
| 2962 | - print 'FILE:', display_filename | 2968 | + print('=' * 79) |
| 2969 | + print('FILE:', display_filename) | ||
| 2963 | try: | 2970 | try: |
| 2964 | #TODO: handle olefile errors, when an OLE file is malformed | 2971 | #TODO: handle olefile errors, when an OLE file is malformed |
| 2965 | - print 'Type:', self.type | 2972 | + print('Type: %s' % self.type) |
| 2966 | if self.detect_vba_macros(): | 2973 | if self.detect_vba_macros(): |
| 2967 | #print 'Contains VBA Macros:' | 2974 | #print 'Contains VBA Macros:' |
| 2968 | for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros(): | 2975 | for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros(): |
| 2969 | if hide_attributes: | 2976 | if hide_attributes: |
| 2970 | # hide attribute lines: | 2977 | # hide attribute lines: |
| 2978 | + if isinstance(vba_code,bytes): | ||
| 2979 | + vba_code =vba_code.decode('utf-8','replace') | ||
| 2971 | vba_code_filtered = filter_vba(vba_code) | 2980 | vba_code_filtered = filter_vba(vba_code) |
| 2972 | else: | 2981 | else: |
| 2973 | vba_code_filtered = vba_code | 2982 | vba_code_filtered = vba_code |
| 2974 | - print '-' * 79 | ||
| 2975 | - print 'VBA MACRO %s ' % vba_filename | ||
| 2976 | - print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)) | 2983 | + print('-' * 79) |
| 2984 | + print('VBA MACRO %s ' % vba_filename) | ||
| 2985 | + print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))) | ||
| 2977 | if display_code: | 2986 | if display_code: |
| 2978 | - print '- ' * 39 | 2987 | + print('- ' * 39) |
| 2979 | # detect empty macros: | 2988 | # detect empty macros: |
| 2980 | if vba_code_filtered.strip() == '': | 2989 | if vba_code_filtered.strip() == '': |
| 2981 | - print '(empty macro)' | 2990 | + print('(empty macro)') |
| 2982 | else: | 2991 | else: |
| 2983 | - print vba_code_filtered | 2992 | + print(vba_code_filtered) |
| 2984 | for (subfilename, stream_path, form_string) in self.extract_form_strings(): | 2993 | for (subfilename, stream_path, form_string) in self.extract_form_strings(): |
| 2985 | - print '-' * 79 | ||
| 2986 | - print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path) | ||
| 2987 | - print '- ' * 39 | ||
| 2988 | - print form_string | 2994 | + print('-' * 79) |
| 2995 | + print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)) | ||
| 2996 | + print('- ' * 39) | ||
| 2997 | + print(form_string.decode('utf-8', 'ignore')) | ||
| 2989 | if not vba_code_only: | 2998 | if not vba_code_only: |
| 2990 | # analyse the code from all modules at once: | 2999 | # analyse the code from all modules at once: |
| 2991 | self.print_analysis(show_decoded_strings, deobfuscate) | 3000 | self.print_analysis(show_decoded_strings, deobfuscate) |
| 2992 | if show_deobfuscated_code: | 3001 | if show_deobfuscated_code: |
| 2993 | - print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n' | ||
| 2994 | - print self.reveal() | 3002 | + print('MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n') |
| 3003 | + print(self.reveal()) | ||
| 2995 | else: | 3004 | else: |
| 2996 | - print 'No VBA macros found.' | 3005 | + print('No VBA macros found.') |
| 2997 | except OlevbaBaseException: | 3006 | except OlevbaBaseException: |
| 2998 | raise | 3007 | raise |
| 2999 | except Exception as exc: | 3008 | except Exception as exc: |
| @@ -3001,7 +3010,7 @@ class VBA_Parser_CLI(VBA_Parser): | @@ -3001,7 +3010,7 @@ class VBA_Parser_CLI(VBA_Parser): | ||
| 3001 | log.info('Error processing file %s (%s)' % (self.filename, exc)) | 3010 | log.info('Error processing file %s (%s)' % (self.filename, exc)) |
| 3002 | log.debug('Traceback:', exc_info=True) | 3011 | log.debug('Traceback:', exc_info=True) |
| 3003 | raise ProcessingError(self.filename, exc) | 3012 | raise ProcessingError(self.filename, exc) |
| 3004 | - print '' | 3013 | + print('') |
| 3005 | 3014 | ||
| 3006 | 3015 | ||
| 3007 | def process_file_json(self, show_decoded_strings=False, | 3016 | def process_file_json(self, show_decoded_strings=False, |
| @@ -3048,7 +3057,7 @@ class VBA_Parser_CLI(VBA_Parser): | @@ -3048,7 +3057,7 @@ class VBA_Parser_CLI(VBA_Parser): | ||
| 3048 | curr_macro = {} | 3057 | curr_macro = {} |
| 3049 | if hide_attributes: | 3058 | if hide_attributes: |
| 3050 | # hide attribute lines: | 3059 | # hide attribute lines: |
| 3051 | - vba_code_filtered = filter_vba(vba_code) | 3060 | + vba_code_filtered = filter_vba(vba_code.decode('utf-8','replace')) |
| 3052 | else: | 3061 | else: |
| 3053 | vba_code_filtered = vba_code | 3062 | vba_code_filtered = vba_code |
| 3054 | 3063 | ||
| @@ -3087,7 +3096,7 @@ class VBA_Parser_CLI(VBA_Parser): | @@ -3087,7 +3096,7 @@ class VBA_Parser_CLI(VBA_Parser): | ||
| 3087 | if self.detect_vba_macros(): | 3096 | if self.detect_vba_macros(): |
| 3088 | # print a waiting message only if the output is not redirected to a file: | 3097 | # print a waiting message only if the output is not redirected to a file: |
| 3089 | if sys.stdout.isatty(): | 3098 | if sys.stdout.isatty(): |
| 3090 | - print 'Analysis...\r', | 3099 | + print('Analysis...\r') |
| 3091 | sys.stdout.flush() | 3100 | sys.stdout.flush() |
| 3092 | self.analyze_macros(show_decoded_strings=show_decoded_strings, | 3101 | self.analyze_macros(show_decoded_strings=show_decoded_strings, |
| 3093 | deobfuscate=deobfuscate) | 3102 | deobfuscate=deobfuscate) |
| @@ -3105,7 +3114,7 @@ class VBA_Parser_CLI(VBA_Parser): | @@ -3105,7 +3114,7 @@ class VBA_Parser_CLI(VBA_Parser): | ||
| 3105 | base64obf, dridex, vba_obf) | 3114 | base64obf, dridex, vba_obf) |
| 3106 | 3115 | ||
| 3107 | line = '%-12s %s' % (flags, self.filename) | 3116 | line = '%-12s %s' % (flags, self.filename) |
| 3108 | - print line | 3117 | + print(line) |
| 3109 | 3118 | ||
| 3110 | # old table display: | 3119 | # old table display: |
| 3111 | # macros = autoexec = suspicious = iocs = hexstrings = 'no' | 3120 | # macros = autoexec = suspicious = iocs = hexstrings = 'no' |
| @@ -3198,7 +3207,7 @@ def main(): | @@ -3198,7 +3207,7 @@ def main(): | ||
| 3198 | 3207 | ||
| 3199 | # Print help if no arguments are passed | 3208 | # Print help if no arguments are passed |
| 3200 | if len(args) == 0: | 3209 | if len(args) == 0: |
| 3201 | - print __doc__ | 3210 | + print(__doc__) |
| 3202 | parser.print_help() | 3211 | parser.print_help() |
| 3203 | sys.exit(RETURN_WRONG_ARGS) | 3212 | sys.exit(RETURN_WRONG_ARGS) |
| 3204 | 3213 | ||
| @@ -3209,7 +3218,7 @@ def main(): | @@ -3209,7 +3218,7 @@ def main(): | ||
| 3209 | url='http://decalage.info/python/oletools', | 3218 | url='http://decalage.info/python/oletools', |
| 3210 | type='MetaInformation') | 3219 | type='MetaInformation') |
| 3211 | else: | 3220 | else: |
| 3212 | - print 'olevba %s - http://decalage.info/python/oletools' % __version__ | 3221 | + print('olevba %s - http://decalage.info/python/oletools' % __version__) |
| 3213 | 3222 | ||
| 3214 | logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') | 3223 | logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') |
| 3215 | # enable logging in the modules: | 3224 | # enable logging in the modules: |
| @@ -3229,8 +3238,8 @@ def main(): | @@ -3229,8 +3238,8 @@ def main(): | ||
| 3229 | # Column headers (do not know how many files there will be yet, so if no output_mode | 3238 | # Column headers (do not know how many files there will be yet, so if no output_mode |
| 3230 | # was specified, we will print triage for first file --> need these headers) | 3239 | # was specified, we will print triage for first file --> need these headers) |
| 3231 | if options.output_mode in ('triage', 'unspecified'): | 3240 | if options.output_mode in ('triage', 'unspecified'): |
| 3232 | - print '%-12s %-65s' % ('Flags', 'Filename') | ||
| 3233 | - print '%-12s %-65s' % ('-' * 11, '-' * 65) | 3241 | + print('%-12s %-65s' % ('Flags', 'Filename')) |
| 3242 | + print('%-12s %-65s' % ('-' * 11, '-' * 65)) | ||
| 3234 | 3243 | ||
| 3235 | previous_container = None | 3244 | previous_container = None |
| 3236 | count = 0 | 3245 | count = 0 |
| @@ -3248,14 +3257,14 @@ def main(): | @@ -3248,14 +3257,14 @@ def main(): | ||
| 3248 | if isinstance(data, Exception): | 3257 | if isinstance(data, Exception): |
| 3249 | if isinstance(data, PathNotFoundException): | 3258 | if isinstance(data, PathNotFoundException): |
| 3250 | if options.output_mode in ('triage', 'unspecified'): | 3259 | if options.output_mode in ('triage', 'unspecified'): |
| 3251 | - print '%-12s %s - File not found' % ('?', filename) | 3260 | + print('%-12s %s - File not found' % ('?', filename)) |
| 3252 | elif options.output_mode != 'json': | 3261 | elif options.output_mode != 'json': |
| 3253 | log.error('Given path %r does not exist!' % filename) | 3262 | log.error('Given path %r does not exist!' % filename) |
| 3254 | return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \ | 3263 | return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \ |
| 3255 | else RETURN_SEVERAL_ERRS | 3264 | else RETURN_SEVERAL_ERRS |
| 3256 | else: | 3265 | else: |
| 3257 | if options.output_mode in ('triage', 'unspecified'): | 3266 | if options.output_mode in ('triage', 'unspecified'): |
| 3258 | - print '%-12s %s - Failed to read from zip file %s' % ('?', filename, container) | 3267 | + print('%-12s %s - Failed to read from zip file %s' % ('?', filename, container)) |
| 3259 | elif options.output_mode != 'json': | 3268 | elif options.output_mode != 'json': |
| 3260 | log.error('Exception opening/reading %r from zip file %r: %s' | 3269 | log.error('Exception opening/reading %r from zip file %r: %s' |
| 3261 | % (filename, container, data)) | 3270 | % (filename, container, data)) |
| @@ -3282,7 +3291,7 @@ def main(): | @@ -3282,7 +3291,7 @@ def main(): | ||
| 3282 | # print container name when it changes: | 3291 | # print container name when it changes: |
| 3283 | if container != previous_container: | 3292 | if container != previous_container: |
| 3284 | if container is not None: | 3293 | if container is not None: |
| 3285 | - print '\nFiles in %s:' % container | 3294 | + print('\nFiles in %s:' % container) |
| 3286 | previous_container = container | 3295 | previous_container = container |
| 3287 | # summarized output for triage: | 3296 | # summarized output for triage: |
| 3288 | vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, | 3297 | vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, |
| @@ -3300,8 +3309,8 @@ def main(): | @@ -3300,8 +3309,8 @@ def main(): | ||
| 3300 | 3309 | ||
| 3301 | except (SubstreamOpenError, UnexpectedDataError) as exc: | 3310 | except (SubstreamOpenError, UnexpectedDataError) as exc: |
| 3302 | if options.output_mode in ('triage', 'unspecified'): | 3311 | if options.output_mode in ('triage', 'unspecified'): |
| 3303 | - print '%-12s %s - Error opening substream or uenxpected ' \ | ||
| 3304 | - 'content' % ('?', filename) | 3312 | + print('%-12s %s - Error opening substream or uenxpected ' \ |
| 3313 | + 'content' % ('?', filename)) | ||
| 3305 | elif options.output_mode == 'json': | 3314 | elif options.output_mode == 'json': |
| 3306 | print_json(file=filename, type='error', | 3315 | print_json(file=filename, type='error', |
| 3307 | error=type(exc).__name__, message=str(exc)) | 3316 | error=type(exc).__name__, message=str(exc)) |
| @@ -3312,7 +3321,7 @@ def main(): | @@ -3312,7 +3321,7 @@ def main(): | ||
| 3312 | else RETURN_SEVERAL_ERRS | 3321 | else RETURN_SEVERAL_ERRS |
| 3313 | except FileOpenError as exc: | 3322 | except FileOpenError as exc: |
| 3314 | if options.output_mode in ('triage', 'unspecified'): | 3323 | if options.output_mode in ('triage', 'unspecified'): |
| 3315 | - print '%-12s %s - File format not supported' % ('?', filename) | 3324 | + print('%-12s %s - File format not supported' % ('?', filename)) |
| 3316 | elif options.output_mode == 'json': | 3325 | elif options.output_mode == 'json': |
| 3317 | print_json(file=filename, type='error', | 3326 | print_json(file=filename, type='error', |
| 3318 | error=type(exc).__name__, message=str(exc)) | 3327 | error=type(exc).__name__, message=str(exc)) |
| @@ -3322,7 +3331,7 @@ def main(): | @@ -3322,7 +3331,7 @@ def main(): | ||
| 3322 | else RETURN_SEVERAL_ERRS | 3331 | else RETURN_SEVERAL_ERRS |
| 3323 | except ProcessingError as exc: | 3332 | except ProcessingError as exc: |
| 3324 | if options.output_mode in ('triage', 'unspecified'): | 3333 | if options.output_mode in ('triage', 'unspecified'): |
| 3325 | - print '%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc) | 3334 | + print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc)) |
| 3326 | elif options.output_mode == 'json': | 3335 | elif options.output_mode == 'json': |
| 3327 | print_json(file=filename, type='error', | 3336 | print_json(file=filename, type='error', |
| 3328 | error=type(exc).__name__, | 3337 | error=type(exc).__name__, |
| @@ -3337,9 +3346,9 @@ def main(): | @@ -3337,9 +3346,9 @@ def main(): | ||
| 3337 | vba_parser.close() | 3346 | vba_parser.close() |
| 3338 | 3347 | ||
| 3339 | if options.output_mode == 'triage': | 3348 | if options.output_mode == 'triage': |
| 3340 | - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ | 3349 | + print('\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ |
| 3341 | 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ | 3350 | 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ |
| 3342 | - 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' | 3351 | + 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n') |
| 3343 | 3352 | ||
| 3344 | if count == 1 and options.output_mode == 'unspecified': | 3353 | if count == 1 and options.output_mode == 'unspecified': |
| 3345 | # if options -t, -d and -j were not specified and it's a single file, print details: | 3354 | # if options -t, -d and -j were not specified and it's a single file, print details: |
oletools/ppt_parser.py
| @@ -1570,4 +1570,4 @@ def iterative_decompress(stream, size, chunk_size=4096): | @@ -1570,4 +1570,4 @@ def iterative_decompress(stream, size, chunk_size=4096): | ||
| 1570 | 1570 | ||
| 1571 | 1571 | ||
| 1572 | if __name__ == '__main__': | 1572 | if __name__ == '__main__': |
| 1573 | - print 'nothing here to run!' | 1573 | + print('nothing here to run!') |
oletools/thirdparty/olefile/olefile.py
| @@ -1030,10 +1030,11 @@ class OleDirectoryEntry: | @@ -1030,10 +1030,11 @@ class OleDirectoryEntry: | ||
| 1030 | #[PL] this method was added to use simple recursion instead of a complex | 1030 | #[PL] this method was added to use simple recursion instead of a complex |
| 1031 | # algorithm. | 1031 | # algorithm. |
| 1032 | # if this is not a storage or a leaf of the tree, nothing to do: | 1032 | # if this is not a storage or a leaf of the tree, nothing to do: |
| 1033 | + | ||
| 1033 | if child_sid == NOSTREAM: | 1034 | if child_sid == NOSTREAM: |
| 1034 | return | 1035 | return |
| 1035 | # check if child SID is in the proper range: | 1036 | # check if child SID is in the proper range: |
| 1036 | - if child_sid<0 or child_sid>=len(self.olefile.direntries): | 1037 | + if child_sid <= 0 or child_sid >= len(self.olefile.direntries): |
| 1037 | self.olefile._raise_defect(DEFECT_INCORRECT, 'OLE DirEntry index out of range') | 1038 | self.olefile._raise_defect(DEFECT_INCORRECT, 'OLE DirEntry index out of range') |
| 1038 | else: | 1039 | else: |
| 1039 | # get child direntry: | 1040 | # get child direntry: |
oletools/thirdparty/olefile/olefile2.py
| @@ -1004,7 +1004,7 @@ class OleFileIO: | @@ -1004,7 +1004,7 @@ class OleFileIO: | ||
| 1004 | TIFF files). | 1004 | TIFF files). |
| 1005 | """ | 1005 | """ |
| 1006 | 1006 | ||
| 1007 | - def __init__(self, filename = None, raise_defects=DEFECT_FATAL): | 1007 | + def __init__(self, filename = None, raise_defects=DEFECT_FATAL): |
| 1008 | """ | 1008 | """ |
| 1009 | Constructor for OleFileIO class. | 1009 | Constructor for OleFileIO class. |
| 1010 | 1010 |
oletools/thirdparty/tablestream/tablestream.py
| @@ -236,7 +236,7 @@ class TableStream(object): | @@ -236,7 +236,7 @@ class TableStream(object): | ||
| 236 | assert len(row) == self.num_columns | 236 | assert len(row) == self.num_columns |
| 237 | columns = [] | 237 | columns = [] |
| 238 | max_lines = 0 | 238 | max_lines = 0 |
| 239 | - for i in xrange(self.num_columns): | 239 | + for i in range(self.num_columns): |
| 240 | cell = row[i] | 240 | cell = row[i] |
| 241 | # Convert to string: | 241 | # Convert to string: |
| 242 | # TODO: handle unicode properly | 242 | # TODO: handle unicode properly |
| @@ -245,7 +245,7 @@ class TableStream(object): | @@ -245,7 +245,7 @@ class TableStream(object): | ||
| 245 | # decode from UTF-8, replacing undecodable bytes | 245 | # decode from UTF-8, replacing undecodable bytes |
| 246 | cell = cell.decode('utf-8', errors='replace') | 246 | cell = cell.decode('utf-8', errors='replace') |
| 247 | else: | 247 | else: |
| 248 | - cell = unicode(cell) | 248 | + cell = cell |
| 249 | # Wrap cell text according to the column width | 249 | # Wrap cell text according to the column width |
| 250 | # TODO: use a TextWrapper object for each column instead | 250 | # TODO: use a TextWrapper object for each column instead |
| 251 | # split the string if it contains newline characters, otherwise | 251 | # split the string if it contains newline characters, otherwise |
| @@ -257,16 +257,16 @@ class TableStream(object): | @@ -257,16 +257,16 @@ class TableStream(object): | ||
| 257 | if colors is not None and self.outfile.isatty(): | 257 | if colors is not None and self.outfile.isatty(): |
| 258 | color = colors[i] | 258 | color = colors[i] |
| 259 | if color: | 259 | if color: |
| 260 | - for j in xrange(len(column)): | 260 | + for j in range(len(column)): |
| 261 | # print '%r: %s' % (column[j], type(column[j])) | 261 | # print '%r: %s' % (column[j], type(column[j])) |
| 262 | column[j] = colorclass.Color('{auto%s}%s{/%s}' % (color, column[j], color)) | 262 | column[j] = colorclass.Color('{auto%s}%s{/%s}' % (color, column[j], color)) |
| 263 | columns.append(column) | 263 | columns.append(column) |
| 264 | # determine which column has the highest number of lines | 264 | # determine which column has the highest number of lines |
| 265 | max_lines = max(len(columns[i]), max_lines) | 265 | max_lines = max(len(columns[i]), max_lines) |
| 266 | # transpose: write output line by line | 266 | # transpose: write output line by line |
| 267 | - for j in xrange(max_lines): | 267 | + for j in range(max_lines): |
| 268 | self.write(self.style.vertical_left) | 268 | self.write(self.style.vertical_left) |
| 269 | - for i in xrange(self.num_columns): | 269 | + for i in range(self.num_columns): |
| 270 | column = columns[i] | 270 | column = columns[i] |
| 271 | if j<len(column): | 271 | if j<len(column): |
| 272 | # text to be written | 272 | # text to be written |