Commit 1541d5dedc31c06d3adbf382b51c5e4794fb711e

Authored by Philippe Lagadec
Committed by GitHub
2 parents e73e4dc3 4984d587

Merge pull request #64 from sebdraven/master

Conversion of all oletools to Python 3.5 (temporarily breaking compatibility with Python 2.7)
oletools/mraptor.py
@@ -233,16 +233,16 @@ def main(): @@ -233,16 +233,16 @@ def main():
233 233
234 # Print help if no arguments are passed 234 # Print help if no arguments are passed
235 if len(args) == 0: 235 if len(args) == 0:
236 - print __doc__ 236 + print(__doc__)
237 parser.print_help() 237 parser.print_help()
238 - print '\nAn exit code is returned based on the analysis result:' 238 + print('\nAn exit code is returned based on the analysis result:')
239 for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK, Result_Error, Result_Suspicious): 239 for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK, Result_Error, Result_Suspicious):
240 - print ' - %d: %s' % (result.exit_code, result.name) 240 + print(' - %d: %s' % (result.exit_code, result.name))
241 sys.exit() 241 sys.exit()
242 242
243 # print banner with version 243 # print banner with version
244 - print 'MacroRaptor %s - http://decalage.info/python/oletools' % __version__  
245 - print 'This is work in progress, please report issues at %s' % URL_ISSUES 244 + print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__)
  245 + print('This is work in progress, please report issues at %s' % URL_ISSUES)
246 246
247 logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') 247 logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
248 # enable logging in the modules: 248 # enable logging in the modules:
@@ -292,7 +292,7 @@ def main(): @@ -292,7 +292,7 @@ def main():
292 vba_code_all_modules = '' 292 vba_code_all_modules = ''
293 try: 293 try:
294 for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_all_macros(): 294 for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_all_macros():
295 - vba_code_all_modules += vba_code + '\n' 295 + vba_code_all_modules += vba_code.decode('utf-8','replace') + '\n'
296 except Exception as e: 296 except Exception as e:
297 # log.error('Error when parsing VBA macros from file %r' % full_name) 297 # log.error('Error when parsing VBA macros from file %r' % full_name)
298 result = Result_Error 298 result = Result_Error
@@ -319,9 +319,9 @@ def main(): @@ -319,9 +319,9 @@ def main():
319 global_result = result 319 global_result = result
320 exitcode = result.exit_code 320 exitcode = result.exit_code
321 321
322 - print ''  
323 - print 'Flags: A=AutoExec, W=Write, X=Execute'  
324 - print 'Exit code: %d - %s' % (exitcode, global_result.name) 322 + print('')
  323 + print('Flags: A=AutoExec, W=Write, X=Execute')
  324 + print('Exit code: %d - %s' % (exitcode, global_result.name))
325 sys.exit(exitcode) 325 sys.exit(exitcode)
326 326
327 if __name__ == '__main__': 327 if __name__ == '__main__':
oletools/olemap.py
@@ -90,14 +90,14 @@ FAT_COLORS = { @@ -90,14 +90,14 @@ FAT_COLORS = {
90 90
91 if __name__ == '__main__': 91 if __name__ == '__main__':
92 # print banner with version 92 # print banner with version
93 - print 'olemap %s - http://decalage.info/python/oletools' % __version__ 93 + print('olemap %s - http://decalage.info/python/oletools' % __version__)
94 94
95 fname = sys.argv[1] 95 fname = sys.argv[1]
96 ole = olefile.OleFileIO(fname) 96 ole = olefile.OleFileIO(fname)
97 97
98 - print 'FAT:' 98 + print('FAT:')
99 t = tablestream.TableStream([8, 12, 8, 8], header_row=['Sector #', 'Type', 'Offset', 'Next #']) 99 t = tablestream.TableStream([8, 12, 8, 8], header_row=['Sector #', 'Type', 'Offset', 'Next #'])
100 - for i in xrange(ole.nb_sect): 100 + for i in range(ole.nb_sect):
101 fat_value = ole.fat[i] 101 fat_value = ole.fat[i]
102 fat_type = FAT_TYPES.get(fat_value, '<Data>') 102 fat_type = FAT_TYPES.get(fat_value, '<Data>')
103 color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default']) 103 color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default'])
@@ -106,15 +106,15 @@ if __name__ == '__main__': @@ -106,15 +106,15 @@ if __name__ == '__main__':
106 # print '%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value) 106 # print '%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value)
107 t.write_row(['%8X' % i, fat_type, '%08X' % offset, '%8X' % fat_value], 107 t.write_row(['%8X' % i, fat_type, '%08X' % offset, '%8X' % fat_value],
108 colors=[None, color_type, None, None]) 108 colors=[None, color_type, None, None])
109 - print '' 109 + print('')
110 110
111 - print 'MiniFAT:' 111 + print('MiniFAT:')
112 # load MiniFAT if it wasn't already done: 112 # load MiniFAT if it wasn't already done:
113 ole.loadminifat() 113 ole.loadminifat()
114 - for i in xrange(len(ole.minifat)): 114 + for i in range(len(ole.minifat)):
115 fat_value = ole.minifat[i] 115 fat_value = ole.minifat[i]
116 fat_type = FAT_TYPES.get(fat_value, 'Data') 116 fat_type = FAT_TYPES.get(fat_value, 'Data')
117 - print '%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value) 117 + print('%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value))
118 118
119 ole.close() 119 ole.close()
120 120
oletools/oletimes.py
@@ -94,6 +94,6 @@ for obj in ole.listdir(streams=True, storages=True): @@ -94,6 +94,6 @@ for obj in ole.listdir(streams=True, storages=True):
94 #print '- %s: mtime=%s ctime=%s' % (repr('/'.join(obj)), ole.getmtime(obj), ole.getctime(obj)) 94 #print '- %s: mtime=%s ctime=%s' % (repr('/'.join(obj)), ole.getmtime(obj), ole.getctime(obj))
95 t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj)))) 95 t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj))))
96 96
97 -print t 97 +print(t)
98 98
99 ole.close() 99 ole.close()
oletools/olevba.py
@@ -215,7 +215,7 @@ __version__ = '0.50' @@ -215,7 +215,7 @@ __version__ = '0.50'
215 215
216 import sys, logging 216 import sys, logging
217 import struct 217 import struct
218 -import cStringIO 218 +from _io import StringIO,BytesIO
219 import math 219 import math
220 import zipfile 220 import zipfile
221 import re 221 import re
@@ -240,9 +240,9 @@ except ImportError: @@ -240,9 +240,9 @@ except ImportError:
240 # Python <2.5: standalone ElementTree install 240 # Python <2.5: standalone ElementTree install
241 import elementtree.cElementTree as ET 241 import elementtree.cElementTree as ET
242 except ImportError: 242 except ImportError:
243 - raise ImportError, "lxml or ElementTree are not installed, " \ 243 + raise(ImportError, "lxml or ElementTree are not installed, " \
244 + "see http://codespeak.net/lxml " \ 244 + "see http://codespeak.net/lxml " \
245 - + "or http://effbot.org/zone/element-index.htm" 245 + + "or http://effbot.org/zone/element-index.htm")
246 246
247 import thirdparty.olefile as olefile 247 import thirdparty.olefile as olefile
248 from thirdparty.prettytable import prettytable 248 from thirdparty.prettytable import prettytable
@@ -421,7 +421,7 @@ TYPE2TAG = { @@ -421,7 +421,7 @@ TYPE2TAG = {
421 421
422 422
423 # MSO files ActiveMime header magic 423 # MSO files ActiveMime header magic
424 -MSO_ACTIVEMIME_HEADER = 'ActiveMime' 424 +MSO_ACTIVEMIME_HEADER = b'ActiveMime'
425 425
426 MODULE_EXTENSION = "bas" 426 MODULE_EXTENSION = "bas"
427 CLASS_EXTENSION = "cls" 427 CLASS_EXTENSION = "cls"
@@ -630,7 +630,7 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') @@ -630,7 +630,7 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"')
630 re_nothex_check = re.compile(r'[G-Zg-z]') 630 re_nothex_check = re.compile(r'[G-Zg-z]')
631 631
632 # regex to extract printable strings (at least 5 chars) from VBA Forms: 632 # regex to extract printable strings (at least 5 chars) from VBA Forms:
633 -re_printable_string = re.compile(r'[\t\r\n\x20-\xFF]{5,}') 633 +re_printable_string = re.compile(rb'[\t\r\n\x20-\xFF]{5,}')
634 634
635 635
636 # === PARTIAL VBA GRAMMAR ==================================================== 636 # === PARTIAL VBA GRAMMAR ====================================================
@@ -1060,10 +1060,10 @@ def decompress_stream(compressed_container): @@ -1060,10 +1060,10 @@ def decompress_stream(compressed_container):
1060 # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the 1060 # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the
1061 # DecompressedBuffer (section 2.4.1.1.2). 1061 # DecompressedBuffer (section 2.4.1.1.2).
1062 1062
1063 - decompressed_container = '' # result 1063 + decompressed_container = b'' # result
1064 compressed_current = 0 1064 compressed_current = 0
1065 1065
1066 - sig_byte = ord(compressed_container[compressed_current]) 1066 + sig_byte = compressed_container[compressed_current]
1067 if sig_byte != 0x01: 1067 if sig_byte != 0x01:
1068 raise ValueError('invalid signature byte {0:02X}'.format(sig_byte)) 1068 raise ValueError('invalid signature byte {0:02X}'.format(sig_byte))
1069 1069
@@ -1109,7 +1109,7 @@ def decompress_stream(compressed_container): @@ -1109,7 +1109,7 @@ def decompress_stream(compressed_container):
1109 # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk 1109 # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk
1110 # uncompressed chunk: read the next 4096 bytes as-is 1110 # uncompressed chunk: read the next 4096 bytes as-is
1111 #TODO: check if there are at least 4096 bytes left 1111 #TODO: check if there are at least 4096 bytes left
1112 - decompressed_container += compressed_container[compressed_current:compressed_current + 4096] 1112 + decompressed_container += bytes([compressed_container[compressed_current:compressed_current + 4096]])
1113 compressed_current += 4096 1113 compressed_current += 4096
1114 else: 1114 else:
1115 # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk 1115 # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk
@@ -1120,9 +1120,9 @@ def decompress_stream(compressed_container): @@ -1120,9 +1120,9 @@ def decompress_stream(compressed_container):
1120 # log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end)) 1120 # log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end))
1121 # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or 1121 # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or
1122 # copy tokens (reference to a previous literal token) 1122 # copy tokens (reference to a previous literal token)
1123 - flag_byte = ord(compressed_container[compressed_current]) 1123 + flag_byte = compressed_container[compressed_current]
1124 compressed_current += 1 1124 compressed_current += 1
1125 - for bit_index in xrange(0, 8): 1125 + for bit_index in range(0, 8):
1126 # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end)) 1126 # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end))
1127 if compressed_current >= compressed_end: 1127 if compressed_current >= compressed_end:
1128 break 1128 break
@@ -1132,7 +1132,7 @@ def decompress_stream(compressed_container): @@ -1132,7 +1132,7 @@ def decompress_stream(compressed_container):
1132 #log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit)) 1132 #log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit))
1133 if flag_bit == 0: # LiteralToken 1133 if flag_bit == 0: # LiteralToken
1134 # copy one byte directly to output 1134 # copy one byte directly to output
1135 - decompressed_container += compressed_container[compressed_current] 1135 + decompressed_container += bytes([compressed_container[compressed_current]])
1136 compressed_current += 1 1136 compressed_current += 1
1137 else: # CopyToken 1137 else: # CopyToken
1138 # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken 1138 # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken
@@ -1147,8 +1147,8 @@ def decompress_stream(compressed_container): @@ -1147,8 +1147,8 @@ def decompress_stream(compressed_container):
1147 offset = (temp1 >> temp2) + 1 1147 offset = (temp1 >> temp2) + 1
1148 #log.debug('offset=%d length=%d' % (offset, length)) 1148 #log.debug('offset=%d length=%d' % (offset, length))
1149 copy_source = len(decompressed_container) - offset 1149 copy_source = len(decompressed_container) - offset
1150 - for index in xrange(copy_source, copy_source + length):  
1151 - decompressed_container += decompressed_container[index] 1150 + for index in range(copy_source, copy_source + length):
  1151 + decompressed_container += bytes([decompressed_container[index]])
1152 compressed_current += 2 1152 compressed_current += 2
1153 return decompressed_container 1153 return decompressed_container
1154 1154
@@ -1191,7 +1191,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1191,7 +1191,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1191 code_modules = {} 1191 code_modules = {}
1192 1192
1193 for line in project: 1193 for line in project:
1194 - line = line.strip() 1194 + line = line.strip().decode('utf-8','ignore')
1195 if '=' in line: 1195 if '=' in line:
1196 # split line at the 1st equal sign: 1196 # split line at the 1st equal sign:
1197 name, value = line.split('=', 1) 1197 name, value = line.split('=', 1)
@@ -1222,7 +1222,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1222,7 +1222,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1222 else: 1222 else:
1223 raise UnexpectedDataError(dir_path, name, expected, value) 1223 raise UnexpectedDataError(dir_path, name, expected, value)
1224 1224
1225 - dir_stream = cStringIO.StringIO(decompress_stream(dir_compressed)) 1225 + dir_stream = BytesIO(decompress_stream(dir_compressed))
1226 1226
1227 # PROJECTSYSKIND Record 1227 # PROJECTSYSKIND Record
1228 projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0] 1228 projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0]
@@ -1484,7 +1484,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1484,7 +1484,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1484 uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace') 1484 uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace')
1485 1485
1486 log.debug("parsing {0} modules".format(projectmodules_count)) 1486 log.debug("parsing {0} modules".format(projectmodules_count))
1487 - for projectmodule_index in xrange(0, projectmodules_count): 1487 + for projectmodule_index in range(0, projectmodules_count):
1488 try: 1488 try:
1489 modulename_id = struct.unpack("<H", dir_stream.read(2))[0] 1489 modulename_id = struct.unpack("<H", dir_stream.read(2))[0]
1490 check_value('MODULENAME_Id', 0x0019, modulename_id) 1490 check_value('MODULENAME_Id', 0x0019, modulename_id)
@@ -1881,19 +1881,19 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): @@ -1881,19 +1881,19 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'):
1881 pass 1881 pass
1882 elif isinstance(json_obj, str): 1882 elif isinstance(json_obj, str):
1883 # de-code and re-encode 1883 # de-code and re-encode
1884 - dencoded = json_obj.decode(encoding, errors).encode(encoding, errors) 1884 + dencoded = json_obj
1885 if dencoded != json_obj: 1885 if dencoded != json_obj:
1886 log.debug('json2ascii: replaced: {0} (len {1})' 1886 log.debug('json2ascii: replaced: {0} (len {1})'
1887 .format(json_obj, len(json_obj))) 1887 .format(json_obj, len(json_obj)))
1888 log.debug('json2ascii: with: {0} (len {1})' 1888 log.debug('json2ascii: with: {0} (len {1})'
1889 .format(dencoded, len(dencoded))) 1889 .format(dencoded, len(dencoded)))
1890 return dencoded 1890 return dencoded
1891 - elif isinstance(json_obj, unicode): 1891 + elif isinstance(json_obj, bytes):
1892 log.debug('json2ascii: encode unicode: {0}' 1892 log.debug('json2ascii: encode unicode: {0}'
1893 - .format(json_obj.encode(encoding, errors))) 1893 + .format(json_obj.decode(encoding, errors)))
1894 # cannot put original into logger 1894 # cannot put original into logger
1895 # print 'original: ' json_obj 1895 # print 'original: ' json_obj
1896 - return json_obj.encode(encoding, errors) 1896 + return json_obj.decode(encoding, errors)
1897 elif isinstance(json_obj, dict): 1897 elif isinstance(json_obj, dict):
1898 for key in json_obj: 1898 for key in json_obj:
1899 json_obj[key] = json2ascii(json_obj[key]) 1899 json_obj[key] = json2ascii(json_obj[key])
@@ -1931,18 +1931,18 @@ def print_json(json_dict=None, _json_is_last=False, **json_parts): @@ -1931,18 +1931,18 @@ def print_json(json_dict=None, _json_is_last=False, **json_parts):
1931 json_dict = json_parts 1931 json_dict = json_parts
1932 1932
1933 if not _have_printed_json_start: 1933 if not _have_printed_json_start:
1934 - print '[' 1934 + print('[')
1935 _have_printed_json_start = True 1935 _have_printed_json_start = True
1936 1936
1937 lines = json.dumps(json2ascii(json_dict), check_circular=False, 1937 lines = json.dumps(json2ascii(json_dict), check_circular=False,
1938 indent=4, ensure_ascii=False).splitlines() 1938 indent=4, ensure_ascii=False).splitlines()
1939 for line in lines[:-1]: 1939 for line in lines[:-1]:
1940 - print ' {0}'.format(line) 1940 + print(' {0}'.format(line))
1941 if _json_is_last: 1941 if _json_is_last:
1942 - print ' {0}'.format(lines[-1]) # print last line without comma  
1943 - print ']' 1942 + print(' {0}'.format(lines[-1])) # print last line without comma
  1943 + print(']')
1944 else: 1944 else:
1945 - print ' {0},'.format(lines[-1]) # print last line with comma 1945 + print(' {0},'.format(lines[-1])) # print last line with comma
1946 1946
1947 1947
1948 class VBA_Scanner(object): 1948 class VBA_Scanner(object):
@@ -1959,10 +1959,10 @@ class VBA_Scanner(object): @@ -1959,10 +1959,10 @@ class VBA_Scanner(object):
1959 """ 1959 """
1960 # join long lines ending with " _": 1960 # join long lines ending with " _":
1961 self.code = vba_collapse_long_lines(vba_code) 1961 self.code = vba_collapse_long_lines(vba_code)
1962 - self.code_hex = ''  
1963 - self.code_hex_rev = ''  
1964 - self.code_rev_hex = ''  
1965 - self.code_base64 = '' 1962 + self.code_hex = b''
  1963 + self.code_hex_rev = b''
  1964 + self.code_rev_hex = b''
  1965 + self.code_base64 = b''
1966 self.code_dridex = '' 1966 self.code_dridex = ''
1967 self.code_vba = '' 1967 self.code_vba = ''
1968 self.strReverse = None 1968 self.strReverse = None
@@ -1995,19 +1995,19 @@ class VBA_Scanner(object): @@ -1995,19 +1995,19 @@ class VBA_Scanner(object):
1995 if 'strreverse' in self.code.lower(): self.strReverse = True 1995 if 'strreverse' in self.code.lower(): self.strReverse = True
1996 # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords: 1996 # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords:
1997 for encoded, decoded in self.hex_strings: 1997 for encoded, decoded in self.hex_strings:
1998 - self.code_hex += '\n' + decoded 1998 + self.code_hex += b'\n' + decoded
1999 # if the code contains "StrReverse", also append the hex strings in reverse order: 1999 # if the code contains "StrReverse", also append the hex strings in reverse order:
2000 if self.strReverse: 2000 if self.strReverse:
2001 # StrReverse after hex decoding: 2001 # StrReverse after hex decoding:
2002 - self.code_hex_rev += '\n' + decoded[::-1] 2002 + self.code_hex_rev += b'\n' + decoded[::-1]
2003 # StrReverse before hex decoding: 2003 # StrReverse before hex decoding:
2004 - self.code_rev_hex += '\n' + binascii.unhexlify(encoded[::-1]) 2004 + self.code_rev_hex += b'\n' + binascii.unhexlify(encoded[::-1])
2005 #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ 2005 #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
2006 #TODO: also append the full code reversed if StrReverse? (risk of false positives?) 2006 #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
2007 # Detect Base64-encoded strings 2007 # Detect Base64-encoded strings
2008 self.base64_strings = detect_base64_strings(self.code) 2008 self.base64_strings = detect_base64_strings(self.code)
2009 for encoded, decoded in self.base64_strings: 2009 for encoded, decoded in self.base64_strings:
2010 - self.code_base64 += '\n' + decoded 2010 + self.code_base64 += b'\n' + decoded
2011 # Detect Dridex-encoded strings 2011 # Detect Dridex-encoded strings
2012 self.dridex_strings = detect_dridex_strings(self.code) 2012 self.dridex_strings = detect_dridex_strings(self.code)
2013 for encoded, decoded in self.dridex_strings: 2013 for encoded, decoded in self.dridex_strings:
@@ -2026,13 +2026,15 @@ class VBA_Scanner(object): @@ -2026,13 +2026,15 @@ class VBA_Scanner(object):
2026 2026
2027 for code, obfuscation in ( 2027 for code, obfuscation in (
2028 (self.code, None), 2028 (self.code, None),
2029 - (self.code_hex, 'Hex'), 2029 + (self.code_hex.decode('utf-8','replace'), 'Hex'),
2030 (self.code_hex_rev, 'Hex+StrReverse'), 2030 (self.code_hex_rev, 'Hex+StrReverse'),
2031 (self.code_rev_hex, 'StrReverse+Hex'), 2031 (self.code_rev_hex, 'StrReverse+Hex'),
2032 - (self.code_base64, 'Base64'), 2032 + (self.code_base64.decode('utf-8', 'replace'), 'Base64'),
2033 (self.code_dridex, 'Dridex'), 2033 (self.code_dridex, 'Dridex'),
2034 (self.code_vba, 'VBA expression'), 2034 (self.code_vba, 'VBA expression'),
2035 ): 2035 ):
  2036 + if isinstance(code,bytes):
  2037 + code=code.decode('utf-8','replace')
2036 self.autoexec_keywords += detect_autoexec(code, obfuscation) 2038 self.autoexec_keywords += detect_autoexec(code, obfuscation)
2037 self.suspicious_keywords += detect_suspicious(code, obfuscation) 2039 self.suspicious_keywords += detect_suspicious(code, obfuscation)
2038 self.iocs += detect_patterns(code, obfuscation) 2040 self.iocs += detect_patterns(code, obfuscation)
@@ -2158,7 +2160,7 @@ class VBA_Parser(object): @@ -2158,7 +2160,7 @@ class VBA_Parser(object):
2158 _file = filename 2160 _file = filename
2159 else: 2161 else:
2160 # file already read in memory, make it a file-like object for zipfile: 2162 # file already read in memory, make it a file-like object for zipfile:
2161 - _file = cStringIO.StringIO(data) 2163 + _file = BytesIO(data)
2162 #self.file = _file 2164 #self.file = _file
2163 self.ole_file = None 2165 self.ole_file = None
2164 self.ole_subfiles = [] 2166 self.ole_subfiles = []
@@ -2207,7 +2209,7 @@ class VBA_Parser(object): @@ -2207,7 +2209,7 @@ class VBA_Parser(object):
2207 if data is None: 2209 if data is None:
2208 data = open(filename, 'rb').read() 2210 data = open(filename, 'rb').read()
2209 # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace 2211 # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace
2210 - if 'http://schemas.microsoft.com/office/word/2003/wordml' in data: 2212 + if b'http://schemas.microsoft.com/office/word/2003/wordml' in data:
2211 self.open_word2003xml(data) 2213 self.open_word2003xml(data)
2212 # store a lowercase version for the next tests: 2214 # store a lowercase version for the next tests:
2213 data_lowercase = data.lower() 2215 data_lowercase = data.lower()
@@ -2217,14 +2219,14 @@ class VBA_Parser(object): @@ -2217,14 +2219,14 @@ class VBA_Parser(object):
2217 # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored. 2219 # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored.
2218 # And the line is case insensitive. 2220 # And the line is case insensitive.
2219 # so we'll just check the presence of mime, version and multipart anywhere: 2221 # so we'll just check the presence of mime, version and multipart anywhere:
2220 - if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase \  
2221 - and 'multipart' in data_lowercase: 2222 + if self.type is None and b'mime' in data_lowercase and b'version' in data_lowercase \
  2223 + and b'multipart' in data_lowercase:
2222 self.open_mht(data) 2224 self.open_mht(data)
2223 #TODO: handle exceptions 2225 #TODO: handle exceptions
2224 #TODO: Excel 2003 XML 2226 #TODO: Excel 2003 XML
2225 # Check if this is a plain text VBA or VBScript file: 2227 # Check if this is a plain text VBA or VBScript file:
2226 # To avoid scanning binary files, we simply check for some control chars: 2228 # To avoid scanning binary files, we simply check for some control chars:
2227 - if self.type is None and '\x00' not in data: 2229 + if self.type is None and b'\x00' not in data:
2228 self.open_text(data) 2230 self.open_text(data)
2229 if self.type is None: 2231 if self.type is None:
2230 # At this stage, could not match a known format: 2232 # At this stage, could not match a known format:
@@ -2358,6 +2360,8 @@ class VBA_Parser(object): @@ -2358,6 +2360,8 @@ class VBA_Parser(object):
2358 """ 2360 """
2359 log.info('Opening MHTML file %s' % self.filename) 2361 log.info('Opening MHTML file %s' % self.filename)
2360 try: 2362 try:
  2363 + if isinstance(data,bytes):
  2364 + data = data.decode('utf8', 'replace')
2361 # parse the MIME content 2365 # parse the MIME content
2362 # remove any leading whitespace or newline (workaround for issue in email package) 2366 # remove any leading whitespace or newline (workaround for issue in email package)
2363 stripped_data = data.lstrip('\r\n\t ') 2367 stripped_data = data.lstrip('\r\n\t ')
@@ -2387,7 +2391,8 @@ class VBA_Parser(object): @@ -2387,7 +2391,8 @@ class VBA_Parser(object):
2387 # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. 2391 # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
2388 # decompress the zlib data starting at offset 0x32, which is the OLE container: 2392 # decompress the zlib data starting at offset 0x32, which is the OLE container:
2389 # check ActiveMime header: 2393 # check ActiveMime header:
2390 - if isinstance(part_data, str) and is_mso_file(part_data): 2394 +
  2395 + if (isinstance(part_data, str) or isinstance(part_data, bytes)) and is_mso_file(part_data):
2391 log.debug('Found ActiveMime header, decompressing MSO container') 2396 log.debug('Found ActiveMime header, decompressing MSO container')
2392 try: 2397 try:
2393 ole_data = mso_file_extract(part_data) 2398 ole_data = mso_file_extract(part_data)
@@ -2458,6 +2463,8 @@ class VBA_Parser(object): @@ -2458,6 +2463,8 @@ class VBA_Parser(object):
2458 """ 2463 """
2459 log.info('Opening text file %s' % self.filename) 2464 log.info('Opening text file %s' % self.filename)
2460 # directly store the source code: 2465 # directly store the source code:
  2466 + if isinstance(data,bytes):
  2467 + data=data.decode('utf8','replace')
2461 self.vba_code_all_modules = data 2468 self.vba_code_all_modules = data
2462 self.contains_macros = True 2469 self.contains_macros = True
2463 # set type only if parsing succeeds 2470 # set type only if parsing succeeds
@@ -2596,7 +2603,7 @@ class VBA_Parser(object): @@ -2596,7 +2603,7 @@ class VBA_Parser(object):
2596 # Also look for VBA code in any stream including orphans 2603 # Also look for VBA code in any stream including orphans
2597 # (happens in some malformed files) 2604 # (happens in some malformed files)
2598 ole = self.ole_file 2605 ole = self.ole_file
2599 - for sid in xrange(len(ole.direntries)): 2606 + for sid in range(len(ole.direntries)):
2600 # check if id is already done above: 2607 # check if id is already done above:
2601 log.debug('Checking DirEntry #%d' % sid) 2608 log.debug('Checking DirEntry #%d' % sid)
2602 d = ole.direntries[sid] 2609 d = ole.direntries[sid]
@@ -2614,7 +2621,7 @@ class VBA_Parser(object): @@ -2614,7 +2621,7 @@ class VBA_Parser(object):
2614 log.debug('%r...[much more data]...%r' % (data[:100], data[-50:])) 2621 log.debug('%r...[much more data]...%r' % (data[:100], data[-50:]))
2615 else: 2622 else:
2616 log.debug(repr(data)) 2623 log.debug(repr(data))
2617 - if 'Attribut' in data: 2624 + if 'Attribut' in data.decode('utf-8','ignore'):
2618 log.debug('Found VBA compressed code') 2625 log.debug('Found VBA compressed code')
2619 self.contains_macros = True 2626 self.contains_macros = True
2620 except IOError as exc: 2627 except IOError as exc:
@@ -2662,7 +2669,7 @@ class VBA_Parser(object): @@ -2662,7 +2669,7 @@ class VBA_Parser(object):
2662 # Also look for VBA code in any stream including orphans 2669 # Also look for VBA code in any stream including orphans
2663 # (happens in some malformed files) 2670 # (happens in some malformed files)
2664 ole = self.ole_file 2671 ole = self.ole_file
2665 - for sid in xrange(len(ole.direntries)): 2672 + for sid in range(len(ole.direntries)):
2666 # check if id is already done above: 2673 # check if id is already done above:
2667 log.debug('Checking DirEntry #%d' % sid) 2674 log.debug('Checking DirEntry #%d' % sid)
2668 if sid in vba_stream_ids: 2675 if sid in vba_stream_ids:
@@ -2677,7 +2684,7 @@ class VBA_Parser(object): @@ -2677,7 +2684,7 @@ class VBA_Parser(object):
2677 # read data 2684 # read data
2678 log.debug('Reading data from stream %r' % d.name) 2685 log.debug('Reading data from stream %r' % d.name)
2679 data = ole._open(d.isectStart, d.size).read() 2686 data = ole._open(d.isectStart, d.size).read()
2680 - for match in re.finditer(r'\x00Attribut[^e]', data, flags=re.IGNORECASE): 2687 + for match in re.finditer(rb'\x00Attribut[^e]', data, flags=re.IGNORECASE):
2681 start = match.start() - 3 2688 start = match.start() - 3
2682 log.debug('Found VBA compressed code at index %X' % start) 2689 log.debug('Found VBA compressed code at index %X' % start)
2683 compressed_code = data[start:] 2690 compressed_code = data[start:]
@@ -2720,9 +2727,9 @@ class VBA_Parser(object): @@ -2720,9 +2727,9 @@ class VBA_Parser(object):
2720 self.vba_code_all_modules = '' 2727 self.vba_code_all_modules = ''
2721 for (_, _, _, vba_code) in self.extract_all_macros(): 2728 for (_, _, _, vba_code) in self.extract_all_macros():
2722 #TODO: filter code? (each module) 2729 #TODO: filter code? (each module)
2723 - self.vba_code_all_modules += vba_code + '\n' 2730 + self.vba_code_all_modules += vba_code.decode('utf-8', 'ignore') + '\n'
2724 for (_, _, form_string) in self.extract_form_strings(): 2731 for (_, _, form_string) in self.extract_form_strings():
2725 - self.vba_code_all_modules += form_string + '\n' 2732 + self.vba_code_all_modules += form_string.decode('utf-8', 'ignore') + '\n'
2726 # Analyze the whole code at once: 2733 # Analyze the whole code at once:
2727 scanner = VBA_Scanner(self.vba_code_all_modules) 2734 scanner = VBA_Scanner(self.vba_code_all_modules)
2728 self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate) 2735 self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate)
@@ -2897,7 +2904,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2897,7 +2904,7 @@ class VBA_Parser_CLI(VBA_Parser):
2897 """ 2904 """
2898 # print a waiting message only if the output is not redirected to a file: 2905 # print a waiting message only if the output is not redirected to a file:
2899 if sys.stdout.isatty(): 2906 if sys.stdout.isatty():
2900 - print 'Analysis...\r', 2907 + print('Analysis...\r')
2901 sys.stdout.flush() 2908 sys.stdout.flush()
2902 results = self.analyze_macros(show_decoded_strings, deobfuscate) 2909 results = self.analyze_macros(show_decoded_strings, deobfuscate)
2903 if results: 2910 if results:
@@ -2913,9 +2920,9 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2913,9 +2920,9 @@ class VBA_Parser_CLI(VBA_Parser):
2913 if not is_printable(description): 2920 if not is_printable(description):
2914 description = repr(description) 2921 description = repr(description)
2915 t.add_row((kw_type, keyword, description)) 2922 t.add_row((kw_type, keyword, description))
2916 - print t 2923 + print(t)
2917 else: 2924 else:
2918 - print 'No suspicious keyword or IOC found.' 2925 + print('No suspicious keyword or IOC found.')
2919 2926
2920 def print_analysis_json(self, show_decoded_strings=False, deobfuscate=False): 2927 def print_analysis_json(self, show_decoded_strings=False, deobfuscate=False):
2921 """ 2928 """
@@ -2929,7 +2936,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2929,7 +2936,7 @@ class VBA_Parser_CLI(VBA_Parser):
2929 """ 2936 """
2930 # print a waiting message only if the output is not redirected to a file: 2937 # print a waiting message only if the output is not redirected to a file:
2931 if sys.stdout.isatty(): 2938 if sys.stdout.isatty():
2932 - print 'Analysis...\r', 2939 + print('Analysis...\r')
2933 sys.stdout.flush() 2940 sys.stdout.flush()
2934 return [dict(type=kw_type, keyword=keyword, description=description) 2941 return [dict(type=kw_type, keyword=keyword, description=description)
2935 for kw_type, keyword, description in self.analyze_macros(show_decoded_strings, deobfuscate)] 2942 for kw_type, keyword, description in self.analyze_macros(show_decoded_strings, deobfuscate)]
@@ -2958,42 +2965,44 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2958,42 +2965,44 @@ class VBA_Parser_CLI(VBA_Parser):
2958 display_filename = '%s in %s' % (self.filename, self.container) 2965 display_filename = '%s in %s' % (self.filename, self.container)
2959 else: 2966 else:
2960 display_filename = self.filename 2967 display_filename = self.filename
2961 - print '=' * 79  
2962 - print 'FILE:', display_filename 2968 + print('=' * 79)
  2969 + print('FILE:', display_filename)
2963 try: 2970 try:
2964 #TODO: handle olefile errors, when an OLE file is malformed 2971 #TODO: handle olefile errors, when an OLE file is malformed
2965 - print 'Type:', self.type 2972 + print('Type: %s' % self.type)
2966 if self.detect_vba_macros(): 2973 if self.detect_vba_macros():
2967 #print 'Contains VBA Macros:' 2974 #print 'Contains VBA Macros:'
2968 for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros(): 2975 for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
2969 if hide_attributes: 2976 if hide_attributes:
2970 # hide attribute lines: 2977 # hide attribute lines:
  2978 + if isinstance(vba_code,bytes):
  2979 + vba_code =vba_code.decode('utf-8','replace')
2971 vba_code_filtered = filter_vba(vba_code) 2980 vba_code_filtered = filter_vba(vba_code)
2972 else: 2981 else:
2973 vba_code_filtered = vba_code 2982 vba_code_filtered = vba_code
2974 - print '-' * 79  
2975 - print 'VBA MACRO %s ' % vba_filename  
2976 - print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)) 2983 + print('-' * 79)
  2984 + print('VBA MACRO %s ' % vba_filename)
  2985 + print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)))
2977 if display_code: 2986 if display_code:
2978 - print '- ' * 39 2987 + print('- ' * 39)
2979 # detect empty macros: 2988 # detect empty macros:
2980 if vba_code_filtered.strip() == '': 2989 if vba_code_filtered.strip() == '':
2981 - print '(empty macro)' 2990 + print('(empty macro)')
2982 else: 2991 else:
2983 - print vba_code_filtered 2992 + print(vba_code_filtered)
2984 for (subfilename, stream_path, form_string) in self.extract_form_strings(): 2993 for (subfilename, stream_path, form_string) in self.extract_form_strings():
2985 - print '-' * 79  
2986 - print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)  
2987 - print '- ' * 39  
2988 - print form_string 2994 + print('-' * 79)
  2995 + print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path))
  2996 + print('- ' * 39)
  2997 + print(form_string.decode('utf-8', 'ignore'))
2989 if not vba_code_only: 2998 if not vba_code_only:
2990 # analyse the code from all modules at once: 2999 # analyse the code from all modules at once:
2991 self.print_analysis(show_decoded_strings, deobfuscate) 3000 self.print_analysis(show_decoded_strings, deobfuscate)
2992 if show_deobfuscated_code: 3001 if show_deobfuscated_code:
2993 - print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n'  
2994 - print self.reveal() 3002 + print('MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n')
  3003 + print(self.reveal())
2995 else: 3004 else:
2996 - print 'No VBA macros found.' 3005 + print('No VBA macros found.')
2997 except OlevbaBaseException: 3006 except OlevbaBaseException:
2998 raise 3007 raise
2999 except Exception as exc: 3008 except Exception as exc:
@@ -3001,7 +3010,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3001,7 +3010,7 @@ class VBA_Parser_CLI(VBA_Parser):
3001 log.info('Error processing file %s (%s)' % (self.filename, exc)) 3010 log.info('Error processing file %s (%s)' % (self.filename, exc))
3002 log.debug('Traceback:', exc_info=True) 3011 log.debug('Traceback:', exc_info=True)
3003 raise ProcessingError(self.filename, exc) 3012 raise ProcessingError(self.filename, exc)
3004 - print '' 3013 + print('')
3005 3014
3006 3015
3007 def process_file_json(self, show_decoded_strings=False, 3016 def process_file_json(self, show_decoded_strings=False,
@@ -3048,7 +3057,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3048,7 +3057,7 @@ class VBA_Parser_CLI(VBA_Parser):
3048 curr_macro = {} 3057 curr_macro = {}
3049 if hide_attributes: 3058 if hide_attributes:
3050 # hide attribute lines: 3059 # hide attribute lines:
3051 - vba_code_filtered = filter_vba(vba_code) 3060 + vba_code_filtered = filter_vba(vba_code.decode('utf-8','replace'))
3052 else: 3061 else:
3053 vba_code_filtered = vba_code 3062 vba_code_filtered = vba_code
3054 3063
@@ -3087,7 +3096,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3087,7 +3096,7 @@ class VBA_Parser_CLI(VBA_Parser):
3087 if self.detect_vba_macros(): 3096 if self.detect_vba_macros():
3088 # print a waiting message only if the output is not redirected to a file: 3097 # print a waiting message only if the output is not redirected to a file:
3089 if sys.stdout.isatty(): 3098 if sys.stdout.isatty():
3090 - print 'Analysis...\r', 3099 + print('Analysis...\r')
3091 sys.stdout.flush() 3100 sys.stdout.flush()
3092 self.analyze_macros(show_decoded_strings=show_decoded_strings, 3101 self.analyze_macros(show_decoded_strings=show_decoded_strings,
3093 deobfuscate=deobfuscate) 3102 deobfuscate=deobfuscate)
@@ -3105,7 +3114,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3105,7 +3114,7 @@ class VBA_Parser_CLI(VBA_Parser):
3105 base64obf, dridex, vba_obf) 3114 base64obf, dridex, vba_obf)
3106 3115
3107 line = '%-12s %s' % (flags, self.filename) 3116 line = '%-12s %s' % (flags, self.filename)
3108 - print line 3117 + print(line)
3109 3118
3110 # old table display: 3119 # old table display:
3111 # macros = autoexec = suspicious = iocs = hexstrings = 'no' 3120 # macros = autoexec = suspicious = iocs = hexstrings = 'no'
@@ -3198,7 +3207,7 @@ def main(): @@ -3198,7 +3207,7 @@ def main():
3198 3207
3199 # Print help if no arguments are passed 3208 # Print help if no arguments are passed
3200 if len(args) == 0: 3209 if len(args) == 0:
3201 - print __doc__ 3210 + print(__doc__)
3202 parser.print_help() 3211 parser.print_help()
3203 sys.exit(RETURN_WRONG_ARGS) 3212 sys.exit(RETURN_WRONG_ARGS)
3204 3213
@@ -3209,7 +3218,7 @@ def main(): @@ -3209,7 +3218,7 @@ def main():
3209 url='http://decalage.info/python/oletools', 3218 url='http://decalage.info/python/oletools',
3210 type='MetaInformation') 3219 type='MetaInformation')
3211 else: 3220 else:
3212 - print 'olevba %s - http://decalage.info/python/oletools' % __version__ 3221 + print('olevba %s - http://decalage.info/python/oletools' % __version__)
3213 3222
3214 logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') 3223 logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
3215 # enable logging in the modules: 3224 # enable logging in the modules:
@@ -3229,8 +3238,8 @@ def main(): @@ -3229,8 +3238,8 @@ def main():
3229 # Column headers (do not know how many files there will be yet, so if no output_mode 3238 # Column headers (do not know how many files there will be yet, so if no output_mode
3230 # was specified, we will print triage for first file --> need these headers) 3239 # was specified, we will print triage for first file --> need these headers)
3231 if options.output_mode in ('triage', 'unspecified'): 3240 if options.output_mode in ('triage', 'unspecified'):
3232 - print '%-12s %-65s' % ('Flags', 'Filename')  
3233 - print '%-12s %-65s' % ('-' * 11, '-' * 65) 3241 + print('%-12s %-65s' % ('Flags', 'Filename'))
  3242 + print('%-12s %-65s' % ('-' * 11, '-' * 65))
3234 3243
3235 previous_container = None 3244 previous_container = None
3236 count = 0 3245 count = 0
@@ -3248,14 +3257,14 @@ def main(): @@ -3248,14 +3257,14 @@ def main():
3248 if isinstance(data, Exception): 3257 if isinstance(data, Exception):
3249 if isinstance(data, PathNotFoundException): 3258 if isinstance(data, PathNotFoundException):
3250 if options.output_mode in ('triage', 'unspecified'): 3259 if options.output_mode in ('triage', 'unspecified'):
3251 - print '%-12s %s - File not found' % ('?', filename) 3260 + print('%-12s %s - File not found' % ('?', filename))
3252 elif options.output_mode != 'json': 3261 elif options.output_mode != 'json':
3253 log.error('Given path %r does not exist!' % filename) 3262 log.error('Given path %r does not exist!' % filename)
3254 return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \ 3263 return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \
3255 else RETURN_SEVERAL_ERRS 3264 else RETURN_SEVERAL_ERRS
3256 else: 3265 else:
3257 if options.output_mode in ('triage', 'unspecified'): 3266 if options.output_mode in ('triage', 'unspecified'):
3258 - print '%-12s %s - Failed to read from zip file %s' % ('?', filename, container) 3267 + print('%-12s %s - Failed to read from zip file %s' % ('?', filename, container))
3259 elif options.output_mode != 'json': 3268 elif options.output_mode != 'json':
3260 log.error('Exception opening/reading %r from zip file %r: %s' 3269 log.error('Exception opening/reading %r from zip file %r: %s'
3261 % (filename, container, data)) 3270 % (filename, container, data))
@@ -3282,7 +3291,7 @@ def main(): @@ -3282,7 +3291,7 @@ def main():
3282 # print container name when it changes: 3291 # print container name when it changes:
3283 if container != previous_container: 3292 if container != previous_container:
3284 if container is not None: 3293 if container is not None:
3285 - print '\nFiles in %s:' % container 3294 + print('\nFiles in %s:' % container)
3286 previous_container = container 3295 previous_container = container
3287 # summarized output for triage: 3296 # summarized output for triage:
3288 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, 3297 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
@@ -3300,8 +3309,8 @@ def main(): @@ -3300,8 +3309,8 @@ def main():
3300 3309
3301 except (SubstreamOpenError, UnexpectedDataError) as exc: 3310 except (SubstreamOpenError, UnexpectedDataError) as exc:
3302 if options.output_mode in ('triage', 'unspecified'): 3311 if options.output_mode in ('triage', 'unspecified'):
3303 - print '%-12s %s - Error opening substream or uenxpected ' \  
3304 - 'content' % ('?', filename) 3312 + print('%-12s %s - Error opening substream or uenxpected ' \
  3313 + 'content' % ('?', filename))
3305 elif options.output_mode == 'json': 3314 elif options.output_mode == 'json':
3306 print_json(file=filename, type='error', 3315 print_json(file=filename, type='error',
3307 error=type(exc).__name__, message=str(exc)) 3316 error=type(exc).__name__, message=str(exc))
@@ -3312,7 +3321,7 @@ def main(): @@ -3312,7 +3321,7 @@ def main():
3312 else RETURN_SEVERAL_ERRS 3321 else RETURN_SEVERAL_ERRS
3313 except FileOpenError as exc: 3322 except FileOpenError as exc:
3314 if options.output_mode in ('triage', 'unspecified'): 3323 if options.output_mode in ('triage', 'unspecified'):
3315 - print '%-12s %s - File format not supported' % ('?', filename) 3324 + print('%-12s %s - File format not supported' % ('?', filename))
3316 elif options.output_mode == 'json': 3325 elif options.output_mode == 'json':
3317 print_json(file=filename, type='error', 3326 print_json(file=filename, type='error',
3318 error=type(exc).__name__, message=str(exc)) 3327 error=type(exc).__name__, message=str(exc))
@@ -3322,7 +3331,7 @@ def main(): @@ -3322,7 +3331,7 @@ def main():
3322 else RETURN_SEVERAL_ERRS 3331 else RETURN_SEVERAL_ERRS
3323 except ProcessingError as exc: 3332 except ProcessingError as exc:
3324 if options.output_mode in ('triage', 'unspecified'): 3333 if options.output_mode in ('triage', 'unspecified'):
3325 - print '%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc) 3334 + print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc))
3326 elif options.output_mode == 'json': 3335 elif options.output_mode == 'json':
3327 print_json(file=filename, type='error', 3336 print_json(file=filename, type='error',
3328 error=type(exc).__name__, 3337 error=type(exc).__name__,
@@ -3337,9 +3346,9 @@ def main(): @@ -3337,9 +3346,9 @@ def main():
3337 vba_parser.close() 3346 vba_parser.close()
3338 3347
3339 if options.output_mode == 'triage': 3348 if options.output_mode == 'triage':
3340 - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ 3349 + print('\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \
3341 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ 3350 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \
3342 - 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' 3351 + 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n')
3343 3352
3344 if count == 1 and options.output_mode == 'unspecified': 3353 if count == 1 and options.output_mode == 'unspecified':
3345 # if options -t, -d and -j were not specified and it's a single file, print details: 3354 # if options -t, -d and -j were not specified and it's a single file, print details:
oletools/ppt_parser.py
@@ -1570,4 +1570,4 @@ def iterative_decompress(stream, size, chunk_size=4096): @@ -1570,4 +1570,4 @@ def iterative_decompress(stream, size, chunk_size=4096):
1570 1570
1571 1571
1572 if __name__ == '__main__': 1572 if __name__ == '__main__':
1573 - print 'nothing here to run!' 1573 + print('nothing here to run!')
oletools/thirdparty/olefile/olefile.py
@@ -1030,10 +1030,11 @@ class OleDirectoryEntry: @@ -1030,10 +1030,11 @@ class OleDirectoryEntry:
1030 #[PL] this method was added to use simple recursion instead of a complex 1030 #[PL] this method was added to use simple recursion instead of a complex
1031 # algorithm. 1031 # algorithm.
1032 # if this is not a storage or a leaf of the tree, nothing to do: 1032 # if this is not a storage or a leaf of the tree, nothing to do:
  1033 +
1033 if child_sid == NOSTREAM: 1034 if child_sid == NOSTREAM:
1034 return 1035 return
1035 # check if child SID is in the proper range: 1036 # check if child SID is in the proper range:
1036 - if child_sid<0 or child_sid>=len(self.olefile.direntries): 1037 + if child_sid <= 0 or child_sid >= len(self.olefile.direntries):
1037 self.olefile._raise_defect(DEFECT_INCORRECT, 'OLE DirEntry index out of range') 1038 self.olefile._raise_defect(DEFECT_INCORRECT, 'OLE DirEntry index out of range')
1038 else: 1039 else:
1039 # get child direntry: 1040 # get child direntry:
oletools/thirdparty/olefile/olefile2.py
@@ -1004,7 +1004,7 @@ class OleFileIO: @@ -1004,7 +1004,7 @@ class OleFileIO:
1004 TIFF files). 1004 TIFF files).
1005 """ 1005 """
1006 1006
1007 - def __init__(self, filename = None, raise_defects=DEFECT_FATAL): 1007 + def __init__(self, filename = None, raise_defects=DEFECT_FATAL):
1008 """ 1008 """
1009 Constructor for OleFileIO class. 1009 Constructor for OleFileIO class.
1010 1010
oletools/thirdparty/tablestream/tablestream.py
@@ -236,7 +236,7 @@ class TableStream(object): @@ -236,7 +236,7 @@ class TableStream(object):
236 assert len(row) == self.num_columns 236 assert len(row) == self.num_columns
237 columns = [] 237 columns = []
238 max_lines = 0 238 max_lines = 0
239 - for i in xrange(self.num_columns): 239 + for i in range(self.num_columns):
240 cell = row[i] 240 cell = row[i]
241 # Convert to string: 241 # Convert to string:
242 # TODO: handle unicode properly 242 # TODO: handle unicode properly
@@ -245,7 +245,7 @@ class TableStream(object): @@ -245,7 +245,7 @@ class TableStream(object):
245 # encode to UTF8, avoiding errors 245 # encode to UTF8, avoiding errors
246 cell = cell.decode('utf-8', errors='replace') 246 cell = cell.decode('utf-8', errors='replace')
247 else: 247 else:
248 - cell = unicode(cell) 248 + cell = cell
249 # Wrap cell text according to the column width 249 # Wrap cell text according to the column width
250 # TODO: use a TextWrapper object for each column instead 250 # TODO: use a TextWrapper object for each column instead
251 # split the string if it contains newline characters, otherwise 251 # split the string if it contains newline characters, otherwise
@@ -257,16 +257,16 @@ class TableStream(object): @@ -257,16 +257,16 @@ class TableStream(object):
257 if colors is not None and self.outfile.isatty(): 257 if colors is not None and self.outfile.isatty():
258 color = colors[i] 258 color = colors[i]
259 if color: 259 if color:
260 - for j in xrange(len(column)): 260 + for j in range(len(column)):
261 # print '%r: %s' % (column[j], type(column[j])) 261 # print '%r: %s' % (column[j], type(column[j]))
262 column[j] = colorclass.Color('{auto%s}%s{/%s}' % (color, column[j], color)) 262 column[j] = colorclass.Color('{auto%s}%s{/%s}' % (color, column[j], color))
263 columns.append(column) 263 columns.append(column)
264 # determine which column has the highest number of lines 264 # determine which column has the highest number of lines
265 max_lines = max(len(columns[i]), max_lines) 265 max_lines = max(len(columns[i]), max_lines)
266 # transpose: write output line by line 266 # transpose: write output line by line
267 - for j in xrange(max_lines): 267 + for j in range(max_lines):
268 self.write(self.style.vertical_left) 268 self.write(self.style.vertical_left)
269 - for i in xrange(self.num_columns): 269 + for i in range(self.num_columns):
270 column = columns[i] 270 column = columns[i]
271 if j<len(column): 271 if j<len(column):
272 # text to be written 272 # text to be written