Commit 1541d5dedc31c06d3adbf382b51c5e4794fb711e

Authored by Philippe Lagadec
Committed by GitHub
2 parents e73e4dc3 4984d587

Merge pull request #64 from sebdraven/master

Conversion of all oletools to Python 3.5 (temporarily breaking compatibility with Python 2.7)
oletools/mraptor.py
... ... @@ -233,16 +233,16 @@ def main():
233 233  
234 234 # Print help if no arguments are passed
235 235 if len(args) == 0:
236   - print __doc__
  236 + print(__doc__)
237 237 parser.print_help()
238   - print '\nAn exit code is returned based on the analysis result:'
  238 + print('\nAn exit code is returned based on the analysis result:')
239 239 for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK, Result_Error, Result_Suspicious):
240   - print ' - %d: %s' % (result.exit_code, result.name)
  240 + print(' - %d: %s' % (result.exit_code, result.name))
241 241 sys.exit()
242 242  
243 243 # print banner with version
244   - print 'MacroRaptor %s - http://decalage.info/python/oletools' % __version__
245   - print 'This is work in progress, please report issues at %s' % URL_ISSUES
  244 + print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__)
  245 + print('This is work in progress, please report issues at %s' % URL_ISSUES)
246 246  
247 247 logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
248 248 # enable logging in the modules:
... ... @@ -292,7 +292,7 @@ def main():
292 292 vba_code_all_modules = ''
293 293 try:
294 294 for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_all_macros():
295   - vba_code_all_modules += vba_code + '\n'
  295 + vba_code_all_modules += vba_code.decode('utf-8','replace') + '\n'
296 296 except Exception as e:
297 297 # log.error('Error when parsing VBA macros from file %r' % full_name)
298 298 result = Result_Error
... ... @@ -319,9 +319,9 @@ def main():
319 319 global_result = result
320 320 exitcode = result.exit_code
321 321  
322   - print ''
323   - print 'Flags: A=AutoExec, W=Write, X=Execute'
324   - print 'Exit code: %d - %s' % (exitcode, global_result.name)
  322 + print('')
  323 + print('Flags: A=AutoExec, W=Write, X=Execute')
  324 + print('Exit code: %d - %s' % (exitcode, global_result.name))
325 325 sys.exit(exitcode)
326 326  
327 327 if __name__ == '__main__':
... ...
oletools/olemap.py
... ... @@ -90,14 +90,14 @@ FAT_COLORS = {
90 90  
91 91 if __name__ == '__main__':
92 92 # print banner with version
93   - print 'olemap %s - http://decalage.info/python/oletools' % __version__
  93 + print('olemap %s - http://decalage.info/python/oletools' % __version__)
94 94  
95 95 fname = sys.argv[1]
96 96 ole = olefile.OleFileIO(fname)
97 97  
98   - print 'FAT:'
  98 + print('FAT:')
99 99 t = tablestream.TableStream([8, 12, 8, 8], header_row=['Sector #', 'Type', 'Offset', 'Next #'])
100   - for i in xrange(ole.nb_sect):
  100 + for i in range(ole.nb_sect):
101 101 fat_value = ole.fat[i]
102 102 fat_type = FAT_TYPES.get(fat_value, '<Data>')
103 103 color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default'])
... ... @@ -106,15 +106,15 @@ if __name__ == '__main__':
106 106 # print '%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value)
107 107 t.write_row(['%8X' % i, fat_type, '%08X' % offset, '%8X' % fat_value],
108 108 colors=[None, color_type, None, None])
109   - print ''
  109 + print('')
110 110  
111   - print 'MiniFAT:'
  111 + print('MiniFAT:')
112 112 # load MiniFAT if it wasn't already done:
113 113 ole.loadminifat()
114   - for i in xrange(len(ole.minifat)):
  114 + for i in range(len(ole.minifat)):
115 115 fat_value = ole.minifat[i]
116 116 fat_type = FAT_TYPES.get(fat_value, 'Data')
117   - print '%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value)
  117 + print('%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value))
118 118  
119 119 ole.close()
120 120  
... ...
oletools/oletimes.py
... ... @@ -94,6 +94,6 @@ for obj in ole.listdir(streams=True, storages=True):
94 94 #print '- %s: mtime=%s ctime=%s' % (repr('/'.join(obj)), ole.getmtime(obj), ole.getctime(obj))
95 95 t.add_row((repr('/'.join(obj)), dt2str(ole.getmtime(obj)), dt2str(ole.getctime(obj))))
96 96  
97   -print t
  97 +print(t)
98 98  
99 99 ole.close()
... ...
oletools/olevba.py
... ... @@ -215,7 +215,7 @@ __version__ = '0.50'
215 215  
216 216 import sys, logging
217 217 import struct
218   -import cStringIO
  218 +from _io import StringIO,BytesIO
219 219 import math
220 220 import zipfile
221 221 import re
... ... @@ -240,9 +240,9 @@ except ImportError:
240 240 # Python <2.5: standalone ElementTree install
241 241 import elementtree.cElementTree as ET
242 242 except ImportError:
243   - raise ImportError, "lxml or ElementTree are not installed, " \
  243 + raise(ImportError, "lxml or ElementTree are not installed, " \
244 244 + "see http://codespeak.net/lxml " \
245   - + "or http://effbot.org/zone/element-index.htm"
  245 + + "or http://effbot.org/zone/element-index.htm")
246 246  
247 247 import thirdparty.olefile as olefile
248 248 from thirdparty.prettytable import prettytable
... ... @@ -421,7 +421,7 @@ TYPE2TAG = {
421 421  
422 422  
423 423 # MSO files ActiveMime header magic
424   -MSO_ACTIVEMIME_HEADER = 'ActiveMime'
  424 +MSO_ACTIVEMIME_HEADER = b'ActiveMime'
425 425  
426 426 MODULE_EXTENSION = "bas"
427 427 CLASS_EXTENSION = "cls"
... ... @@ -630,7 +630,7 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"')
630 630 re_nothex_check = re.compile(r'[G-Zg-z]')
631 631  
632 632 # regex to extract printable strings (at least 5 chars) from VBA Forms:
633   -re_printable_string = re.compile(r'[\t\r\n\x20-\xFF]{5,}')
  633 +re_printable_string = re.compile(rb'[\t\r\n\x20-\xFF]{5,}')
634 634  
635 635  
636 636 # === PARTIAL VBA GRAMMAR ====================================================
... ... @@ -1060,10 +1060,10 @@ def decompress_stream(compressed_container):
1060 1060 # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the
1061 1061 # DecompressedBuffer (section 2.4.1.1.2).
1062 1062  
1063   - decompressed_container = '' # result
  1063 + decompressed_container = b'' # result
1064 1064 compressed_current = 0
1065 1065  
1066   - sig_byte = ord(compressed_container[compressed_current])
  1066 + sig_byte = compressed_container[compressed_current]
1067 1067 if sig_byte != 0x01:
1068 1068 raise ValueError('invalid signature byte {0:02X}'.format(sig_byte))
1069 1069  
... ... @@ -1109,7 +1109,7 @@ def decompress_stream(compressed_container):
1109 1109 # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk
1110 1110 # uncompressed chunk: read the next 4096 bytes as-is
1111 1111 #TODO: check if there are at least 4096 bytes left
1112   - decompressed_container += compressed_container[compressed_current:compressed_current + 4096]
  1112 + decompressed_container += bytes([compressed_container[compressed_current:compressed_current + 4096]])
1113 1113 compressed_current += 4096
1114 1114 else:
1115 1115 # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk
... ... @@ -1120,9 +1120,9 @@ def decompress_stream(compressed_container):
1120 1120 # log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end))
1121 1121 # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or
1122 1122 # copy tokens (reference to a previous literal token)
1123   - flag_byte = ord(compressed_container[compressed_current])
  1123 + flag_byte = compressed_container[compressed_current]
1124 1124 compressed_current += 1
1125   - for bit_index in xrange(0, 8):
  1125 + for bit_index in range(0, 8):
1126 1126 # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end))
1127 1127 if compressed_current >= compressed_end:
1128 1128 break
... ... @@ -1132,7 +1132,7 @@ def decompress_stream(compressed_container):
1132 1132 #log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit))
1133 1133 if flag_bit == 0: # LiteralToken
1134 1134 # copy one byte directly to output
1135   - decompressed_container += compressed_container[compressed_current]
  1135 + decompressed_container += bytes([compressed_container[compressed_current]])
1136 1136 compressed_current += 1
1137 1137 else: # CopyToken
1138 1138 # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken
... ... @@ -1147,8 +1147,8 @@ def decompress_stream(compressed_container):
1147 1147 offset = (temp1 >> temp2) + 1
1148 1148 #log.debug('offset=%d length=%d' % (offset, length))
1149 1149 copy_source = len(decompressed_container) - offset
1150   - for index in xrange(copy_source, copy_source + length):
1151   - decompressed_container += decompressed_container[index]
  1150 + for index in range(copy_source, copy_source + length):
  1151 + decompressed_container += bytes([decompressed_container[index]])
1152 1152 compressed_current += 2
1153 1153 return decompressed_container
1154 1154  
... ... @@ -1191,7 +1191,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1191 1191 code_modules = {}
1192 1192  
1193 1193 for line in project:
1194   - line = line.strip()
  1194 + line = line.strip().decode('utf-8','ignore')
1195 1195 if '=' in line:
1196 1196 # split line at the 1st equal sign:
1197 1197 name, value = line.split('=', 1)
... ... @@ -1222,7 +1222,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1222 1222 else:
1223 1223 raise UnexpectedDataError(dir_path, name, expected, value)
1224 1224  
1225   - dir_stream = cStringIO.StringIO(decompress_stream(dir_compressed))
  1225 + dir_stream = BytesIO(decompress_stream(dir_compressed))
1226 1226  
1227 1227 # PROJECTSYSKIND Record
1228 1228 projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0]
... ... @@ -1484,7 +1484,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1484 1484 uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace')
1485 1485  
1486 1486 log.debug("parsing {0} modules".format(projectmodules_count))
1487   - for projectmodule_index in xrange(0, projectmodules_count):
  1487 + for projectmodule_index in range(0, projectmodules_count):
1488 1488 try:
1489 1489 modulename_id = struct.unpack("<H", dir_stream.read(2))[0]
1490 1490 check_value('MODULENAME_Id', 0x0019, modulename_id)
... ... @@ -1881,19 +1881,19 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'):
1881 1881 pass
1882 1882 elif isinstance(json_obj, str):
1883 1883 # de-code and re-encode
1884   - dencoded = json_obj.decode(encoding, errors).encode(encoding, errors)
  1884 + dencoded = json_obj
1885 1885 if dencoded != json_obj:
1886 1886 log.debug('json2ascii: replaced: {0} (len {1})'
1887 1887 .format(json_obj, len(json_obj)))
1888 1888 log.debug('json2ascii: with: {0} (len {1})'
1889 1889 .format(dencoded, len(dencoded)))
1890 1890 return dencoded
1891   - elif isinstance(json_obj, unicode):
  1891 + elif isinstance(json_obj, bytes):
1892 1892 log.debug('json2ascii: encode unicode: {0}'
1893   - .format(json_obj.encode(encoding, errors)))
  1893 + .format(json_obj.decode(encoding, errors)))
1894 1894 # cannot put original into logger
1895 1895 # print 'original: ' json_obj
1896   - return json_obj.encode(encoding, errors)
  1896 + return json_obj.decode(encoding, errors)
1897 1897 elif isinstance(json_obj, dict):
1898 1898 for key in json_obj:
1899 1899 json_obj[key] = json2ascii(json_obj[key])
... ... @@ -1931,18 +1931,18 @@ def print_json(json_dict=None, _json_is_last=False, **json_parts):
1931 1931 json_dict = json_parts
1932 1932  
1933 1933 if not _have_printed_json_start:
1934   - print '['
  1934 + print('[')
1935 1935 _have_printed_json_start = True
1936 1936  
1937 1937 lines = json.dumps(json2ascii(json_dict), check_circular=False,
1938 1938 indent=4, ensure_ascii=False).splitlines()
1939 1939 for line in lines[:-1]:
1940   - print ' {0}'.format(line)
  1940 + print(' {0}'.format(line))
1941 1941 if _json_is_last:
1942   - print ' {0}'.format(lines[-1]) # print last line without comma
1943   - print ']'
  1942 + print(' {0}'.format(lines[-1])) # print last line without comma
  1943 + print(']')
1944 1944 else:
1945   - print ' {0},'.format(lines[-1]) # print last line with comma
  1945 + print(' {0},'.format(lines[-1])) # print last line with comma
1946 1946  
1947 1947  
1948 1948 class VBA_Scanner(object):
... ... @@ -1959,10 +1959,10 @@ class VBA_Scanner(object):
1959 1959 """
1960 1960 # join long lines ending with " _":
1961 1961 self.code = vba_collapse_long_lines(vba_code)
1962   - self.code_hex = ''
1963   - self.code_hex_rev = ''
1964   - self.code_rev_hex = ''
1965   - self.code_base64 = ''
  1962 + self.code_hex = b''
  1963 + self.code_hex_rev = b''
  1964 + self.code_rev_hex = b''
  1965 + self.code_base64 = b''
1966 1966 self.code_dridex = ''
1967 1967 self.code_vba = ''
1968 1968 self.strReverse = None
... ... @@ -1995,19 +1995,19 @@ class VBA_Scanner(object):
1995 1995 if 'strreverse' in self.code.lower(): self.strReverse = True
1996 1996 # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords:
1997 1997 for encoded, decoded in self.hex_strings:
1998   - self.code_hex += '\n' + decoded
  1998 + self.code_hex += b'\n' + decoded
1999 1999 # if the code contains "StrReverse", also append the hex strings in reverse order:
2000 2000 if self.strReverse:
2001 2001 # StrReverse after hex decoding:
2002   - self.code_hex_rev += '\n' + decoded[::-1]
  2002 + self.code_hex_rev += b'\n' + decoded[::-1]
2003 2003 # StrReverse before hex decoding:
2004   - self.code_rev_hex += '\n' + binascii.unhexlify(encoded[::-1])
  2004 + self.code_rev_hex += b'\n' + binascii.unhexlify(encoded[::-1])
2005 2005 #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
2006 2006 #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
2007 2007 # Detect Base64-encoded strings
2008 2008 self.base64_strings = detect_base64_strings(self.code)
2009 2009 for encoded, decoded in self.base64_strings:
2010   - self.code_base64 += '\n' + decoded
  2010 + self.code_base64 += b'\n' + decoded
2011 2011 # Detect Dridex-encoded strings
2012 2012 self.dridex_strings = detect_dridex_strings(self.code)
2013 2013 for encoded, decoded in self.dridex_strings:
... ... @@ -2026,13 +2026,15 @@ class VBA_Scanner(object):
2026 2026  
2027 2027 for code, obfuscation in (
2028 2028 (self.code, None),
2029   - (self.code_hex, 'Hex'),
  2029 + (self.code_hex.decode('utf-8','replace'), 'Hex'),
2030 2030 (self.code_hex_rev, 'Hex+StrReverse'),
2031 2031 (self.code_rev_hex, 'StrReverse+Hex'),
2032   - (self.code_base64, 'Base64'),
  2032 + (self.code_base64.decode('utf-8', 'replace'), 'Base64'),
2033 2033 (self.code_dridex, 'Dridex'),
2034 2034 (self.code_vba, 'VBA expression'),
2035 2035 ):
  2036 + if isinstance(code,bytes):
  2037 + code=code.decode('utf-8','replace')
2036 2038 self.autoexec_keywords += detect_autoexec(code, obfuscation)
2037 2039 self.suspicious_keywords += detect_suspicious(code, obfuscation)
2038 2040 self.iocs += detect_patterns(code, obfuscation)
... ... @@ -2158,7 +2160,7 @@ class VBA_Parser(object):
2158 2160 _file = filename
2159 2161 else:
2160 2162 # file already read in memory, make it a file-like object for zipfile:
2161   - _file = cStringIO.StringIO(data)
  2163 + _file = BytesIO(data)
2162 2164 #self.file = _file
2163 2165 self.ole_file = None
2164 2166 self.ole_subfiles = []
... ... @@ -2207,7 +2209,7 @@ class VBA_Parser(object):
2207 2209 if data is None:
2208 2210 data = open(filename, 'rb').read()
2209 2211 # check if it is a Word 2003 XML file (WordProcessingML): must contain the namespace
2210   - if 'http://schemas.microsoft.com/office/word/2003/wordml' in data:
  2212 + if b'http://schemas.microsoft.com/office/word/2003/wordml' in data:
2211 2213 self.open_word2003xml(data)
2212 2214 # store a lowercase version for the next tests:
2213 2215 data_lowercase = data.lower()
... ... @@ -2217,14 +2219,14 @@ class VBA_Parser(object):
2217 2219 # and even whitespaces in between "MIME", "-", "Version" and ":". The version number is ignored.
2218 2220 # And the line is case insensitive.
2219 2221 # so we'll just check the presence of mime, version and multipart anywhere:
2220   - if self.type is None and 'mime' in data_lowercase and 'version' in data_lowercase \
2221   - and 'multipart' in data_lowercase:
  2222 + if self.type is None and b'mime' in data_lowercase and b'version' in data_lowercase \
  2223 + and b'multipart' in data_lowercase:
2222 2224 self.open_mht(data)
2223 2225 #TODO: handle exceptions
2224 2226 #TODO: Excel 2003 XML
2225 2227 # Check if this is a plain text VBA or VBScript file:
2226 2228 # To avoid scanning binary files, we simply check for some control chars:
2227   - if self.type is None and '\x00' not in data:
  2229 + if self.type is None and b'\x00' not in data:
2228 2230 self.open_text(data)
2229 2231 if self.type is None:
2230 2232 # At this stage, could not match a known format:
... ... @@ -2358,6 +2360,8 @@ class VBA_Parser(object):
2358 2360 """
2359 2361 log.info('Opening MHTML file %s' % self.filename)
2360 2362 try:
  2363 + if isinstance(data,bytes):
  2364 + data = data.decode('utf8', 'replace')
2361 2365 # parse the MIME content
2362 2366 # remove any leading whitespace or newline (workaround for issue in email package)
2363 2367 stripped_data = data.lstrip('\r\n\t ')
... ... @@ -2387,7 +2391,8 @@ class VBA_Parser(object):
2387 2391 # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
2388 2392 # decompress the zlib data starting at offset 0x32, which is the OLE container:
2389 2393 # check ActiveMime header:
2390   - if isinstance(part_data, str) and is_mso_file(part_data):
  2394 +
  2395 + if (isinstance(part_data, str) or isinstance(part_data, bytes)) and is_mso_file(part_data):
2391 2396 log.debug('Found ActiveMime header, decompressing MSO container')
2392 2397 try:
2393 2398 ole_data = mso_file_extract(part_data)
... ... @@ -2458,6 +2463,8 @@ class VBA_Parser(object):
2458 2463 """
2459 2464 log.info('Opening text file %s' % self.filename)
2460 2465 # directly store the source code:
  2466 + if isinstance(data,bytes):
  2467 + data=data.decode('utf8','replace')
2461 2468 self.vba_code_all_modules = data
2462 2469 self.contains_macros = True
2463 2470 # set type only if parsing succeeds
... ... @@ -2596,7 +2603,7 @@ class VBA_Parser(object):
2596 2603 # Also look for VBA code in any stream including orphans
2597 2604 # (happens in some malformed files)
2598 2605 ole = self.ole_file
2599   - for sid in xrange(len(ole.direntries)):
  2606 + for sid in range(len(ole.direntries)):
2600 2607 # check if id is already done above:
2601 2608 log.debug('Checking DirEntry #%d' % sid)
2602 2609 d = ole.direntries[sid]
... ... @@ -2614,7 +2621,7 @@ class VBA_Parser(object):
2614 2621 log.debug('%r...[much more data]...%r' % (data[:100], data[-50:]))
2615 2622 else:
2616 2623 log.debug(repr(data))
2617   - if 'Attribut' in data:
  2624 + if 'Attribut' in data.decode('utf-8','ignore'):
2618 2625 log.debug('Found VBA compressed code')
2619 2626 self.contains_macros = True
2620 2627 except IOError as exc:
... ... @@ -2662,7 +2669,7 @@ class VBA_Parser(object):
2662 2669 # Also look for VBA code in any stream including orphans
2663 2670 # (happens in some malformed files)
2664 2671 ole = self.ole_file
2665   - for sid in xrange(len(ole.direntries)):
  2672 + for sid in range(len(ole.direntries)):
2666 2673 # check if id is already done above:
2667 2674 log.debug('Checking DirEntry #%d' % sid)
2668 2675 if sid in vba_stream_ids:
... ... @@ -2677,7 +2684,7 @@ class VBA_Parser(object):
2677 2684 # read data
2678 2685 log.debug('Reading data from stream %r' % d.name)
2679 2686 data = ole._open(d.isectStart, d.size).read()
2680   - for match in re.finditer(r'\x00Attribut[^e]', data, flags=re.IGNORECASE):
  2687 + for match in re.finditer(rb'\x00Attribut[^e]', data, flags=re.IGNORECASE):
2681 2688 start = match.start() - 3
2682 2689 log.debug('Found VBA compressed code at index %X' % start)
2683 2690 compressed_code = data[start:]
... ... @@ -2720,9 +2727,9 @@ class VBA_Parser(object):
2720 2727 self.vba_code_all_modules = ''
2721 2728 for (_, _, _, vba_code) in self.extract_all_macros():
2722 2729 #TODO: filter code? (each module)
2723   - self.vba_code_all_modules += vba_code + '\n'
  2730 + self.vba_code_all_modules += vba_code.decode('utf-8', 'ignore') + '\n'
2724 2731 for (_, _, form_string) in self.extract_form_strings():
2725   - self.vba_code_all_modules += form_string + '\n'
  2732 + self.vba_code_all_modules += form_string.decode('utf-8', 'ignore') + '\n'
2726 2733 # Analyze the whole code at once:
2727 2734 scanner = VBA_Scanner(self.vba_code_all_modules)
2728 2735 self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate)
... ... @@ -2897,7 +2904,7 @@ class VBA_Parser_CLI(VBA_Parser):
2897 2904 """
2898 2905 # print a waiting message only if the output is not redirected to a file:
2899 2906 if sys.stdout.isatty():
2900   - print 'Analysis...\r',
  2907 + print('Analysis...\r')
2901 2908 sys.stdout.flush()
2902 2909 results = self.analyze_macros(show_decoded_strings, deobfuscate)
2903 2910 if results:
... ... @@ -2913,9 +2920,9 @@ class VBA_Parser_CLI(VBA_Parser):
2913 2920 if not is_printable(description):
2914 2921 description = repr(description)
2915 2922 t.add_row((kw_type, keyword, description))
2916   - print t
  2923 + print(t)
2917 2924 else:
2918   - print 'No suspicious keyword or IOC found.'
  2925 + print('No suspicious keyword or IOC found.')
2919 2926  
2920 2927 def print_analysis_json(self, show_decoded_strings=False, deobfuscate=False):
2921 2928 """
... ... @@ -2929,7 +2936,7 @@ class VBA_Parser_CLI(VBA_Parser):
2929 2936 """
2930 2937 # print a waiting message only if the output is not redirected to a file:
2931 2938 if sys.stdout.isatty():
2932   - print 'Analysis...\r',
  2939 + print('Analysis...\r')
2933 2940 sys.stdout.flush()
2934 2941 return [dict(type=kw_type, keyword=keyword, description=description)
2935 2942 for kw_type, keyword, description in self.analyze_macros(show_decoded_strings, deobfuscate)]
... ... @@ -2958,42 +2965,44 @@ class VBA_Parser_CLI(VBA_Parser):
2958 2965 display_filename = '%s in %s' % (self.filename, self.container)
2959 2966 else:
2960 2967 display_filename = self.filename
2961   - print '=' * 79
2962   - print 'FILE:', display_filename
  2968 + print('=' * 79)
  2969 + print('FILE:', display_filename)
2963 2970 try:
2964 2971 #TODO: handle olefile errors, when an OLE file is malformed
2965   - print 'Type:', self.type
  2972 + print('Type: %s' % self.type)
2966 2973 if self.detect_vba_macros():
2967 2974 #print 'Contains VBA Macros:'
2968 2975 for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
2969 2976 if hide_attributes:
2970 2977 # hide attribute lines:
  2978 + if isinstance(vba_code,bytes):
  2979 + vba_code =vba_code.decode('utf-8','replace')
2971 2980 vba_code_filtered = filter_vba(vba_code)
2972 2981 else:
2973 2982 vba_code_filtered = vba_code
2974   - print '-' * 79
2975   - print 'VBA MACRO %s ' % vba_filename
2976   - print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))
  2983 + print('-' * 79)
  2984 + print('VBA MACRO %s ' % vba_filename)
  2985 + print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)))
2977 2986 if display_code:
2978   - print '- ' * 39
  2987 + print('- ' * 39)
2979 2988 # detect empty macros:
2980 2989 if vba_code_filtered.strip() == '':
2981   - print '(empty macro)'
  2990 + print('(empty macro)')
2982 2991 else:
2983   - print vba_code_filtered
  2992 + print(vba_code_filtered)
2984 2993 for (subfilename, stream_path, form_string) in self.extract_form_strings():
2985   - print '-' * 79
2986   - print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)
2987   - print '- ' * 39
2988   - print form_string
  2994 + print('-' * 79)
  2995 + print('VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path))
  2996 + print('- ' * 39)
  2997 + print(form_string.decode('utf-8', 'ignore'))
2989 2998 if not vba_code_only:
2990 2999 # analyse the code from all modules at once:
2991 3000 self.print_analysis(show_decoded_strings, deobfuscate)
2992 3001 if show_deobfuscated_code:
2993   - print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n'
2994   - print self.reveal()
  3002 + print('MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n')
  3003 + print(self.reveal())
2995 3004 else:
2996   - print 'No VBA macros found.'
  3005 + print('No VBA macros found.')
2997 3006 except OlevbaBaseException:
2998 3007 raise
2999 3008 except Exception as exc:
... ... @@ -3001,7 +3010,7 @@ class VBA_Parser_CLI(VBA_Parser):
3001 3010 log.info('Error processing file %s (%s)' % (self.filename, exc))
3002 3011 log.debug('Traceback:', exc_info=True)
3003 3012 raise ProcessingError(self.filename, exc)
3004   - print ''
  3013 + print('')
3005 3014  
3006 3015  
3007 3016 def process_file_json(self, show_decoded_strings=False,
... ... @@ -3048,7 +3057,7 @@ class VBA_Parser_CLI(VBA_Parser):
3048 3057 curr_macro = {}
3049 3058 if hide_attributes:
3050 3059 # hide attribute lines:
3051   - vba_code_filtered = filter_vba(vba_code)
  3060 + vba_code_filtered = filter_vba(vba_code.decode('utf-8','replace'))
3052 3061 else:
3053 3062 vba_code_filtered = vba_code
3054 3063  
... ... @@ -3087,7 +3096,7 @@ class VBA_Parser_CLI(VBA_Parser):
3087 3096 if self.detect_vba_macros():
3088 3097 # print a waiting message only if the output is not redirected to a file:
3089 3098 if sys.stdout.isatty():
3090   - print 'Analysis...\r',
  3099 + print('Analysis...\r')
3091 3100 sys.stdout.flush()
3092 3101 self.analyze_macros(show_decoded_strings=show_decoded_strings,
3093 3102 deobfuscate=deobfuscate)
... ... @@ -3105,7 +3114,7 @@ class VBA_Parser_CLI(VBA_Parser):
3105 3114 base64obf, dridex, vba_obf)
3106 3115  
3107 3116 line = '%-12s %s' % (flags, self.filename)
3108   - print line
  3117 + print(line)
3109 3118  
3110 3119 # old table display:
3111 3120 # macros = autoexec = suspicious = iocs = hexstrings = 'no'
... ... @@ -3198,7 +3207,7 @@ def main():
3198 3207  
3199 3208 # Print help if no arguments are passed
3200 3209 if len(args) == 0:
3201   - print __doc__
  3210 + print(__doc__)
3202 3211 parser.print_help()
3203 3212 sys.exit(RETURN_WRONG_ARGS)
3204 3213  
... ... @@ -3209,7 +3218,7 @@ def main():
3209 3218 url='http://decalage.info/python/oletools',
3210 3219 type='MetaInformation')
3211 3220 else:
3212   - print 'olevba %s - http://decalage.info/python/oletools' % __version__
  3221 + print('olevba %s - http://decalage.info/python/oletools' % __version__)
3213 3222  
3214 3223 logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
3215 3224 # enable logging in the modules:
... ... @@ -3229,8 +3238,8 @@ def main():
3229 3238 # Column headers (do not know how many files there will be yet, so if no output_mode
3230 3239 # was specified, we will print triage for first file --> need these headers)
3231 3240 if options.output_mode in ('triage', 'unspecified'):
3232   - print '%-12s %-65s' % ('Flags', 'Filename')
3233   - print '%-12s %-65s' % ('-' * 11, '-' * 65)
  3241 + print('%-12s %-65s' % ('Flags', 'Filename'))
  3242 + print('%-12s %-65s' % ('-' * 11, '-' * 65))
3234 3243  
3235 3244 previous_container = None
3236 3245 count = 0
... ... @@ -3248,14 +3257,14 @@ def main():
3248 3257 if isinstance(data, Exception):
3249 3258 if isinstance(data, PathNotFoundException):
3250 3259 if options.output_mode in ('triage', 'unspecified'):
3251   - print '%-12s %s - File not found' % ('?', filename)
  3260 + print('%-12s %s - File not found' % ('?', filename))
3252 3261 elif options.output_mode != 'json':
3253 3262 log.error('Given path %r does not exist!' % filename)
3254 3263 return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \
3255 3264 else RETURN_SEVERAL_ERRS
3256 3265 else:
3257 3266 if options.output_mode in ('triage', 'unspecified'):
3258   - print '%-12s %s - Failed to read from zip file %s' % ('?', filename, container)
  3267 + print('%-12s %s - Failed to read from zip file %s' % ('?', filename, container))
3259 3268 elif options.output_mode != 'json':
3260 3269 log.error('Exception opening/reading %r from zip file %r: %s'
3261 3270 % (filename, container, data))
... ... @@ -3282,7 +3291,7 @@ def main():
3282 3291 # print container name when it changes:
3283 3292 if container != previous_container:
3284 3293 if container is not None:
3285   - print '\nFiles in %s:' % container
  3294 + print('\nFiles in %s:' % container)
3286 3295 previous_container = container
3287 3296 # summarized output for triage:
3288 3297 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
... ... @@ -3300,8 +3309,8 @@ def main():
3300 3309  
3301 3310 except (SubstreamOpenError, UnexpectedDataError) as exc:
3302 3311 if options.output_mode in ('triage', 'unspecified'):
3303   - print '%-12s %s - Error opening substream or uenxpected ' \
3304   - 'content' % ('?', filename)
  3312 + print('%-12s %s - Error opening substream or uenxpected ' \
  3313 + 'content' % ('?', filename))
3305 3314 elif options.output_mode == 'json':
3306 3315 print_json(file=filename, type='error',
3307 3316 error=type(exc).__name__, message=str(exc))
... ... @@ -3312,7 +3321,7 @@ def main():
3312 3321 else RETURN_SEVERAL_ERRS
3313 3322 except FileOpenError as exc:
3314 3323 if options.output_mode in ('triage', 'unspecified'):
3315   - print '%-12s %s - File format not supported' % ('?', filename)
  3324 + print('%-12s %s - File format not supported' % ('?', filename))
3316 3325 elif options.output_mode == 'json':
3317 3326 print_json(file=filename, type='error',
3318 3327 error=type(exc).__name__, message=str(exc))
... ... @@ -3322,7 +3331,7 @@ def main():
3322 3331 else RETURN_SEVERAL_ERRS
3323 3332 except ProcessingError as exc:
3324 3333 if options.output_mode in ('triage', 'unspecified'):
3325   - print '%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc)
  3334 + print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc))
3326 3335 elif options.output_mode == 'json':
3327 3336 print_json(file=filename, type='error',
3328 3337 error=type(exc).__name__,
... ... @@ -3337,9 +3346,9 @@ def main():
3337 3346 vba_parser.close()
3338 3347  
3339 3348 if options.output_mode == 'triage':
3340   - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \
  3349 + print('\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \
3341 3350 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \
3342   - 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n'
  3351 + 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n')
3343 3352  
3344 3353 if count == 1 and options.output_mode == 'unspecified':
3345 3354 # if options -t, -d and -j were not specified and it's a single file, print details:
... ...
oletools/ppt_parser.py
... ... @@ -1570,4 +1570,4 @@ def iterative_decompress(stream, size, chunk_size=4096):
1570 1570  
1571 1571  
1572 1572 if __name__ == '__main__':
1573   - print 'nothing here to run!'
  1573 + print('nothing here to run!')
... ...
oletools/thirdparty/olefile/olefile.py
... ... @@ -1030,10 +1030,11 @@ class OleDirectoryEntry:
1030 1030 #[PL] this method was added to use simple recursion instead of a complex
1031 1031 # algorithm.
1032 1032 # if this is not a storage or a leaf of the tree, nothing to do:
  1033 +
1033 1034 if child_sid == NOSTREAM:
1034 1035 return
1035 1036 # check if child SID is in the proper range:
1036   - if child_sid<0 or child_sid>=len(self.olefile.direntries):
  1037 + if child_sid <= 0 or child_sid >= len(self.olefile.direntries):
1037 1038 self.olefile._raise_defect(DEFECT_INCORRECT, 'OLE DirEntry index out of range')
1038 1039 else:
1039 1040 # get child direntry:
... ...
oletools/thirdparty/olefile/olefile2.py
... ... @@ -1004,7 +1004,7 @@ class OleFileIO:
1004 1004 TIFF files).
1005 1005 """
1006 1006  
1007   - def __init__(self, filename = None, raise_defects=DEFECT_FATAL):
  1007 + def __init__(self, filename = None, raise_defects=DEFECT_FATAL):
1008 1008 """
1009 1009 Constructor for OleFileIO class.
1010 1010  
... ...
oletools/thirdparty/tablestream/tablestream.py
... ... @@ -236,7 +236,7 @@ class TableStream(object):
236 236 assert len(row) == self.num_columns
237 237 columns = []
238 238 max_lines = 0
239   - for i in xrange(self.num_columns):
  239 + for i in range(self.num_columns):
240 240 cell = row[i]
241 241 # Convert to string:
242 242 # TODO: handle unicode properly
... ... @@ -245,7 +245,7 @@ class TableStream(object):
245 245 # encode to UTF8, avoiding errors
246 246 cell = cell.decode('utf-8', errors='replace')
247 247 else:
248   - cell = unicode(cell)
  248 + cell = cell
249 249 # Wrap cell text according to the column width
250 250 # TODO: use a TextWrapper object for each column instead
251 251 # split the string if it contains newline characters, otherwise
... ... @@ -257,16 +257,16 @@ class TableStream(object):
257 257 if colors is not None and self.outfile.isatty():
258 258 color = colors[i]
259 259 if color:
260   - for j in xrange(len(column)):
  260 + for j in range(len(column)):
261 261 # print '%r: %s' % (column[j], type(column[j]))
262 262 column[j] = colorclass.Color('{auto%s}%s{/%s}' % (color, column[j], color))
263 263 columns.append(column)
264 264 # determine which column has the highest number of lines
265 265 max_lines = max(len(columns[i]), max_lines)
266 266 # transpose: write output line by line
267   - for j in xrange(max_lines):
  267 + for j in range(max_lines):
268 268 self.write(self.style.vertical_left)
269   - for i in xrange(self.num_columns):
  269 + for i in range(self.num_columns):
270 270 column = columns[i]
271 271 if j<len(column):
272 272 # text to be written
... ...