diff --git a/oletools/olevba.py b/oletools/olevba.py index 34504e0..194fc2b 100755 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -214,7 +214,7 @@ __version__ = '0.48' import sys, logging import struct -from _io import StringIO +from _io import StringIO,BytesIO import math import zipfile import re @@ -613,7 +613,7 @@ re_dridex_string = re.compile(r'"[0-9A-Za-z]{20,}"') re_nothex_check = re.compile(r'[G-Zg-z]') # regex to extract printable strings (at least 5 chars) from VBA Forms: -re_printable_string = re.compile(r'[\t\r\n\x20-\xFF]{5,}') +re_printable_string = re.compile(rb'[\t\r\n\x20-\xFF]{5,}') # === PARTIAL VBA GRAMMAR ==================================================== @@ -1043,10 +1043,10 @@ def decompress_stream(compressed_container): # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the # DecompressedBuffer (section 2.4.1.1.2). - decompressed_container = '' # result + decompressed_container = b'' # result compressed_current = 0 - sig_byte = ord(compressed_container[compressed_current]) + sig_byte = compressed_container[compressed_current] if sig_byte != 0x01: raise ValueError('invalid signature byte {0:02X}'.format(sig_byte)) @@ -1092,7 +1092,7 @@ def decompress_stream(compressed_container): # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk # uncompressed chunk: read the next 4096 bytes as-is #TODO: check if there are at least 4096 bytes left - decompressed_container += compressed_container[compressed_current:compressed_current + 4096] + decompressed_container += bytes(compressed_container[compressed_current:compressed_current + 4096]) compressed_current += 4096 else: # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk @@ -1103,7 +1103,7 @@ def decompress_stream(compressed_container): # log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end)) # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or # copy 
tokens (reference to a previous literal token) - flag_byte = ord(compressed_container[compressed_current]) + flag_byte = compressed_container[compressed_current] compressed_current += 1 for bit_index in range(0, 8): # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end)) @@ -1115,7 +1115,7 @@ def decompress_stream(compressed_container): #log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit)) if flag_bit == 0: # LiteralToken # copy one byte directly to output - decompressed_container += compressed_container[compressed_current] + decompressed_container += bytes([compressed_container[compressed_current]]) compressed_current += 1 else: # CopyToken # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken @@ -1130,8 +1130,8 @@ def decompress_stream(compressed_container): offset = (temp1 >> temp2) + 1 #log.debug('offset=%d length=%d' % (offset, length)) copy_source = len(decompressed_container) - offset - for index in xrange(copy_source, copy_source + length): - decompressed_container += decompressed_container[index] + for index in range(copy_source, copy_source + length): + decompressed_container += bytes([decompressed_container[index]]) compressed_current += 2 return decompressed_container @@ -1174,7 +1174,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): code_modules = {} for line in project: - line = line.strip() + line = line.strip().decode('utf-8','ignore') if '=' in line: # split line at the 1st equal sign: name, value = line.split('=', 1) @@ -1205,7 +1205,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): else: raise UnexpectedDataError(dir_path, name, expected, value) - dir_stream = StringIO(decompress_stream(dir_compressed)) + dir_stream = BytesIO(decompress_stream(dir_compressed)) # PROJECTSYSKIND Record projectsyskind_id = struct.unpack("