From ea37768589cffccc2835cfa503dfe50e893773de Mon Sep 17 00:00:00 2001 From: decalage2 Date: Wed, 12 Dec 2018 16:01:22 +0100 Subject: [PATCH] sync olevba and olevba3 (work in progress): updated decompress_stream to use a bytearray --- oletools/olevba.py | 31 ++++++++++++++++++++----------- oletools/olevba3.py | 47 ++++++++++++----------------------------------- 2 files changed, 32 insertions(+), 46 deletions(-) diff --git a/oletools/olevba.py b/oletools/olevba.py index d8c3e0c..a2df8f9 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -322,6 +322,9 @@ email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t if sys.version_info[0] <= 2: # Python 2.x + # to use ord on bytes/bytearray items the same way in Python 2+3 + # on Python 2, just use the normal ord() because items are bytes + byte_ord = ord if sys.version_info[1] <= 6: # Python 2.6 # use is_zipfile backported from Python 2.7: @@ -331,6 +334,9 @@ if sys.version_info[0] <= 2: from zipfile import is_zipfile else: # Python 3.x+ + # to use ord on bytes/bytearray items the same way in Python 2+3 + # on Python 3, items are int, so just return the item + byte_ord = lambda x: x from zipfile import is_zipfile # xrange is now called range: xrange = range @@ -1235,10 +1241,13 @@ def decompress_stream(compressed_container): # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the # DecompressedBuffer (section 2.4.1.1.2). - decompressed_container = '' # result + # Check the input is a bytearray: + if not isinstance(compressed_container, bytearray): + raise TypeError('decompress_stream requires a bytearray as input') + decompressed_container = bytearray() # result compressed_current = 0 - sig_byte = ord(compressed_container[compressed_current]) + sig_byte = compressed_container[compressed_current] if sig_byte != 0x01: raise ValueError('invalid signature byte {0:02X}'.format(sig_byte)) @@ -1284,7 +1293,7 @@ def decompress_stream(compressed_container): # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk # uncompressed chunk: read the next 4096 bytes as-is #TODO: check if there are at least 4096 bytes left - decompressed_container += compressed_container[compressed_current:compressed_current + 4096] + decompressed_container.extend([compressed_container[compressed_current:compressed_current + 4096]]) compressed_current += 4096 else: # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk @@ -1295,7 +1304,7 @@ def decompress_stream(compressed_container): # log.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end)) # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or # copy tokens (reference to a previous literal token) - flag_byte = ord(compressed_container[compressed_current]) + flag_byte = compressed_container[compressed_current] compressed_current += 1 for bit_index in xrange(0, 8): # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end)) @@ -1307,7 +1316,7 @@ def decompress_stream(compressed_container): #log.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit)) if flag_bit == 0: # LiteralToken # copy one byte directly to output - decompressed_container += compressed_container[compressed_current] + decompressed_container.extend([compressed_container[compressed_current]]) compressed_current += 1 else: # CopyToken # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken @@ -1323,9 +1332,9 @@ def decompress_stream(compressed_container): #log.debug('offset=%d length=%d' % (offset, length)) copy_source = len(decompressed_container) - offset for index in xrange(copy_source, copy_source + length): - decompressed_container += decompressed_container[index] + decompressed_container.extend([decompressed_container[index]]) compressed_current += 2 - return decompressed_container + return bytes(decompressed_container) def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1366,6 +1375,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): code_modules = {} for line in project: + log.debug('PROJECT: %r' % line) line = line.strip() if '=' in line: # split line at the 1st equal sign: @@ -1397,7 +1407,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): else: raise UnexpectedDataError(dir_path, name, expected, value) - dir_stream = BytesIO(decompress_stream(dir_compressed)) + dir_stream = BytesIO(decompress_stream(bytearray(dir_compressed))) # PROJECTSYSKIND Record projectsyskind_id = struct.unpack(" 0: - code_data = decompress_stream(code_data) + code_data = decompress_stream(bytearray(code_data)) # case-insensitive search in the code_modules dict to find the file extension: filext = code_modules.get(modulename_modulename.lower(), 'bin') filename = '{0}.{1}'.format(modulename_modulename, filext) @@ -2120,7 +2130,6 @@ def print_json(json_dict=None, _json_is_first=False, _json_is_last=False, :param bool _json_is_last: set to True only for very last entry to complete the top-level json-list """ - if json_dict and json_parts: raise ValueError('Invalid json argument: want either single dict or ' 'key=value parts but got both)') @@ -2949,7 +2958,7 @@ class VBA_Parser(object): log.debug('Found VBA compressed code at index %X' % start) compressed_code = data[start:] try: - vba_code = decompress_stream(compressed_code) + vba_code = decompress_stream(bytearray(compressed_code)) yield (self.filename, d.name, d.name, vba_code) except Exception as exc: # display the exception with full stack trace for debugging diff --git a/oletools/olevba3.py b/oletools/olevba3.py index 514ce14..346d4cf 100644 --- a/oletools/olevba3.py +++ b/oletools/olevba3.py @@ -1232,6 +1232,9 @@ def decompress_stream(compressed_container): # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the # DecompressedBuffer (section 2.4.1.1.2). + # Check the input is a bytearray: + if not isinstance(compressed_container, bytearray): + raise TypeError('decompress_stream requires a bytearray as input') decompressed_container = bytearray() # result compressed_current = 0 @@ -1294,7 +1297,7 @@ def decompress_stream(compressed_container): # copy tokens (reference to a previous literal token) flag_byte = compressed_container[compressed_current] compressed_current += 1 - for bit_index in range(0, 8): + for bit_index in xrange(0, 8): # log.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end)) if compressed_current >= compressed_end: break @@ -1319,7 +1322,7 @@ def decompress_stream(compressed_container): offset = (temp1 >> temp2) + 1 #log.debug('offset=%d length=%d' % (offset, length)) copy_source = len(decompressed_container) - offset - for index in range(copy_source, copy_source + length): + for index in xrange(copy_source, copy_source + length): decompressed_container.extend([decompressed_container[index]]) compressed_current += 2 return bytes(decompressed_container) @@ -1394,7 +1397,7 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): else: raise UnexpectedDataError(dir_path, name, expected, value) - dir_stream = BytesIO(decompress_stream(dir_compressed)) + dir_stream = BytesIO(decompress_stream(bytearray(dir_compressed))) # PROJECTSYSKIND Record projectsyskind_id = struct.unpack(" 0: - code_data = decompress_stream(code_data) + code_data = decompress_stream(bytearray(code_data)) # case-insensitive search in the code_modules dict to find the file extension: filext = code_modules.get(modulename_modulename.lower(), 'bin') filename = '{0}.{1}'.format(modulename_modulename, filext) @@ -2950,7 +2953,7 @@ class VBA_Parser(object): log.debug('Found VBA compressed code at index %X' % start) compressed_code = data[start:] try: - vba_code = decompress_stream(compressed_code) + vba_code = decompress_stream(bytearray(compressed_code)) yield (self.filename, d.name, d.name, vba_code) except Exception as exc: # display the exception with full stack trace for debugging @@ -2978,6 +2981,8 @@ class VBA_Parser(object): """ runs extract_macros and analyze the source code of all VBA macros found in the file. + All results are stored in self.analysis_results. + If called more than once, simply returns the previous results. """ if self.detect_vba_macros(): # if the analysis was already done, avoid doing it twice: @@ -3390,16 +3395,6 @@ class VBA_Parser_CLI(VBA_Parser): line = '%-12s %s' % (flags, self.filename) print(line) - - # old table display: - # macros = autoexec = suspicious = iocs = hexstrings = 'no' - # if nb_macros: macros = 'YES:%d' % nb_macros - # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec - # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious - # if nb_iocs: iocs = 'YES:%d' % nb_iocs - # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings - # # 2nd line = info - # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (self.type, macros, autoexec, suspicious, iocs, hexstrings) except Exception as exc: # display the exception with full stack trace for debugging only log.debug('Error processing file %s (%s)' % (self.filename, exc), @@ -3407,20 +3402,6 @@ class VBA_Parser_CLI(VBA_Parser): raise ProcessingError(self.filename, exc) - # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'), - # header=False, border=False) - # t.align = 'l' - # t.max_width['filename'] = 30 - # t.max_width['type'] = 10 - # t.max_width['macros'] = 6 - # t.max_width['autoexec'] = 6 - # t.max_width['suspicious'] = 6 - # t.max_width['ioc'] = 6 - # t.max_width['hexstrings'] = 6 - # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings)) - # print t - - #=== MAIN ===================================================================== def parse_args(cmd_line_args=None): @@ -3515,10 +3496,6 @@ def main(cmd_line_args=None): # enable logging in the modules: enable_logging() - # Old display with number of items detected: - # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr') - # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7) - # with the option --reveal, make sure --deobf is also enabled: if options.show_deobfuscated_code and not options.deobfuscate: log.info('set --deobf because --reveal was set') -- libgit2 0.21.4