From 4188b3d137c358ecb4e80e4f78424b609d7a23ac Mon Sep 17 00:00:00 2001 From: kirk-sayre-work Date: Mon, 12 Aug 2019 15:01:33 -0500 Subject: [PATCH] Added --no-xlm option to skip extracting XLM macros. --- oletools/olevba.py | 217 +++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 1 file changed, 43 insertions(+), 174 deletions(-) diff --git a/oletools/olevba.py b/oletools/olevba.py index 211099e..d025945 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -217,9 +217,8 @@ from __future__ import print_function # 2019-03-25 CH: - added decryption of password-protected files # 2019-04-09 PL: - decompress_stream accepts bytes (issue #422) # 2019-05-23 v0.55 PL: - added option --pcode to call pcodedmp and display P-code -# 2019-06-05 PL: - added VBA stomping detection -__version__ = '0.55.dev3' +__version__ = '0.55.dev1' #------------------------------------------------------------------------------ # TODO: @@ -287,6 +286,7 @@ except ImportError: + "or http://effbot.org/zone/element-index.htm") import colorclass +from pcodedmp import pcodedmp # On Windows, colorclass needs to be enabled: if os.name == 'nt': @@ -704,9 +704,8 @@ SUSPICIOUS_KEYWORDS = { 'May run code from a DLL using Excel 4 Macros (XLM/XLF)': ('REGISTER',), 'May inject code into another process': - ('CreateThread', 'CreateUserThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload - 'VirtualAllocEx', 'RtlMoveMemory', 'WriteProcessMemory', - 'SetContextThread', 'QueueApcThread', 'WriteVirtualMemory', 'VirtualProtect' + ('CreateThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload + 'VirtualAllocEx', 'RtlMoveMemory', 'WriteProcessMemory' ), 'May run a shellcode in memory': ('EnumSystemLanguageGroupsW?', # Used by Hancitor in Oct 2016 @@ -2589,10 +2588,6 @@ class VBA_Parser(object): #: Encoding for VBA source code and strings returned by all methods self.encoding = encoding self.xlm_macros = [] - #: Output from pcodedmp, disassembly of the VBA P-code - self.pcodedmp_output = None - #: Flag set to True/False if VBA stomping detected - self.vba_stomping_detected = None # if filename is None: # if isinstance(_file, basestring): @@ -3096,7 +3091,7 @@ class VBA_Parser(object): log.debug('Trace:', exc_trace=True) else: raise SubstreamOpenError(self.filename, d.name, exc) - if self.detect_xlm_macros(): + if (not self.no_xlm) and self.detect_xlm_macros(): self.contains_macros = True return self.contains_macros @@ -3208,15 +3203,6 @@ class VBA_Parser(object): for line in self.xlm_macros: vba_code += "' " + line + '\n' yield ('xlm_macro', 'xlm_macro', 'xlm_macro.txt', vba_code) - # Analyse the VBA P-code to detect VBA stomping: - # If stomping is detected, add a fake VBA module with the P-code as source comments - # so that VBA_Scanner can find keywords and IOCs in it - if self.detect_vba_stomping(): - vba_code = '' - for line in self.pcodedmp_output.splitlines(): - vba_code += "' " + line + '\n' - yield ('VBA P-code', 'VBA P-code', 'VBA_P-code.txt', vba_code) - def extract_all_macros(self): """ @@ -3256,13 +3242,6 @@ class VBA_Parser(object): # Analyze the whole code at once: scanner = VBA_Scanner(self.vba_code_all_modules) self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate) - if self.detect_vba_stomping(): - log.debug('adding VBA stomping to suspicious keywords') - keyword = 'VBA Stomping' - description = 'VBA Stomping was detected: the VBA source code and P-code are different, '\ - 'this may have been used to hide malicious code' - scanner.suspicious_keywords.append((keyword, description)) - scanner.results.append(('Suspicious', keyword, description)) autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = scanner.scan_summary() self.nb_autoexec += autoexec self.nb_suspicious += suspicious @@ -3429,136 +3408,6 @@ class VBA_Parser(object): for variable in oleform.extract_OleFormVariables(ole, form_storage): yield (self.filename, '/'.join(form_storage), variable) - def extract_pcode(self): - """ - Extract and disassemble the VBA P-code, using pcodedmp - - :return: VBA P-code disassembly - :rtype: str - """ - # only run it once: - if self.pcodedmp_output is None: - log.debug('Calling pcodedmp to extract and disassemble the VBA P-code') - # import pcodedmp here to avoid circular imports: - try: - from pcodedmp import pcodedmp - except Exception as e: - # This may happen with Pypy, because pcodedmp imports win_unicode_console... - # TODO: this is a workaround, we just ignore P-code - # TODO: here we just use log.info, because the word "error" in the output makes some of the tests fail... - log.info('Exception when importing pcodedmp: {}'.format(e)) - self.pcodedmp_output = '' - return '' - # logging is disabled after importing pcodedmp, need to re-enable it - # This is because pcodedmp imports olevba again :-/ - # TODO: here it works only if logging was enabled, need to change pcodedmp! - enable_logging() - # pcodedmp prints all its output to sys.stdout, so we need to capture it so that - # we can process the results later on. - # save sys.stdout, then modify it to capture pcodedmp's output: - # stdout = sys.stdout - if PYTHON2: - # on Python 2, console output is bytes - output = BytesIO() - else: - # on Python 3, console output is unicode - output = StringIO() - # sys.stdout = output - # we need to fake an argparser for those two args used by pcodedmp: - class args: - disasmOnly = True - verbose = False - try: - # TODO: handle files in memory too - log.debug('before pcodedmp') - pcodedmp.processFile(self.filename, args, output_file=output) - log.debug('after pcodedmp') - except Exception as e: - # print('Error while running pcodedmp: {}'.format(e), file=sys.stderr, flush=True) - # set sys.stdout back to its original value - # sys.stdout = stdout - log.exception('Error while running pcodedmp') - # finally: - # # set sys.stdout back to its original value - # sys.stdout = stdout - self.pcodedmp_output = output.getvalue() - # print(self.pcodedmp_output) - # log.debug(self.pcodedmp_output) - return self.pcodedmp_output - - def detect_vba_stomping(self): - """ - Detect VBA stomping, by comparing the keywords present in the P-code and - in the VBA source code. - - :return: True if VBA stomping detected, False otherwise - :rtype: bool - """ - # only run it once: - if self.vba_stomping_detected is None: - log.debug('Analysing the P-code to detect VBA stomping') - self.extract_pcode() - # print('pcodedmp OK') - log.debug('pcodedmp OK') - # process the output to extract keywords, to detect VBA stomping - keywords = set() - for line in self.pcodedmp_output.splitlines(): - if line.startswith('\t'): - log.debug('P-code: ' + line.strip()) - tokens = line.split(None, 1) - mnemonic = tokens[0] - args = '' - if len(tokens) == 2: - args = tokens[1].strip() - # log.debug(repr([mnemonic, args])) - # if mnemonic in ('VarDefn',): - # # just add the rest of the line - # keywords.add(args) - # if mnemonic == 'FuncDefn': - # # function definition: just strip parentheses - # funcdefn = args.strip('()') - # keywords.add(funcdefn) - if mnemonic in ('ArgsCall', 'ArgsLd', 'St', 'Ld', 'MemSt', 'Label'): - # add 1st argument: - name = args.split(None, 1)[0] - # sometimes pcodedmp reports names like "id_FFFF", which are not - # directly present in the VBA source code - # (for example "Me" in VBA appears as id_FFFF in P-code) - if not name.startswith('id_'): - keywords.add(name) - if mnemonic == 'LitStr': - # re_string = re.compile(r'\"([^\"]|\"\")*\"') - # for match in re_string.finditer(line): - # print('\t' + match.group()) - # the string is the 2nd argument: - s = args.split(None, 1)[1] - # tricky issue: when a string contains double quotes inside, - # pcodedmp returns a single ", whereas in the VBA source code - # it is always a double "". - # We have to remove the " around the strings, then double the remaining ", - # and put back the " around: - if len(s)>=2: - assert(s[0]=='"' and s[-1]=='"') - s = s[1:-1] - s = s.replace('"', '""') - s = '"' + s + '"' - keywords.add(s) - log.debug('Keywords extracted from P-code: ' + repr(sorted(keywords))) - self.vba_stomping_detected = False - # TODO: add a method to get all VBA code as one string - vba_code_all_modules = '' - for (_, _, _, vba_code) in self.extract_all_macros(): - vba_code_all_modules += vba_code + '\n' - for keyword in keywords: - if keyword not in vba_code_all_modules: - log.debug('Keyword {!r} not found in VBA code'.format(keyword)) - log.debug('VBA STOMPING DETECTED!') - self.vba_stomping_detected = True - break - if not self.vba_stomping_detected: - log.debug('No VBA stomping detected.') - return self.vba_stomping_detected - def close(self): """ Close all the open files. This method must be called after usage, if @@ -3629,8 +3478,6 @@ class VBA_Parser_CLI(VBA_Parser): color_type = COLOR_TYPE.get(kw_type, None) t.write_row((kw_type, keyword, description), colors=(color_type, None, None)) t.close() - if self.vba_stomping_detected: - print('VBA Stomping detection is experimental: please report any false positive/negative at https://github.com/decalage2/oletools/issues') else: print('No suspicious keyword or IOC found.') @@ -3673,7 +3520,7 @@ class VBA_Parser_CLI(VBA_Parser): def process_file(self, show_decoded_strings=False, display_code=True, hide_attributes=True, vba_code_only=False, show_deobfuscated_code=False, - deobfuscate=False, pcode=False): + deobfuscate=False, pcode=False, no_xlm=False): """ Process a single file @@ -3686,9 +3533,11 @@ class VBA_Parser_CLI(VBA_Parser): :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default) :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow) :param pcode bool: if True, call pcodedmp to disassemble P-code and display it + :param no_xlm bool: if True, don't use the BIFF plugin to extract old style XLM macros """ #TODO: replace print by writing to a provided output file (sys.stdout by default) # fix conflicting parameters: + self.no_xlm = no_xlm if vba_code_only and not display_code: display_code = True if self.container: @@ -3758,8 +3607,30 @@ class VBA_Parser_CLI(VBA_Parser): if pcode: print('-' * 79) print('P-CODE disassembly:') - pcode = self.extract_pcode() - print(pcode) + # pcodedmp prints all its output to sys.stdout, so we need to capture it so that + # we can process the results later on. + # save sys.stdout, then modify it to capture pcodedmp's output: + stdout = sys.stdout + if PYTHON2: + # on Python 2, console output is bytes + output = BytesIO() + else: + # on Python 3, console output is unicode + output = StringIO() + sys.stdout = output + # we need to fake an argparser for those two args used by pcodedmp: + class args: + disasmOnly = True + verbose = False + try: + # TODO: handle files in memory too + pcodedmp.processFile(self.filename, args) + except Exception: + log.error('Error while running pcodedmp') + finally: + # set sys.stdout back to its original value + sys.stdout = stdout + print(output.getvalue()) if not vba_code_only: # analyse the code from all modules at once: @@ -3782,7 +3653,7 @@ class VBA_Parser_CLI(VBA_Parser): def process_file_json(self, show_decoded_strings=False, display_code=True, hide_attributes=True, vba_code_only=False, show_deobfuscated_code=False, - deobfuscate=False): + deobfuscate=False, no_xlm=False): """ Process a single file @@ -3799,6 +3670,7 @@ class VBA_Parser_CLI(VBA_Parser): """ #TODO: fix conflicting parameters (?) + self.no_xlm = no_xlm if vba_code_only and not display_code: display_code = True @@ -3949,6 +3821,8 @@ def parse_args(cmd_line_args=None): help="Do not raise errors if opening of substream fails") parser.add_option('--pcode', dest="pcode", action="store_true", default=False, help="Disassemble and display the P-code (using pcodedmp)") + parser.add_option('--no-xlm', dest="no_xlm", action="store_true", default=False, + help="Do not extract XLM Excel macros. This may speed up analysis of large files.") (options, args) = parser.parse_args(cmd_line_args) @@ -3983,21 +3857,21 @@ def process_file(filename, data, container, options, crypto_nesting=0): if options.output_mode == 'detailed': # fully detailed output vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, - display_code=options.display_code, - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, - show_deobfuscated_code=options.show_deobfuscated_code, - deobfuscate=options.deobfuscate, pcode=options.pcode) + display_code=options.display_code, + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, + show_deobfuscated_code=options.show_deobfuscated_code, + deobfuscate=options.deobfuscate, pcode=options.pcode, no_xlm=options.no_xlm) elif options.output_mode == 'triage': # summarized output for triage: vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, - deobfuscate=options.deobfuscate) + deobfuscate=options.deobfuscate, no_xlm=options.no_xlm) elif options.output_mode == 'json': print_json( vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, display_code=options.display_code, hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, show_deobfuscated_code=options.show_deobfuscated_code, - deobfuscate=options.deobfuscate)) + deobfuscate=options.deobfuscate, no_xlm=options.no_xlm)) else: # (should be impossible) raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode)) @@ -4064,13 +3938,8 @@ def process_file(filename, data, container, options, crypto_nesting=0): except Exception: raise finally: # clean up - try: - log.debug('Removing crypt temp file {}'.format(decrypted_file)) + if decrypted_file is not None and os.path.isfile(decrypted_file): os.unlink(decrypted_file) - except Exception: # e.g. file does not exist or is None - pass - # no idea what to return now - raise Exception('Programming error -- should never have reached this!') def main(cmd_line_args=None): -- libgit2 0.21.4