From 2394f6191156016ae979a32319633fa6e702a220 Mon Sep 17 00:00:00 2001 From: decalage2 Date: Mon, 28 Sep 2020 21:03:31 +0200 Subject: [PATCH] olevba: added VBA_Parser.get_vba_code_all_modules, partial fix for issue #619, updated mraptor to use it --- oletools/mraptor.py | 5 ++--- oletools/olevba.py | 33 ++++++++++++++++++++++++--------- setup.py | 2 +- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/oletools/mraptor.py b/oletools/mraptor.py index 198b90d..daf8728 100644 --- a/oletools/mraptor.py +++ b/oletools/mraptor.py @@ -62,7 +62,7 @@ http://www.decalage.info/python/oletools # 2019-11-06 v0.55 PL: - added SetTimer # 2020-04-20 v0.56 PL: - added keywords RUN and CALL for XLM macros (issue #562) -__version__ = '0.56dev5' +__version__ = '0.56dev12' #------------------------------------------------------------------------------ # TODO: @@ -318,8 +318,7 @@ def main(): if vba_parser.detect_vba_macros(): vba_code_all_modules = '' try: - for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_all_macros(): - vba_code_all_modules += vba_code + '\n' + vba_code_all_modules = vba_parser.get_vba_code_all_modules() except Exception as e: # log.error('Error when parsing VBA macros from file %r' % full_name) result = Result_Error diff --git a/oletools/olevba.py b/oletools/olevba.py index d820f00..9a31540 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -231,8 +231,10 @@ from __future__ import print_function # 2020-09-16 PL: - enabled relaxed mode by default (issues #477, #593) # - fixed detect_vba_macros to always return VBA code as # unicode on Python 3 (issues #455, #477, #587, #593) +# 2020-09-28 PL: - added VBA_Parser.get_vba_code_all_modules (partial fix +# for issue #619) -__version__ = '0.56dev11' +__version__ = '0.56dev12' #------------------------------------------------------------------------------ # TODO: @@ -3401,6 +3403,7 @@ class VBA_Parser(object): by calling extract_macros(), store the results as a list of tuples (filename, stream_path, vba_filename, vba_code) in self.modules. See extract_macros for details. + :returns: list of tuples (filename, stream_path, vba_filename, vba_code) """ if self.modules is None: self.modules = [] @@ -3410,6 +3413,23 @@ class VBA_Parser(object): return self.modules + def get_vba_code_all_modules(self): + """ + Extract the VBA macro source code from all modules, and return it + as a single string (str) with all modules concatenated. + If an exception is triggered when decompressing a VBA module, it + will not be included. The error is logged but the exception is not + raised further. + :return: str + """ + vba_code_all_modules = '' + for (_, _, _, vba_code) in self.extract_all_macros(): + if not isinstance(vba_code, str): + log.error('VBA code returned by extract_all_macros is not a string') + else: + vba_code_all_modules += vba_code + '\n' + return vba_code_all_modules + def analyze_macros(self, show_decoded_strings=False, deobfuscate=False): """ @@ -3424,10 +3444,7 @@ class VBA_Parser(object): return self.analysis_results # variable to merge source code from all modules: if self.vba_code_all_modules is None: - self.vba_code_all_modules = '' - for (_, _, _, vba_code) in self.extract_all_macros(): - #TODO: filter code? (each module) - self.vba_code_all_modules += vba_code + '\n' + self.vba_code_all_modules = self.get_vba_code_all_modules() for (_, _, form_string) in self.extract_form_strings(): self.vba_code_all_modules += form_string + '\n' # Analyze the whole code at once: @@ -3749,10 +3766,8 @@ class VBA_Parser(object): keywords.add(s) log.debug('Keywords extracted from P-code: ' + repr(sorted(keywords))) self.vba_stomping_detected = False - # TODO: add a method to get all VBA code as one string - vba_code_all_modules = '' - for (_, _, _, vba_code) in self.extract_all_macros(): - vba_code_all_modules += vba_code + '\n' + # get all VBA code as one string + vba_code_all_modules = self.get_vba_code_all_modules() for keyword in keywords: if keyword not in vba_code_all_modules: log.debug('Keyword {!r} not found in VBA code'.format(keyword)) diff --git a/setup.py b/setup.py index aa7bf06..f2a247f 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,7 @@ import os, fnmatch #--- METADATA ----------------------------------------------------------------- name = "oletools" -version = '0.56dev11' +version = '0.56dev12' desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" long_desc = open('oletools/README.rst').read() author = "Philippe Lagadec" -- libgit2 0.21.4