Commit 2394f6191156016ae979a32319633fa6e702a220
1 parent
40faecbf
olevba: added VBA_Parser.get_vba_code_all_modules, partial fix for issue #619, updated mraptor to use it
Showing 3 changed files with 27 additions and 13 deletions
oletools/mraptor.py
| @@ -62,7 +62,7 @@ http://www.decalage.info/python/oletools | @@ -62,7 +62,7 @@ http://www.decalage.info/python/oletools | ||
| 62 | # 2019-11-06 v0.55 PL: - added SetTimer | 62 | # 2019-11-06 v0.55 PL: - added SetTimer |
| 63 | # 2020-04-20 v0.56 PL: - added keywords RUN and CALL for XLM macros (issue #562) | 63 | # 2020-04-20 v0.56 PL: - added keywords RUN and CALL for XLM macros (issue #562) |
| 64 | 64 | ||
| 65 | -__version__ = '0.56dev5' | 65 | +__version__ = '0.56dev12' |
| 66 | 66 | ||
| 67 | #------------------------------------------------------------------------------ | 67 | #------------------------------------------------------------------------------ |
| 68 | # TODO: | 68 | # TODO: |
| @@ -318,8 +318,7 @@ def main(): | @@ -318,8 +318,7 @@ def main(): | ||
| 318 | if vba_parser.detect_vba_macros(): | 318 | if vba_parser.detect_vba_macros(): |
| 319 | vba_code_all_modules = '' | 319 | vba_code_all_modules = '' |
| 320 | try: | 320 | try: |
| 321 | - for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_all_macros(): | ||
| 322 | - vba_code_all_modules += vba_code + '\n' | 321 | + vba_code_all_modules = vba_parser.get_vba_code_all_modules() |
| 323 | except Exception as e: | 322 | except Exception as e: |
| 324 | # log.error('Error when parsing VBA macros from file %r' % full_name) | 323 | # log.error('Error when parsing VBA macros from file %r' % full_name) |
| 325 | result = Result_Error | 324 | result = Result_Error |
oletools/olevba.py
| @@ -231,8 +231,10 @@ from __future__ import print_function | @@ -231,8 +231,10 @@ from __future__ import print_function | ||
| 231 | # 2020-09-16 PL: - enabled relaxed mode by default (issues #477, #593) | 231 | # 2020-09-16 PL: - enabled relaxed mode by default (issues #477, #593) |
| 232 | # - fixed detect_vba_macros to always return VBA code as | 232 | # - fixed detect_vba_macros to always return VBA code as |
| 233 | # unicode on Python 3 (issues #455, #477, #587, #593) | 233 | # unicode on Python 3 (issues #455, #477, #587, #593) |
| 234 | +# 2020-09-28 PL: - added VBA_Parser.get_vba_code_all_modules (partial fix | ||
| 235 | +# for issue #619) | ||
| 234 | 236 | ||
| 235 | -__version__ = '0.56dev11' | 237 | +__version__ = '0.56dev12' |
| 236 | 238 | ||
| 237 | #------------------------------------------------------------------------------ | 239 | #------------------------------------------------------------------------------ |
| 238 | # TODO: | 240 | # TODO: |
| @@ -3401,6 +3403,7 @@ class VBA_Parser(object): | @@ -3401,6 +3403,7 @@ class VBA_Parser(object): | ||
| 3401 | by calling extract_macros(), store the results as a list of tuples | 3403 | by calling extract_macros(), store the results as a list of tuples |
| 3402 | (filename, stream_path, vba_filename, vba_code) in self.modules. | 3404 | (filename, stream_path, vba_filename, vba_code) in self.modules. |
| 3403 | See extract_macros for details. | 3405 | See extract_macros for details. |
| 3406 | + :returns: list of tuples (filename, stream_path, vba_filename, vba_code) | ||
| 3404 | """ | 3407 | """ |
| 3405 | if self.modules is None: | 3408 | if self.modules is None: |
| 3406 | self.modules = [] | 3409 | self.modules = [] |
| @@ -3410,6 +3413,23 @@ class VBA_Parser(object): | @@ -3410,6 +3413,23 @@ class VBA_Parser(object): | ||
| 3410 | return self.modules | 3413 | return self.modules |
| 3411 | 3414 | ||
| 3412 | 3415 | ||
| 3416 | + def get_vba_code_all_modules(self): | ||
| 3417 | + """ | ||
| 3418 | + Extract the VBA macro source code from all modules, and return it | ||
| 3419 | + as a single string (str) with all modules concatenated. | ||
| 3420 | + If an exception is triggered when decompressing a VBA module, it | ||
| 3421 | + will not be included. The error is logged but the exception is not | ||
| 3422 | + raised further. | ||
| 3423 | + :return: str | ||
| 3424 | + """ | ||
| 3425 | + vba_code_all_modules = '' | ||
| 3426 | + for (_, _, _, vba_code) in self.extract_all_macros(): | ||
| 3427 | + if not isinstance(vba_code, str): | ||
| 3428 | + log.error('VBA code returned by extract_all_macros is not a string') | ||
| 3429 | + else: | ||
| 3430 | + vba_code_all_modules += vba_code + '\n' | ||
| 3431 | + return vba_code_all_modules | ||
| 3432 | + | ||
| 3413 | 3433 | ||
| 3414 | def analyze_macros(self, show_decoded_strings=False, deobfuscate=False): | 3434 | def analyze_macros(self, show_decoded_strings=False, deobfuscate=False): |
| 3415 | """ | 3435 | """ |
| @@ -3424,10 +3444,7 @@ class VBA_Parser(object): | @@ -3424,10 +3444,7 @@ class VBA_Parser(object): | ||
| 3424 | return self.analysis_results | 3444 | return self.analysis_results |
| 3425 | # variable to merge source code from all modules: | 3445 | # variable to merge source code from all modules: |
| 3426 | if self.vba_code_all_modules is None: | 3446 | if self.vba_code_all_modules is None: |
| 3427 | - self.vba_code_all_modules = '' | ||
| 3428 | - for (_, _, _, vba_code) in self.extract_all_macros(): | ||
| 3429 | - #TODO: filter code? (each module) | ||
| 3430 | - self.vba_code_all_modules += vba_code + '\n' | 3447 | + self.vba_code_all_modules = self.get_vba_code_all_modules() |
| 3431 | for (_, _, form_string) in self.extract_form_strings(): | 3448 | for (_, _, form_string) in self.extract_form_strings(): |
| 3432 | self.vba_code_all_modules += form_string + '\n' | 3449 | self.vba_code_all_modules += form_string + '\n' |
| 3433 | # Analyze the whole code at once: | 3450 | # Analyze the whole code at once: |
| @@ -3749,10 +3766,8 @@ class VBA_Parser(object): | @@ -3749,10 +3766,8 @@ class VBA_Parser(object): | ||
| 3749 | keywords.add(s) | 3766 | keywords.add(s) |
| 3750 | log.debug('Keywords extracted from P-code: ' + repr(sorted(keywords))) | 3767 | log.debug('Keywords extracted from P-code: ' + repr(sorted(keywords))) |
| 3751 | self.vba_stomping_detected = False | 3768 | self.vba_stomping_detected = False |
| 3752 | - # TODO: add a method to get all VBA code as one string | ||
| 3753 | - vba_code_all_modules = '' | ||
| 3754 | - for (_, _, _, vba_code) in self.extract_all_macros(): | ||
| 3755 | - vba_code_all_modules += vba_code + '\n' | 3769 | + # get all VBA code as one string |
| 3770 | + vba_code_all_modules = self.get_vba_code_all_modules() | ||
| 3756 | for keyword in keywords: | 3771 | for keyword in keywords: |
| 3757 | if keyword not in vba_code_all_modules: | 3772 | if keyword not in vba_code_all_modules: |
| 3758 | log.debug('Keyword {!r} not found in VBA code'.format(keyword)) | 3773 | log.debug('Keyword {!r} not found in VBA code'.format(keyword)) |
setup.py
| @@ -52,7 +52,7 @@ import os, fnmatch | @@ -52,7 +52,7 @@ import os, fnmatch | ||
| 52 | #--- METADATA ----------------------------------------------------------------- | 52 | #--- METADATA ----------------------------------------------------------------- |
| 53 | 53 | ||
| 54 | name = "oletools" | 54 | name = "oletools" |
| 55 | -version = '0.56dev11' | 55 | +version = '0.56dev12' |
| 56 | desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" | 56 | desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" |
| 57 | long_desc = open('oletools/README.rst').read() | 57 | long_desc = open('oletools/README.rst').read() |
| 58 | author = "Philippe Lagadec" | 58 | author = "Philippe Lagadec" |