Commit 4188b3d137c358ecb4e80e4f78424b609d7a23ac

Authored by kirk-sayre-work
1 parent 677d9ad5

Added --no-xlm option to skip extracting XLM macros.

Showing 1 changed file with 43 additions and 174 deletions
oletools/olevba.py
... ... @@ -217,9 +217,8 @@ from __future__ import print_function
217 217 # 2019-03-25 CH: - added decryption of password-protected files
218 218 # 2019-04-09 PL: - decompress_stream accepts bytes (issue #422)
219 219 # 2019-05-23 v0.55 PL: - added option --pcode to call pcodedmp and display P-code
220   -# 2019-06-05 PL: - added VBA stomping detection
221 220  
222   -__version__ = '0.55.dev3'
  221 +__version__ = '0.55.dev1'
223 222  
224 223 #------------------------------------------------------------------------------
225 224 # TODO:
... ... @@ -287,6 +286,7 @@ except ImportError:
287 286 + "or http://effbot.org/zone/element-index.htm")
288 287  
289 288 import colorclass
  289 +from pcodedmp import pcodedmp
290 290  
291 291 # On Windows, colorclass needs to be enabled:
292 292 if os.name == 'nt':
... ... @@ -704,9 +704,8 @@ SUSPICIOUS_KEYWORDS = {
704 704 'May run code from a DLL using Excel 4 Macros (XLM/XLF)':
705 705 ('REGISTER',),
706 706 'May inject code into another process':
707   - ('CreateThread', 'CreateUserThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload
708   - 'VirtualAllocEx', 'RtlMoveMemory', 'WriteProcessMemory',
709   - 'SetContextThread', 'QueueApcThread', 'WriteVirtualMemory', 'VirtualProtect'
  707 + ('CreateThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload
  708 + 'VirtualAllocEx', 'RtlMoveMemory', 'WriteProcessMemory'
710 709 ),
711 710 'May run a shellcode in memory':
712 711 ('EnumSystemLanguageGroupsW?', # Used by Hancitor in Oct 2016
... ... @@ -2589,10 +2588,6 @@ class VBA_Parser(object):
2589 2588 #: Encoding for VBA source code and strings returned by all methods
2590 2589 self.encoding = encoding
2591 2590 self.xlm_macros = []
2592   - #: Output from pcodedmp, disassembly of the VBA P-code
2593   - self.pcodedmp_output = None
2594   - #: Flag set to True/False if VBA stomping detected
2595   - self.vba_stomping_detected = None
2596 2591  
2597 2592 # if filename is None:
2598 2593 # if isinstance(_file, basestring):
... ... @@ -3096,7 +3091,7 @@ class VBA_Parser(object):
3096 3091 log.debug('Trace:', exc_trace=True)
3097 3092 else:
3098 3093 raise SubstreamOpenError(self.filename, d.name, exc)
3099   - if self.detect_xlm_macros():
  3094 + if (not self.no_xlm) and self.detect_xlm_macros():
3100 3095 self.contains_macros = True
3101 3096 return self.contains_macros
3102 3097  
... ... @@ -3208,15 +3203,6 @@ class VBA_Parser(object):
3208 3203 for line in self.xlm_macros:
3209 3204 vba_code += "' " + line + '\n'
3210 3205 yield ('xlm_macro', 'xlm_macro', 'xlm_macro.txt', vba_code)
3211   - # Analyse the VBA P-code to detect VBA stomping:
3212   - # If stomping is detected, add a fake VBA module with the P-code as source comments
3213   - # so that VBA_Scanner can find keywords and IOCs in it
3214   - if self.detect_vba_stomping():
3215   - vba_code = ''
3216   - for line in self.pcodedmp_output.splitlines():
3217   - vba_code += "' " + line + '\n'
3218   - yield ('VBA P-code', 'VBA P-code', 'VBA_P-code.txt', vba_code)
3219   -
3220 3206  
3221 3207 def extract_all_macros(self):
3222 3208 """
... ... @@ -3256,13 +3242,6 @@ class VBA_Parser(object):
3256 3242 # Analyze the whole code at once:
3257 3243 scanner = VBA_Scanner(self.vba_code_all_modules)
3258 3244 self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate)
3259   - if self.detect_vba_stomping():
3260   - log.debug('adding VBA stomping to suspicious keywords')
3261   - keyword = 'VBA Stomping'
3262   - description = 'VBA Stomping was detected: the VBA source code and P-code are different, '\
3263   - 'this may have been used to hide malicious code'
3264   - scanner.suspicious_keywords.append((keyword, description))
3265   - scanner.results.append(('Suspicious', keyword, description))
3266 3245 autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = scanner.scan_summary()
3267 3246 self.nb_autoexec += autoexec
3268 3247 self.nb_suspicious += suspicious
... ... @@ -3429,136 +3408,6 @@ class VBA_Parser(object):
3429 3408 for variable in oleform.extract_OleFormVariables(ole, form_storage):
3430 3409 yield (self.filename, '/'.join(form_storage), variable)
3431 3410  
3432   - def extract_pcode(self):
3433   - """
3434   - Extract and disassemble the VBA P-code, using pcodedmp
3435   -
3436   - :return: VBA P-code disassembly
3437   - :rtype: str
3438   - """
3439   - # only run it once:
3440   - if self.pcodedmp_output is None:
3441   - log.debug('Calling pcodedmp to extract and disassemble the VBA P-code')
3442   - # import pcodedmp here to avoid circular imports:
3443   - try:
3444   - from pcodedmp import pcodedmp
3445   - except Exception as e:
3446   - # This may happen with Pypy, because pcodedmp imports win_unicode_console...
3447   - # TODO: this is a workaround, we just ignore P-code
3448   - # TODO: here we just use log.info, because the word "error" in the output makes some of the tests fail...
3449   - log.info('Exception when importing pcodedmp: {}'.format(e))
3450   - self.pcodedmp_output = ''
3451   - return ''
3452   - # logging is disabled after importing pcodedmp, need to re-enable it
3453   - # This is because pcodedmp imports olevba again :-/
3454   - # TODO: here it works only if logging was enabled, need to change pcodedmp!
3455   - enable_logging()
3456   - # pcodedmp prints all its output to sys.stdout, so we need to capture it so that
3457   - # we can process the results later on.
3458   - # save sys.stdout, then modify it to capture pcodedmp's output:
3459   - # stdout = sys.stdout
3460   - if PYTHON2:
3461   - # on Python 2, console output is bytes
3462   - output = BytesIO()
3463   - else:
3464   - # on Python 3, console output is unicode
3465   - output = StringIO()
3466   - # sys.stdout = output
3467   - # we need to fake an argparser for those two args used by pcodedmp:
3468   - class args:
3469   - disasmOnly = True
3470   - verbose = False
3471   - try:
3472   - # TODO: handle files in memory too
3473   - log.debug('before pcodedmp')
3474   - pcodedmp.processFile(self.filename, args, output_file=output)
3475   - log.debug('after pcodedmp')
3476   - except Exception as e:
3477   - # print('Error while running pcodedmp: {}'.format(e), file=sys.stderr, flush=True)
3478   - # set sys.stdout back to its original value
3479   - # sys.stdout = stdout
3480   - log.exception('Error while running pcodedmp')
3481   - # finally:
3482   - # # set sys.stdout back to its original value
3483   - # sys.stdout = stdout
3484   - self.pcodedmp_output = output.getvalue()
3485   - # print(self.pcodedmp_output)
3486   - # log.debug(self.pcodedmp_output)
3487   - return self.pcodedmp_output
3488   -
3489   - def detect_vba_stomping(self):
3490   - """
3491   - Detect VBA stomping, by comparing the keywords present in the P-code and
3492   - in the VBA source code.
3493   -
3494   - :return: True if VBA stomping detected, False otherwise
3495   - :rtype: bool
3496   - """
3497   - # only run it once:
3498   - if self.vba_stomping_detected is None:
3499   - log.debug('Analysing the P-code to detect VBA stomping')
3500   - self.extract_pcode()
3501   - # print('pcodedmp OK')
3502   - log.debug('pcodedmp OK')
3503   - # process the output to extract keywords, to detect VBA stomping
3504   - keywords = set()
3505   - for line in self.pcodedmp_output.splitlines():
3506   - if line.startswith('\t'):
3507   - log.debug('P-code: ' + line.strip())
3508   - tokens = line.split(None, 1)
3509   - mnemonic = tokens[0]
3510   - args = ''
3511   - if len(tokens) == 2:
3512   - args = tokens[1].strip()
3513   - # log.debug(repr([mnemonic, args]))
3514   - # if mnemonic in ('VarDefn',):
3515   - # # just add the rest of the line
3516   - # keywords.add(args)
3517   - # if mnemonic == 'FuncDefn':
3518   - # # function definition: just strip parentheses
3519   - # funcdefn = args.strip('()')
3520   - # keywords.add(funcdefn)
3521   - if mnemonic in ('ArgsCall', 'ArgsLd', 'St', 'Ld', 'MemSt', 'Label'):
3522   - # add 1st argument:
3523   - name = args.split(None, 1)[0]
3524   - # sometimes pcodedmp reports names like "id_FFFF", which are not
3525   - # directly present in the VBA source code
3526   - # (for example "Me" in VBA appears as id_FFFF in P-code)
3527   - if not name.startswith('id_'):
3528   - keywords.add(name)
3529   - if mnemonic == 'LitStr':
3530   - # re_string = re.compile(r'\"([^\"]|\"\")*\"')
3531   - # for match in re_string.finditer(line):
3532   - # print('\t' + match.group())
3533   - # the string is the 2nd argument:
3534   - s = args.split(None, 1)[1]
3535   - # tricky issue: when a string contains double quotes inside,
3536   - # pcodedmp returns a single ", whereas in the VBA source code
3537   - # it is always a double "".
3538   - # We have to remove the " around the strings, then double the remaining ",
3539   - # and put back the " around:
3540   - if len(s)>=2:
3541   - assert(s[0]=='"' and s[-1]=='"')
3542   - s = s[1:-1]
3543   - s = s.replace('"', '""')
3544   - s = '"' + s + '"'
3545   - keywords.add(s)
3546   - log.debug('Keywords extracted from P-code: ' + repr(sorted(keywords)))
3547   - self.vba_stomping_detected = False
3548   - # TODO: add a method to get all VBA code as one string
3549   - vba_code_all_modules = ''
3550   - for (_, _, _, vba_code) in self.extract_all_macros():
3551   - vba_code_all_modules += vba_code + '\n'
3552   - for keyword in keywords:
3553   - if keyword not in vba_code_all_modules:
3554   - log.debug('Keyword {!r} not found in VBA code'.format(keyword))
3555   - log.debug('VBA STOMPING DETECTED!')
3556   - self.vba_stomping_detected = True
3557   - break
3558   - if not self.vba_stomping_detected:
3559   - log.debug('No VBA stomping detected.')
3560   - return self.vba_stomping_detected
3561   -
3562 3411 def close(self):
3563 3412 """
3564 3413 Close all the open files. This method must be called after usage, if
... ... @@ -3629,8 +3478,6 @@ class VBA_Parser_CLI(VBA_Parser):
3629 3478 color_type = COLOR_TYPE.get(kw_type, None)
3630 3479 t.write_row((kw_type, keyword, description), colors=(color_type, None, None))
3631 3480 t.close()
3632   - if self.vba_stomping_detected:
3633   - print('VBA Stomping detection is experimental: please report any false positive/negative at https://github.com/decalage2/oletools/issues')
3634 3481 else:
3635 3482 print('No suspicious keyword or IOC found.')
3636 3483  
... ... @@ -3673,7 +3520,7 @@ class VBA_Parser_CLI(VBA_Parser):
3673 3520 def process_file(self, show_decoded_strings=False,
3674 3521 display_code=True, hide_attributes=True,
3675 3522 vba_code_only=False, show_deobfuscated_code=False,
3676   - deobfuscate=False, pcode=False):
  3523 + deobfuscate=False, pcode=False, no_xlm=False):
3677 3524 """
3678 3525 Process a single file
3679 3526  
... ... @@ -3686,9 +3533,11 @@ class VBA_Parser_CLI(VBA_Parser):
3686 3533 :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
3687 3534 :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
3688 3535 :param pcode bool: if True, call pcodedmp to disassemble P-code and display it
  3536 + :param no_xlm bool: if True, don't use the BIFF plugin to extract old style XLM macros
3689 3537 """
3690 3538 #TODO: replace print by writing to a provided output file (sys.stdout by default)
3691 3539 # fix conflicting parameters:
  3540 + self.no_xlm = no_xlm
3692 3541 if vba_code_only and not display_code:
3693 3542 display_code = True
3694 3543 if self.container:
... ... @@ -3758,8 +3607,30 @@ class VBA_Parser_CLI(VBA_Parser):
3758 3607 if pcode:
3759 3608 print('-' * 79)
3760 3609 print('P-CODE disassembly:')
3761   - pcode = self.extract_pcode()
3762   - print(pcode)
  3610 + # pcodedmp prints all its output to sys.stdout, so we need to capture it so that
  3611 + # we can process the results later on.
  3612 + # save sys.stdout, then modify it to capture pcodedmp's output:
  3613 + stdout = sys.stdout
  3614 + if PYTHON2:
  3615 + # on Python 2, console output is bytes
  3616 + output = BytesIO()
  3617 + else:
  3618 + # on Python 3, console output is unicode
  3619 + output = StringIO()
  3620 + sys.stdout = output
  3621 + # we need to fake an argparser for those two args used by pcodedmp:
  3622 + class args:
  3623 + disasmOnly = True
  3624 + verbose = False
  3625 + try:
  3626 + # TODO: handle files in memory too
  3627 + pcodedmp.processFile(self.filename, args)
  3628 + except Exception:
  3629 + log.error('Error while running pcodedmp')
  3630 + finally:
  3631 + # set sys.stdout back to its original value
  3632 + sys.stdout = stdout
  3633 + print(output.getvalue())
3763 3634  
3764 3635 if not vba_code_only:
3765 3636 # analyse the code from all modules at once:
... ... @@ -3782,7 +3653,7 @@ class VBA_Parser_CLI(VBA_Parser):
3782 3653 def process_file_json(self, show_decoded_strings=False,
3783 3654 display_code=True, hide_attributes=True,
3784 3655 vba_code_only=False, show_deobfuscated_code=False,
3785   - deobfuscate=False):
  3656 + deobfuscate=False, no_xlm=False):
3786 3657 """
3787 3658 Process a single file
3788 3659  
... ... @@ -3799,6 +3670,7 @@ class VBA_Parser_CLI(VBA_Parser):
3799 3670 """
3800 3671 #TODO: fix conflicting parameters (?)
3801 3672  
  3673 + self.no_xlm = no_xlm
3802 3674 if vba_code_only and not display_code:
3803 3675 display_code = True
3804 3676  
... ... @@ -3949,6 +3821,8 @@ def parse_args(cmd_line_args=None):
3949 3821 help="Do not raise errors if opening of substream fails")
3950 3822 parser.add_option('--pcode', dest="pcode", action="store_true", default=False,
3951 3823 help="Disassemble and display the P-code (using pcodedmp)")
  3824 + parser.add_option('--no-xlm', dest="no_xlm", action="store_true", default=False,
  3825 + help="Do not extract XLM Excel macros. This may speed up analysis of large files.")
3952 3826  
3953 3827 (options, args) = parser.parse_args(cmd_line_args)
3954 3828  
... ... @@ -3983,21 +3857,21 @@ def process_file(filename, data, container, options, crypto_nesting=0):
3983 3857 if options.output_mode == 'detailed':
3984 3858 # fully detailed output
3985 3859 vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
3986   - display_code=options.display_code,
3987   - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
3988   - show_deobfuscated_code=options.show_deobfuscated_code,
3989   - deobfuscate=options.deobfuscate, pcode=options.pcode)
  3860 + display_code=options.display_code,
  3861 + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
  3862 + show_deobfuscated_code=options.show_deobfuscated_code,
  3863 + deobfuscate=options.deobfuscate, pcode=options.pcode, no_xlm=options.no_xlm)
3990 3864 elif options.output_mode == 'triage':
3991 3865 # summarized output for triage:
3992 3866 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
3993   - deobfuscate=options.deobfuscate)
  3867 + deobfuscate=options.deobfuscate, no_xlm=options.no_xlm)
3994 3868 elif options.output_mode == 'json':
3995 3869 print_json(
3996 3870 vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
3997 3871 display_code=options.display_code,
3998 3872 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
3999 3873 show_deobfuscated_code=options.show_deobfuscated_code,
4000   - deobfuscate=options.deobfuscate))
  3874 + deobfuscate=options.deobfuscate, no_xlm=options.no_xlm))
4001 3875 else: # (should be impossible)
4002 3876 raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
4003 3877  
... ... @@ -4064,13 +3938,8 @@ def process_file(filename, data, container, options, crypto_nesting=0):
4064 3938 except Exception:
4065 3939 raise
4066 3940 finally: # clean up
4067   - try:
4068   - log.debug('Removing crypt temp file {}'.format(decrypted_file))
  3941 + if decrypted_file is not None and os.path.isfile(decrypted_file):
4069 3942 os.unlink(decrypted_file)
4070   - except Exception: # e.g. file does not exist or is None
4071   - pass
4072   - # no idea what to return now
4073   - raise Exception('Programming error -- should never have reached this!')
4074 3943  
4075 3944  
4076 3945 def main(cmd_line_args=None):
... ...