Commit 4188b3d137c358ecb4e80e4f78424b609d7a23ac

Authored by kirk-sayre-work
1 parent 677d9ad5

Added --no-xlm option to skip extracting XLM macros.

Showing 1 changed file with 43 additions and 174 deletions
oletools/olevba.py
@@ -217,9 +217,8 @@ from __future__ import print_function @@ -217,9 +217,8 @@ from __future__ import print_function
217 # 2019-03-25 CH: - added decryption of password-protected files 217 # 2019-03-25 CH: - added decryption of password-protected files
218 # 2019-04-09 PL: - decompress_stream accepts bytes (issue #422) 218 # 2019-04-09 PL: - decompress_stream accepts bytes (issue #422)
219 # 2019-05-23 v0.55 PL: - added option --pcode to call pcodedmp and display P-code 219 # 2019-05-23 v0.55 PL: - added option --pcode to call pcodedmp and display P-code
220 -# 2019-06-05 PL: - added VBA stomping detection  
221 220
222 -__version__ = '0.55.dev3' 221 +__version__ = '0.55.dev1'
223 222
224 #------------------------------------------------------------------------------ 223 #------------------------------------------------------------------------------
225 # TODO: 224 # TODO:
@@ -287,6 +286,7 @@ except ImportError: @@ -287,6 +286,7 @@ except ImportError:
287 + "or http://effbot.org/zone/element-index.htm") 286 + "or http://effbot.org/zone/element-index.htm")
288 287
289 import colorclass 288 import colorclass
  289 +from pcodedmp import pcodedmp
290 290
291 # On Windows, colorclass needs to be enabled: 291 # On Windows, colorclass needs to be enabled:
292 if os.name == 'nt': 292 if os.name == 'nt':
@@ -704,9 +704,8 @@ SUSPICIOUS_KEYWORDS = { @@ -704,9 +704,8 @@ SUSPICIOUS_KEYWORDS = {
704 'May run code from a DLL using Excel 4 Macros (XLM/XLF)': 704 'May run code from a DLL using Excel 4 Macros (XLM/XLF)':
705 ('REGISTER',), 705 ('REGISTER',),
706 'May inject code into another process': 706 'May inject code into another process':
707 - ('CreateThread', 'CreateUserThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload  
708 - 'VirtualAllocEx', 'RtlMoveMemory', 'WriteProcessMemory',  
709 - 'SetContextThread', 'QueueApcThread', 'WriteVirtualMemory', 'VirtualProtect' 707 + ('CreateThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload
  708 + 'VirtualAllocEx', 'RtlMoveMemory', 'WriteProcessMemory'
710 ), 709 ),
711 'May run a shellcode in memory': 710 'May run a shellcode in memory':
712 ('EnumSystemLanguageGroupsW?', # Used by Hancitor in Oct 2016 711 ('EnumSystemLanguageGroupsW?', # Used by Hancitor in Oct 2016
@@ -2589,10 +2588,6 @@ class VBA_Parser(object): @@ -2589,10 +2588,6 @@ class VBA_Parser(object):
2589 #: Encoding for VBA source code and strings returned by all methods 2588 #: Encoding for VBA source code and strings returned by all methods
2590 self.encoding = encoding 2589 self.encoding = encoding
2591 self.xlm_macros = [] 2590 self.xlm_macros = []
2592 - #: Output from pcodedmp, disassembly of the VBA P-code  
2593 - self.pcodedmp_output = None  
2594 - #: Flag set to True/False if VBA stomping detected  
2595 - self.vba_stomping_detected = None  
2596 2591
2597 # if filename is None: 2592 # if filename is None:
2598 # if isinstance(_file, basestring): 2593 # if isinstance(_file, basestring):
@@ -3096,7 +3091,7 @@ class VBA_Parser(object): @@ -3096,7 +3091,7 @@ class VBA_Parser(object):
3096 log.debug('Trace:', exc_trace=True) 3091 log.debug('Trace:', exc_trace=True)
3097 else: 3092 else:
3098 raise SubstreamOpenError(self.filename, d.name, exc) 3093 raise SubstreamOpenError(self.filename, d.name, exc)
3099 - if self.detect_xlm_macros(): 3094 + if (not self.no_xlm) and self.detect_xlm_macros():
3100 self.contains_macros = True 3095 self.contains_macros = True
3101 return self.contains_macros 3096 return self.contains_macros
3102 3097
@@ -3208,15 +3203,6 @@ class VBA_Parser(object): @@ -3208,15 +3203,6 @@ class VBA_Parser(object):
3208 for line in self.xlm_macros: 3203 for line in self.xlm_macros:
3209 vba_code += "' " + line + '\n' 3204 vba_code += "' " + line + '\n'
3210 yield ('xlm_macro', 'xlm_macro', 'xlm_macro.txt', vba_code) 3205 yield ('xlm_macro', 'xlm_macro', 'xlm_macro.txt', vba_code)
3211 - # Analyse the VBA P-code to detect VBA stomping:  
3212 - # If stomping is detected, add a fake VBA module with the P-code as source comments  
3213 - # so that VBA_Scanner can find keywords and IOCs in it  
3214 - if self.detect_vba_stomping():  
3215 - vba_code = ''  
3216 - for line in self.pcodedmp_output.splitlines():  
3217 - vba_code += "' " + line + '\n'  
3218 - yield ('VBA P-code', 'VBA P-code', 'VBA_P-code.txt', vba_code)  
3219 -  
3220 3206
3221 def extract_all_macros(self): 3207 def extract_all_macros(self):
3222 """ 3208 """
@@ -3256,13 +3242,6 @@ class VBA_Parser(object): @@ -3256,13 +3242,6 @@ class VBA_Parser(object):
3256 # Analyze the whole code at once: 3242 # Analyze the whole code at once:
3257 scanner = VBA_Scanner(self.vba_code_all_modules) 3243 scanner = VBA_Scanner(self.vba_code_all_modules)
3258 self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate) 3244 self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate)
3259 - if self.detect_vba_stomping():  
3260 - log.debug('adding VBA stomping to suspicious keywords')  
3261 - keyword = 'VBA Stomping'  
3262 - description = 'VBA Stomping was detected: the VBA source code and P-code are different, '\  
3263 - 'this may have been used to hide malicious code'  
3264 - scanner.suspicious_keywords.append((keyword, description))  
3265 - scanner.results.append(('Suspicious', keyword, description))  
3266 autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = scanner.scan_summary() 3245 autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = scanner.scan_summary()
3267 self.nb_autoexec += autoexec 3246 self.nb_autoexec += autoexec
3268 self.nb_suspicious += suspicious 3247 self.nb_suspicious += suspicious
@@ -3429,136 +3408,6 @@ class VBA_Parser(object): @@ -3429,136 +3408,6 @@ class VBA_Parser(object):
3429 for variable in oleform.extract_OleFormVariables(ole, form_storage): 3408 for variable in oleform.extract_OleFormVariables(ole, form_storage):
3430 yield (self.filename, '/'.join(form_storage), variable) 3409 yield (self.filename, '/'.join(form_storage), variable)
3431 3410
3432 - def extract_pcode(self):  
3433 - """  
3434 - Extract and disassemble the VBA P-code, using pcodedmp  
3435 -  
3436 - :return: VBA P-code disassembly  
3437 - :rtype: str  
3438 - """  
3439 - # only run it once:  
3440 - if self.pcodedmp_output is None:  
3441 - log.debug('Calling pcodedmp to extract and disassemble the VBA P-code')  
3442 - # import pcodedmp here to avoid circular imports:  
3443 - try:  
3444 - from pcodedmp import pcodedmp  
3445 - except Exception as e:  
3446 - # This may happen with Pypy, because pcodedmp imports win_unicode_console...  
3447 - # TODO: this is a workaround, we just ignore P-code  
3448 - # TODO: here we just use log.info, because the word "error" in the output makes some of the tests fail...  
3449 - log.info('Exception when importing pcodedmp: {}'.format(e))  
3450 - self.pcodedmp_output = ''  
3451 - return ''  
3452 - # logging is disabled after importing pcodedmp, need to re-enable it  
3453 - # This is because pcodedmp imports olevba again :-/  
3454 - # TODO: here it works only if logging was enabled, need to change pcodedmp!  
3455 - enable_logging()  
3456 - # pcodedmp prints all its output to sys.stdout, so we need to capture it so that  
3457 - # we can process the results later on.  
3458 - # save sys.stdout, then modify it to capture pcodedmp's output:  
3459 - # stdout = sys.stdout  
3460 - if PYTHON2:  
3461 - # on Python 2, console output is bytes  
3462 - output = BytesIO()  
3463 - else:  
3464 - # on Python 3, console output is unicode  
3465 - output = StringIO()  
3466 - # sys.stdout = output  
3467 - # we need to fake an argparser for those two args used by pcodedmp:  
3468 - class args:  
3469 - disasmOnly = True  
3470 - verbose = False  
3471 - try:  
3472 - # TODO: handle files in memory too  
3473 - log.debug('before pcodedmp')  
3474 - pcodedmp.processFile(self.filename, args, output_file=output)  
3475 - log.debug('after pcodedmp')  
3476 - except Exception as e:  
3477 - # print('Error while running pcodedmp: {}'.format(e), file=sys.stderr, flush=True)  
3478 - # set sys.stdout back to its original value  
3479 - # sys.stdout = stdout  
3480 - log.exception('Error while running pcodedmp')  
3481 - # finally:  
3482 - # # set sys.stdout back to its original value  
3483 - # sys.stdout = stdout  
3484 - self.pcodedmp_output = output.getvalue()  
3485 - # print(self.pcodedmp_output)  
3486 - # log.debug(self.pcodedmp_output)  
3487 - return self.pcodedmp_output  
3488 -  
3489 - def detect_vba_stomping(self):  
3490 - """  
3491 - Detect VBA stomping, by comparing the keywords present in the P-code and  
3492 - in the VBA source code.  
3493 -  
3494 - :return: True if VBA stomping detected, False otherwise  
3495 - :rtype: bool  
3496 - """  
3497 - # only run it once:  
3498 - if self.vba_stomping_detected is None:  
3499 - log.debug('Analysing the P-code to detect VBA stomping')  
3500 - self.extract_pcode()  
3501 - # print('pcodedmp OK')  
3502 - log.debug('pcodedmp OK')  
3503 - # process the output to extract keywords, to detect VBA stomping  
3504 - keywords = set()  
3505 - for line in self.pcodedmp_output.splitlines():  
3506 - if line.startswith('\t'):  
3507 - log.debug('P-code: ' + line.strip())  
3508 - tokens = line.split(None, 1)  
3509 - mnemonic = tokens[0]  
3510 - args = ''  
3511 - if len(tokens) == 2:  
3512 - args = tokens[1].strip()  
3513 - # log.debug(repr([mnemonic, args]))  
3514 - # if mnemonic in ('VarDefn',):  
3515 - # # just add the rest of the line  
3516 - # keywords.add(args)  
3517 - # if mnemonic == 'FuncDefn':  
3518 - # # function definition: just strip parentheses  
3519 - # funcdefn = args.strip('()')  
3520 - # keywords.add(funcdefn)  
3521 - if mnemonic in ('ArgsCall', 'ArgsLd', 'St', 'Ld', 'MemSt', 'Label'):  
3522 - # add 1st argument:  
3523 - name = args.split(None, 1)[0]  
3524 - # sometimes pcodedmp reports names like "id_FFFF", which are not  
3525 - # directly present in the VBA source code  
3526 - # (for example "Me" in VBA appears as id_FFFF in P-code)  
3527 - if not name.startswith('id_'):  
3528 - keywords.add(name)  
3529 - if mnemonic == 'LitStr':  
3530 - # re_string = re.compile(r'\"([^\"]|\"\")*\"')  
3531 - # for match in re_string.finditer(line):  
3532 - # print('\t' + match.group())  
3533 - # the string is the 2nd argument:  
3534 - s = args.split(None, 1)[1]  
3535 - # tricky issue: when a string contains double quotes inside,  
3536 - # pcodedmp returns a single ", whereas in the VBA source code  
3537 - # it is always a double "".  
3538 - # We have to remove the " around the strings, then double the remaining ",  
3539 - # and put back the " around:  
3540 - if len(s)>=2:  
3541 - assert(s[0]=='"' and s[-1]=='"')  
3542 - s = s[1:-1]  
3543 - s = s.replace('"', '""')  
3544 - s = '"' + s + '"'  
3545 - keywords.add(s)  
3546 - log.debug('Keywords extracted from P-code: ' + repr(sorted(keywords)))  
3547 - self.vba_stomping_detected = False  
3548 - # TODO: add a method to get all VBA code as one string  
3549 - vba_code_all_modules = ''  
3550 - for (_, _, _, vba_code) in self.extract_all_macros():  
3551 - vba_code_all_modules += vba_code + '\n'  
3552 - for keyword in keywords:  
3553 - if keyword not in vba_code_all_modules:  
3554 - log.debug('Keyword {!r} not found in VBA code'.format(keyword))  
3555 - log.debug('VBA STOMPING DETECTED!')  
3556 - self.vba_stomping_detected = True  
3557 - break  
3558 - if not self.vba_stomping_detected:  
3559 - log.debug('No VBA stomping detected.')  
3560 - return self.vba_stomping_detected  
3561 -  
3562 def close(self): 3411 def close(self):
3563 """ 3412 """
3564 Close all the open files. This method must be called after usage, if 3413 Close all the open files. This method must be called after usage, if
@@ -3629,8 +3478,6 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3629,8 +3478,6 @@ class VBA_Parser_CLI(VBA_Parser):
3629 color_type = COLOR_TYPE.get(kw_type, None) 3478 color_type = COLOR_TYPE.get(kw_type, None)
3630 t.write_row((kw_type, keyword, description), colors=(color_type, None, None)) 3479 t.write_row((kw_type, keyword, description), colors=(color_type, None, None))
3631 t.close() 3480 t.close()
3632 - if self.vba_stomping_detected:  
3633 - print('VBA Stomping detection is experimental: please report any false positive/negative at https://github.com/decalage2/oletools/issues')  
3634 else: 3481 else:
3635 print('No suspicious keyword or IOC found.') 3482 print('No suspicious keyword or IOC found.')
3636 3483
@@ -3673,7 +3520,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3673,7 +3520,7 @@ class VBA_Parser_CLI(VBA_Parser):
3673 def process_file(self, show_decoded_strings=False, 3520 def process_file(self, show_decoded_strings=False,
3674 display_code=True, hide_attributes=True, 3521 display_code=True, hide_attributes=True,
3675 vba_code_only=False, show_deobfuscated_code=False, 3522 vba_code_only=False, show_deobfuscated_code=False,
3676 - deobfuscate=False, pcode=False): 3523 + deobfuscate=False, pcode=False, no_xlm=False):
3677 """ 3524 """
3678 Process a single file 3525 Process a single file
3679 3526
@@ -3686,9 +3533,11 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3686,9 +3533,11 @@ class VBA_Parser_CLI(VBA_Parser):
3686 :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default) 3533 :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
3687 :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow) 3534 :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
3688 :param pcode bool: if True, call pcodedmp to disassemble P-code and display it 3535 :param pcode bool: if True, call pcodedmp to disassemble P-code and display it
  3536 + :param no_xlm bool: if True, don't use the BIFF plugin to extract old style XLM macros
3689 """ 3537 """
3690 #TODO: replace print by writing to a provided output file (sys.stdout by default) 3538 #TODO: replace print by writing to a provided output file (sys.stdout by default)
3691 # fix conflicting parameters: 3539 # fix conflicting parameters:
  3540 + self.no_xlm = no_xlm
3692 if vba_code_only and not display_code: 3541 if vba_code_only and not display_code:
3693 display_code = True 3542 display_code = True
3694 if self.container: 3543 if self.container:
@@ -3758,8 +3607,30 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3758,8 +3607,30 @@ class VBA_Parser_CLI(VBA_Parser):
3758 if pcode: 3607 if pcode:
3759 print('-' * 79) 3608 print('-' * 79)
3760 print('P-CODE disassembly:') 3609 print('P-CODE disassembly:')
3761 - pcode = self.extract_pcode()  
3762 - print(pcode) 3610 + # pcodedmp prints all its output to sys.stdout, so we need to capture it so that
  3611 + # we can process the results later on.
  3612 + # save sys.stdout, then modify it to capture pcodedmp's output:
  3613 + stdout = sys.stdout
  3614 + if PYTHON2:
  3615 + # on Python 2, console output is bytes
  3616 + output = BytesIO()
  3617 + else:
  3618 + # on Python 3, console output is unicode
  3619 + output = StringIO()
  3620 + sys.stdout = output
  3621 + # we need to fake an argparser for those two args used by pcodedmp:
  3622 + class args:
  3623 + disasmOnly = True
  3624 + verbose = False
  3625 + try:
  3626 + # TODO: handle files in memory too
  3627 + pcodedmp.processFile(self.filename, args)
  3628 + except Exception:
  3629 + log.error('Error while running pcodedmp')
  3630 + finally:
  3631 + # set sys.stdout back to its original value
  3632 + sys.stdout = stdout
  3633 + print(output.getvalue())
3763 3634
3764 if not vba_code_only: 3635 if not vba_code_only:
3765 # analyse the code from all modules at once: 3636 # analyse the code from all modules at once:
@@ -3782,7 +3653,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3782,7 +3653,7 @@ class VBA_Parser_CLI(VBA_Parser):
3782 def process_file_json(self, show_decoded_strings=False, 3653 def process_file_json(self, show_decoded_strings=False,
3783 display_code=True, hide_attributes=True, 3654 display_code=True, hide_attributes=True,
3784 vba_code_only=False, show_deobfuscated_code=False, 3655 vba_code_only=False, show_deobfuscated_code=False,
3785 - deobfuscate=False): 3656 + deobfuscate=False, no_xlm=False):
3786 """ 3657 """
3787 Process a single file 3658 Process a single file
3788 3659
@@ -3799,6 +3670,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -3799,6 +3670,7 @@ class VBA_Parser_CLI(VBA_Parser):
3799 """ 3670 """
3800 #TODO: fix conflicting parameters (?) 3671 #TODO: fix conflicting parameters (?)
3801 3672
  3673 + self.no_xlm = no_xlm
3802 if vba_code_only and not display_code: 3674 if vba_code_only and not display_code:
3803 display_code = True 3675 display_code = True
3804 3676
@@ -3949,6 +3821,8 @@ def parse_args(cmd_line_args=None): @@ -3949,6 +3821,8 @@ def parse_args(cmd_line_args=None):
3949 help="Do not raise errors if opening of substream fails") 3821 help="Do not raise errors if opening of substream fails")
3950 parser.add_option('--pcode', dest="pcode", action="store_true", default=False, 3822 parser.add_option('--pcode', dest="pcode", action="store_true", default=False,
3951 help="Disassemble and display the P-code (using pcodedmp)") 3823 help="Disassemble and display the P-code (using pcodedmp)")
  3824 + parser.add_option('--no-xlm', dest="no_xlm", action="store_true", default=False,
  3825 + help="Do not extract XLM Excel macros. This may speed up analysis of large files.")
3952 3826
3953 (options, args) = parser.parse_args(cmd_line_args) 3827 (options, args) = parser.parse_args(cmd_line_args)
3954 3828
@@ -3983,21 +3857,21 @@ def process_file(filename, data, container, options, crypto_nesting=0): @@ -3983,21 +3857,21 @@ def process_file(filename, data, container, options, crypto_nesting=0):
3983 if options.output_mode == 'detailed': 3857 if options.output_mode == 'detailed':
3984 # fully detailed output 3858 # fully detailed output
3985 vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, 3859 vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
3986 - display_code=options.display_code,  
3987 - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,  
3988 - show_deobfuscated_code=options.show_deobfuscated_code,  
3989 - deobfuscate=options.deobfuscate, pcode=options.pcode) 3860 + display_code=options.display_code,
  3861 + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
  3862 + show_deobfuscated_code=options.show_deobfuscated_code,
  3863 + deobfuscate=options.deobfuscate, pcode=options.pcode, no_xlm=options.no_xlm)
3990 elif options.output_mode == 'triage': 3864 elif options.output_mode == 'triage':
3991 # summarized output for triage: 3865 # summarized output for triage:
3992 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, 3866 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
3993 - deobfuscate=options.deobfuscate) 3867 + deobfuscate=options.deobfuscate, no_xlm=options.no_xlm)
3994 elif options.output_mode == 'json': 3868 elif options.output_mode == 'json':
3995 print_json( 3869 print_json(
3996 vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, 3870 vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
3997 display_code=options.display_code, 3871 display_code=options.display_code,
3998 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, 3872 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
3999 show_deobfuscated_code=options.show_deobfuscated_code, 3873 show_deobfuscated_code=options.show_deobfuscated_code,
4000 - deobfuscate=options.deobfuscate)) 3874 + deobfuscate=options.deobfuscate, no_xlm=options.no_xlm))
4001 else: # (should be impossible) 3875 else: # (should be impossible)
4002 raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode)) 3876 raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
4003 3877
@@ -4064,13 +3938,8 @@ def process_file(filename, data, container, options, crypto_nesting=0): @@ -4064,13 +3938,8 @@ def process_file(filename, data, container, options, crypto_nesting=0):
4064 except Exception: 3938 except Exception:
4065 raise 3939 raise
4066 finally: # clean up 3940 finally: # clean up
4067 - try:  
4068 - log.debug('Removing crypt temp file {}'.format(decrypted_file)) 3941 + if decrypted_file is not None and os.path.isfile(decrypted_file):
4069 os.unlink(decrypted_file) 3942 os.unlink(decrypted_file)
4070 - except Exception: # e.g. file does not exist or is None  
4071 - pass  
4072 - # no idea what to return now  
4073 - raise Exception('Programming error -- should never have reached this!')  
4074 3943
4075 3944
4076 def main(cmd_line_args=None): 3945 def main(cmd_line_args=None):