Commit 4188b3d137c358ecb4e80e4f78424b609d7a23ac
1 parent: 677d9ad5
Added --no-xlm option to skip extracting XLM macros.
Showing 1 changed file with 43 additions and 174 deletions
oletools/olevba.py
| ... | ... | @@ -217,9 +217,8 @@ from __future__ import print_function |
| 217 | 217 | # 2019-03-25 CH: - added decryption of password-protected files |
| 218 | 218 | # 2019-04-09 PL: - decompress_stream accepts bytes (issue #422) |
| 219 | 219 | # 2019-05-23 v0.55 PL: - added option --pcode to call pcodedmp and display P-code |
| 220 | -# 2019-06-05 PL: - added VBA stomping detection | |
| 221 | 220 | |
| 222 | -__version__ = '0.55.dev3' | |
| 221 | +__version__ = '0.55.dev1' | |
| 223 | 222 | |
| 224 | 223 | #------------------------------------------------------------------------------ |
| 225 | 224 | # TODO: |
| ... | ... | @@ -287,6 +286,7 @@ except ImportError: |
| 287 | 286 | + "or http://effbot.org/zone/element-index.htm") |
| 288 | 287 | |
| 289 | 288 | import colorclass |
| 289 | +from pcodedmp import pcodedmp | |
| 290 | 290 | |
| 291 | 291 | # On Windows, colorclass needs to be enabled: |
| 292 | 292 | if os.name == 'nt': |
| ... | ... | @@ -704,9 +704,8 @@ SUSPICIOUS_KEYWORDS = { |
| 704 | 704 | 'May run code from a DLL using Excel 4 Macros (XLM/XLF)': |
| 705 | 705 | ('REGISTER',), |
| 706 | 706 | 'May inject code into another process': |
| 707 | - ('CreateThread', 'CreateUserThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload | |
| 708 | - 'VirtualAllocEx', 'RtlMoveMemory', 'WriteProcessMemory', | |
| 709 | - 'SetContextThread', 'QueueApcThread', 'WriteVirtualMemory', 'VirtualProtect' | |
| 707 | + ('CreateThread', 'VirtualAlloc', # (issue #9) suggested by Davy Douhine - used by MSF payload | |
| 708 | + 'VirtualAllocEx', 'RtlMoveMemory', 'WriteProcessMemory' | |
| 710 | 709 | ), |
| 711 | 710 | 'May run a shellcode in memory': |
| 712 | 711 | ('EnumSystemLanguageGroupsW?', # Used by Hancitor in Oct 2016 |
| ... | ... | @@ -2589,10 +2588,6 @@ class VBA_Parser(object): |
| 2589 | 2588 | #: Encoding for VBA source code and strings returned by all methods |
| 2590 | 2589 | self.encoding = encoding |
| 2591 | 2590 | self.xlm_macros = [] |
| 2592 | - #: Output from pcodedmp, disassembly of the VBA P-code | |
| 2593 | - self.pcodedmp_output = None | |
| 2594 | - #: Flag set to True/False if VBA stomping detected | |
| 2595 | - self.vba_stomping_detected = None | |
| 2596 | 2591 | |
| 2597 | 2592 | # if filename is None: |
| 2598 | 2593 | # if isinstance(_file, basestring): |
| ... | ... | @@ -3096,7 +3091,7 @@ class VBA_Parser(object): |
| 3096 | 3091 | log.debug('Trace:', exc_trace=True) |
| 3097 | 3092 | else: |
| 3098 | 3093 | raise SubstreamOpenError(self.filename, d.name, exc) |
| 3099 | - if self.detect_xlm_macros(): | |
| 3094 | + if (not self.no_xlm) and self.detect_xlm_macros(): | |
| 3100 | 3095 | self.contains_macros = True |
| 3101 | 3096 | return self.contains_macros |
| 3102 | 3097 | |
| ... | ... | @@ -3208,15 +3203,6 @@ class VBA_Parser(object): |
| 3208 | 3203 | for line in self.xlm_macros: |
| 3209 | 3204 | vba_code += "' " + line + '\n' |
| 3210 | 3205 | yield ('xlm_macro', 'xlm_macro', 'xlm_macro.txt', vba_code) |
| 3211 | - # Analyse the VBA P-code to detect VBA stomping: | |
| 3212 | - # If stomping is detected, add a fake VBA module with the P-code as source comments | |
| 3213 | - # so that VBA_Scanner can find keywords and IOCs in it | |
| 3214 | - if self.detect_vba_stomping(): | |
| 3215 | - vba_code = '' | |
| 3216 | - for line in self.pcodedmp_output.splitlines(): | |
| 3217 | - vba_code += "' " + line + '\n' | |
| 3218 | - yield ('VBA P-code', 'VBA P-code', 'VBA_P-code.txt', vba_code) | |
| 3219 | - | |
| 3220 | 3206 | |
| 3221 | 3207 | def extract_all_macros(self): |
| 3222 | 3208 | """ |
| ... | ... | @@ -3256,13 +3242,6 @@ class VBA_Parser(object): |
| 3256 | 3242 | # Analyze the whole code at once: |
| 3257 | 3243 | scanner = VBA_Scanner(self.vba_code_all_modules) |
| 3258 | 3244 | self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate) |
| 3259 | - if self.detect_vba_stomping(): | |
| 3260 | - log.debug('adding VBA stomping to suspicious keywords') | |
| 3261 | - keyword = 'VBA Stomping' | |
| 3262 | - description = 'VBA Stomping was detected: the VBA source code and P-code are different, '\ | |
| 3263 | - 'this may have been used to hide malicious code' | |
| 3264 | - scanner.suspicious_keywords.append((keyword, description)) | |
| 3265 | - scanner.results.append(('Suspicious', keyword, description)) | |
| 3266 | 3245 | autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = scanner.scan_summary() |
| 3267 | 3246 | self.nb_autoexec += autoexec |
| 3268 | 3247 | self.nb_suspicious += suspicious |
| ... | ... | @@ -3429,136 +3408,6 @@ class VBA_Parser(object): |
| 3429 | 3408 | for variable in oleform.extract_OleFormVariables(ole, form_storage): |
| 3430 | 3409 | yield (self.filename, '/'.join(form_storage), variable) |
| 3431 | 3410 | |
| 3432 | - def extract_pcode(self): | |
| 3433 | - """ | |
| 3434 | - Extract and disassemble the VBA P-code, using pcodedmp | |
| 3435 | - | |
| 3436 | - :return: VBA P-code disassembly | |
| 3437 | - :rtype: str | |
| 3438 | - """ | |
| 3439 | - # only run it once: | |
| 3440 | - if self.pcodedmp_output is None: | |
| 3441 | - log.debug('Calling pcodedmp to extract and disassemble the VBA P-code') | |
| 3442 | - # import pcodedmp here to avoid circular imports: | |
| 3443 | - try: | |
| 3444 | - from pcodedmp import pcodedmp | |
| 3445 | - except Exception as e: | |
| 3446 | - # This may happen with Pypy, because pcodedmp imports win_unicode_console... | |
| 3447 | - # TODO: this is a workaround, we just ignore P-code | |
| 3448 | - # TODO: here we just use log.info, because the word "error" in the output makes some of the tests fail... | |
| 3449 | - log.info('Exception when importing pcodedmp: {}'.format(e)) | |
| 3450 | - self.pcodedmp_output = '' | |
| 3451 | - return '' | |
| 3452 | - # logging is disabled after importing pcodedmp, need to re-enable it | |
| 3453 | - # This is because pcodedmp imports olevba again :-/ | |
| 3454 | - # TODO: here it works only if logging was enabled, need to change pcodedmp! | |
| 3455 | - enable_logging() | |
| 3456 | - # pcodedmp prints all its output to sys.stdout, so we need to capture it so that | |
| 3457 | - # we can process the results later on. | |
| 3458 | - # save sys.stdout, then modify it to capture pcodedmp's output: | |
| 3459 | - # stdout = sys.stdout | |
| 3460 | - if PYTHON2: | |
| 3461 | - # on Python 2, console output is bytes | |
| 3462 | - output = BytesIO() | |
| 3463 | - else: | |
| 3464 | - # on Python 3, console output is unicode | |
| 3465 | - output = StringIO() | |
| 3466 | - # sys.stdout = output | |
| 3467 | - # we need to fake an argparser for those two args used by pcodedmp: | |
| 3468 | - class args: | |
| 3469 | - disasmOnly = True | |
| 3470 | - verbose = False | |
| 3471 | - try: | |
| 3472 | - # TODO: handle files in memory too | |
| 3473 | - log.debug('before pcodedmp') | |
| 3474 | - pcodedmp.processFile(self.filename, args, output_file=output) | |
| 3475 | - log.debug('after pcodedmp') | |
| 3476 | - except Exception as e: | |
| 3477 | - # print('Error while running pcodedmp: {}'.format(e), file=sys.stderr, flush=True) | |
| 3478 | - # set sys.stdout back to its original value | |
| 3479 | - # sys.stdout = stdout | |
| 3480 | - log.exception('Error while running pcodedmp') | |
| 3481 | - # finally: | |
| 3482 | - # # set sys.stdout back to its original value | |
| 3483 | - # sys.stdout = stdout | |
| 3484 | - self.pcodedmp_output = output.getvalue() | |
| 3485 | - # print(self.pcodedmp_output) | |
| 3486 | - # log.debug(self.pcodedmp_output) | |
| 3487 | - return self.pcodedmp_output | |
| 3488 | - | |
| 3489 | - def detect_vba_stomping(self): | |
| 3490 | - """ | |
| 3491 | - Detect VBA stomping, by comparing the keywords present in the P-code and | |
| 3492 | - in the VBA source code. | |
| 3493 | - | |
| 3494 | - :return: True if VBA stomping detected, False otherwise | |
| 3495 | - :rtype: bool | |
| 3496 | - """ | |
| 3497 | - # only run it once: | |
| 3498 | - if self.vba_stomping_detected is None: | |
| 3499 | - log.debug('Analysing the P-code to detect VBA stomping') | |
| 3500 | - self.extract_pcode() | |
| 3501 | - # print('pcodedmp OK') | |
| 3502 | - log.debug('pcodedmp OK') | |
| 3503 | - # process the output to extract keywords, to detect VBA stomping | |
| 3504 | - keywords = set() | |
| 3505 | - for line in self.pcodedmp_output.splitlines(): | |
| 3506 | - if line.startswith('\t'): | |
| 3507 | - log.debug('P-code: ' + line.strip()) | |
| 3508 | - tokens = line.split(None, 1) | |
| 3509 | - mnemonic = tokens[0] | |
| 3510 | - args = '' | |
| 3511 | - if len(tokens) == 2: | |
| 3512 | - args = tokens[1].strip() | |
| 3513 | - # log.debug(repr([mnemonic, args])) | |
| 3514 | - # if mnemonic in ('VarDefn',): | |
| 3515 | - # # just add the rest of the line | |
| 3516 | - # keywords.add(args) | |
| 3517 | - # if mnemonic == 'FuncDefn': | |
| 3518 | - # # function definition: just strip parentheses | |
| 3519 | - # funcdefn = args.strip('()') | |
| 3520 | - # keywords.add(funcdefn) | |
| 3521 | - if mnemonic in ('ArgsCall', 'ArgsLd', 'St', 'Ld', 'MemSt', 'Label'): | |
| 3522 | - # add 1st argument: | |
| 3523 | - name = args.split(None, 1)[0] | |
| 3524 | - # sometimes pcodedmp reports names like "id_FFFF", which are not | |
| 3525 | - # directly present in the VBA source code | |
| 3526 | - # (for example "Me" in VBA appears as id_FFFF in P-code) | |
| 3527 | - if not name.startswith('id_'): | |
| 3528 | - keywords.add(name) | |
| 3529 | - if mnemonic == 'LitStr': | |
| 3530 | - # re_string = re.compile(r'\"([^\"]|\"\")*\"') | |
| 3531 | - # for match in re_string.finditer(line): | |
| 3532 | - # print('\t' + match.group()) | |
| 3533 | - # the string is the 2nd argument: | |
| 3534 | - s = args.split(None, 1)[1] | |
| 3535 | - # tricky issue: when a string contains double quotes inside, | |
| 3536 | - # pcodedmp returns a single ", whereas in the VBA source code | |
| 3537 | - # it is always a double "". | |
| 3538 | - # We have to remove the " around the strings, then double the remaining ", | |
| 3539 | - # and put back the " around: | |
| 3540 | - if len(s)>=2: | |
| 3541 | - assert(s[0]=='"' and s[-1]=='"') | |
| 3542 | - s = s[1:-1] | |
| 3543 | - s = s.replace('"', '""') | |
| 3544 | - s = '"' + s + '"' | |
| 3545 | - keywords.add(s) | |
| 3546 | - log.debug('Keywords extracted from P-code: ' + repr(sorted(keywords))) | |
| 3547 | - self.vba_stomping_detected = False | |
| 3548 | - # TODO: add a method to get all VBA code as one string | |
| 3549 | - vba_code_all_modules = '' | |
| 3550 | - for (_, _, _, vba_code) in self.extract_all_macros(): | |
| 3551 | - vba_code_all_modules += vba_code + '\n' | |
| 3552 | - for keyword in keywords: | |
| 3553 | - if keyword not in vba_code_all_modules: | |
| 3554 | - log.debug('Keyword {!r} not found in VBA code'.format(keyword)) | |
| 3555 | - log.debug('VBA STOMPING DETECTED!') | |
| 3556 | - self.vba_stomping_detected = True | |
| 3557 | - break | |
| 3558 | - if not self.vba_stomping_detected: | |
| 3559 | - log.debug('No VBA stomping detected.') | |
| 3560 | - return self.vba_stomping_detected | |
| 3561 | - | |
| 3562 | 3411 | def close(self): |
| 3563 | 3412 | """ |
| 3564 | 3413 | Close all the open files. This method must be called after usage, if |
| ... | ... | @@ -3629,8 +3478,6 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3629 | 3478 | color_type = COLOR_TYPE.get(kw_type, None) |
| 3630 | 3479 | t.write_row((kw_type, keyword, description), colors=(color_type, None, None)) |
| 3631 | 3480 | t.close() |
| 3632 | - if self.vba_stomping_detected: | |
| 3633 | - print('VBA Stomping detection is experimental: please report any false positive/negative at https://github.com/decalage2/oletools/issues') | |
| 3634 | 3481 | else: |
| 3635 | 3482 | print('No suspicious keyword or IOC found.') |
| 3636 | 3483 | |
| ... | ... | @@ -3673,7 +3520,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3673 | 3520 | def process_file(self, show_decoded_strings=False, |
| 3674 | 3521 | display_code=True, hide_attributes=True, |
| 3675 | 3522 | vba_code_only=False, show_deobfuscated_code=False, |
| 3676 | - deobfuscate=False, pcode=False): | |
| 3523 | + deobfuscate=False, pcode=False, no_xlm=False): | |
| 3677 | 3524 | """ |
| 3678 | 3525 | Process a single file |
| 3679 | 3526 | |
| ... | ... | @@ -3686,9 +3533,11 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3686 | 3533 | :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default) |
| 3687 | 3534 | :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow) |
| 3688 | 3535 | :param pcode bool: if True, call pcodedmp to disassemble P-code and display it |
| 3536 | + :param no_xlm bool: if True, don't use the BIFF plugin to extract old style XLM macros | |
| 3689 | 3537 | """ |
| 3690 | 3538 | #TODO: replace print by writing to a provided output file (sys.stdout by default) |
| 3691 | 3539 | # fix conflicting parameters: |
| 3540 | + self.no_xlm = no_xlm | |
| 3692 | 3541 | if vba_code_only and not display_code: |
| 3693 | 3542 | display_code = True |
| 3694 | 3543 | if self.container: |
| ... | ... | @@ -3758,8 +3607,30 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3758 | 3607 | if pcode: |
| 3759 | 3608 | print('-' * 79) |
| 3760 | 3609 | print('P-CODE disassembly:') |
| 3761 | - pcode = self.extract_pcode() | |
| 3762 | - print(pcode) | |
| 3610 | + # pcodedmp prints all its output to sys.stdout, so we need to capture it so that | |
| 3611 | + # we can process the results later on. | |
| 3612 | + # save sys.stdout, then modify it to capture pcodedmp's output: | |
| 3613 | + stdout = sys.stdout | |
| 3614 | + if PYTHON2: | |
| 3615 | + # on Python 2, console output is bytes | |
| 3616 | + output = BytesIO() | |
| 3617 | + else: | |
| 3618 | + # on Python 3, console output is unicode | |
| 3619 | + output = StringIO() | |
| 3620 | + sys.stdout = output | |
| 3621 | + # we need to fake an argparser for those two args used by pcodedmp: | |
| 3622 | + class args: | |
| 3623 | + disasmOnly = True | |
| 3624 | + verbose = False | |
| 3625 | + try: | |
| 3626 | + # TODO: handle files in memory too | |
| 3627 | + pcodedmp.processFile(self.filename, args) | |
| 3628 | + except Exception: | |
| 3629 | + log.error('Error while running pcodedmp') | |
| 3630 | + finally: | |
| 3631 | + # set sys.stdout back to its original value | |
| 3632 | + sys.stdout = stdout | |
| 3633 | + print(output.getvalue()) | |
| 3763 | 3634 | |
| 3764 | 3635 | if not vba_code_only: |
| 3765 | 3636 | # analyse the code from all modules at once: |
| ... | ... | @@ -3782,7 +3653,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3782 | 3653 | def process_file_json(self, show_decoded_strings=False, |
| 3783 | 3654 | display_code=True, hide_attributes=True, |
| 3784 | 3655 | vba_code_only=False, show_deobfuscated_code=False, |
| 3785 | - deobfuscate=False): | |
| 3656 | + deobfuscate=False, no_xlm=False): | |
| 3786 | 3657 | """ |
| 3787 | 3658 | Process a single file |
| 3788 | 3659 | |
| ... | ... | @@ -3799,6 +3670,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 3799 | 3670 | """ |
| 3800 | 3671 | #TODO: fix conflicting parameters (?) |
| 3801 | 3672 | |
| 3673 | + self.no_xlm = no_xlm | |
| 3802 | 3674 | if vba_code_only and not display_code: |
| 3803 | 3675 | display_code = True |
| 3804 | 3676 | |
| ... | ... | @@ -3949,6 +3821,8 @@ def parse_args(cmd_line_args=None): |
| 3949 | 3821 | help="Do not raise errors if opening of substream fails") |
| 3950 | 3822 | parser.add_option('--pcode', dest="pcode", action="store_true", default=False, |
| 3951 | 3823 | help="Disassemble and display the P-code (using pcodedmp)") |
| 3824 | + parser.add_option('--no-xlm', dest="no_xlm", action="store_true", default=False, | |
| 3825 | + help="Do not extract XLM Excel macros. This may speed up analysis of large files.") | |
| 3952 | 3826 | |
| 3953 | 3827 | (options, args) = parser.parse_args(cmd_line_args) |
| 3954 | 3828 | |
| ... | ... | @@ -3983,21 +3857,21 @@ def process_file(filename, data, container, options, crypto_nesting=0): |
| 3983 | 3857 | if options.output_mode == 'detailed': |
| 3984 | 3858 | # fully detailed output |
| 3985 | 3859 | vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, |
| 3986 | - display_code=options.display_code, | |
| 3987 | - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | |
| 3988 | - show_deobfuscated_code=options.show_deobfuscated_code, | |
| 3989 | - deobfuscate=options.deobfuscate, pcode=options.pcode) | |
| 3860 | + display_code=options.display_code, | |
| 3861 | + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | |
| 3862 | + show_deobfuscated_code=options.show_deobfuscated_code, | |
| 3863 | + deobfuscate=options.deobfuscate, pcode=options.pcode, no_xlm=options.no_xlm) | |
| 3990 | 3864 | elif options.output_mode == 'triage': |
| 3991 | 3865 | # summarized output for triage: |
| 3992 | 3866 | vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, |
| 3993 | - deobfuscate=options.deobfuscate) | |
| 3867 | + deobfuscate=options.deobfuscate, no_xlm=options.no_xlm) | |
| 3994 | 3868 | elif options.output_mode == 'json': |
| 3995 | 3869 | print_json( |
| 3996 | 3870 | vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, |
| 3997 | 3871 | display_code=options.display_code, |
| 3998 | 3872 | hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, |
| 3999 | 3873 | show_deobfuscated_code=options.show_deobfuscated_code, |
| 4000 | - deobfuscate=options.deobfuscate)) | |
| 3874 | + deobfuscate=options.deobfuscate, no_xlm=options.no_xlm)) | |
| 4001 | 3875 | else: # (should be impossible) |
| 4002 | 3876 | raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode)) |
| 4003 | 3877 | |
| ... | ... | @@ -4064,13 +3938,8 @@ def process_file(filename, data, container, options, crypto_nesting=0): |
| 4064 | 3938 | except Exception: |
| 4065 | 3939 | raise |
| 4066 | 3940 | finally: # clean up |
| 4067 | - try: | |
| 4068 | - log.debug('Removing crypt temp file {}'.format(decrypted_file)) | |
| 3941 | + if decrypted_file is not None and os.path.isfile(decrypted_file): | |
| 4069 | 3942 | os.unlink(decrypted_file) |
| 4070 | - except Exception: # e.g. file does not exist or is None | |
| 4071 | - pass | |
| 4072 | - # no idea what to return now | |
| 4073 | - raise Exception('Programming error -- should never have reached this!') | |
| 4074 | 3943 | |
| 4075 | 3944 | |
| 4076 | 3945 | def main(cmd_line_args=None): | ... | ... |