Commit 4539d6b15509fe2ac9982269f05a0d9b7dea482c
olevba: added option --no-deobfuscate (temporary)
Showing 3 changed files with 206 additions and 20 deletions
oletools/olevba.py
| ... | ... | @@ -164,8 +164,10 @@ https://github.com/unixfreak0037/officeparser |
| 164 | 164 | # 2016-02-20 v0.43 PL: - fixed issue #34 in the VBA parser and vba_chr |
| 165 | 165 | # 2016-02-29 PL: - added Workbook_Activate to suspicious keywords |
| 166 | 166 | # 2016-03-08 v0.44 PL: - added VBA Form strings extraction and analysis |
| 167 | +# 2016-03-04 v0.45 CH: - added JSON output (by Christian Herdtweck) | |
| 168 | +# 2016-03-16 CH: - added option --no-deobfuscate (temporary) | |
| 167 | 169 | |
| 168 | -__version__ = '0.44' | |
| 170 | +__version__ = '0.45' | |
| 169 | 171 | |
| 170 | 172 | #------------------------------------------------------------------------------ |
| 171 | 173 | # TODO: |
| ... | ... | @@ -215,6 +217,7 @@ import traceback |
| 215 | 217 | import zlib |
| 216 | 218 | import email # for MHTML parsing |
| 217 | 219 | import string # for printable |
| 220 | +import json # for json output mode (argument --json) | |
| 218 | 221 | |
| 219 | 222 | # import lxml or ElementTree for XML parsing: |
| 220 | 223 | try: |
| ... | ... | @@ -1655,6 +1658,42 @@ def detect_vba_strings(vba_code): |
| 1655 | 1658 | return results |
| 1656 | 1659 | |
| 1657 | 1660 | |
| 1661 | +def json2ascii(json_obj, encoding='utf8', errors='replace'): | |
| 1662 | + """ ensure there is no unicode in json and all strings are safe to decode | |
| 1663 | + | |
| 1664 | + works recursively, decodes and re-encodes every string to/from unicode | |
| 1665 | + to ensure there will be no trouble in loading the dumped json output | |
| 1666 | + """ | |
| 1667 | + if json_obj is None: | |
| 1668 | + pass | |
| 1669 | + elif isinstance(json_obj, (bool, int, float)): | |
| 1670 | + pass | |
| 1671 | + elif isinstance(json_obj, str): | |
| 1672 | + dencoded = json_obj.decode(encoding, errors).encode(encoding, errors) | |
| 1673 | + if dencoded != str: | |
| 1674 | + logging.info('json2ascii: replaced: {0} (len {1})' | |
| 1675 | + .format(json_obj, len(json_obj))) | |
| 1676 | + logging.info('json2ascii: with: {0} (len {1})' | |
| 1677 | + .format(dencoded, len(dencoded))) | |
| 1678 | + return dencoded | |
| 1679 | + elif isinstance(json_obj, unicode): | |
| 1680 | + logging.info('json2ascii: replaced: {0}' | |
| 1681 | + .format(json_obj.encode(encoding, errors))) | |
| 1682 | + # cannot put original into logger | |
| 1683 | + # print 'original: ' json_obj | |
| 1684 | + return json_obj.encode(encoding, errors) | |
| 1685 | + elif isinstance(json_obj, dict): | |
| 1686 | + for key in json_obj: | |
| 1687 | + json_obj[key] = json2ascii(json_obj[key]) | |
| 1688 | + elif isinstance(json_obj, (list,tuple)): | |
| 1689 | + for item in json_obj: | |
| 1690 | + item = json2ascii(item) | |
| 1691 | + else: | |
| 1692 | + logging.debug('unexpected type in json2ascii: {0} -- leave as is' | |
| 1693 | + .format(type(json_obj))) | |
| 1694 | + return json_obj | |
| 1695 | + | |
| 1696 | + | |
| 1658 | 1697 | class VBA_Scanner(object): |
| 1659 | 1698 | """ |
| 1660 | 1699 | Class to scan the source code of a VBA module to find obfuscated strings, |
| ... | ... | @@ -2512,6 +2551,20 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2512 | 2551 | else: |
| 2513 | 2552 | print 'No suspicious keyword or IOC found.' |
| 2514 | 2553 | |
| 2554 | + def print_analysis_json(self, show_decoded_strings=False): | |
| 2555 | + """ | |
| 2556 | + Analyze the VBA macros of this parser (via self.analyze_macros), and return the results in a json-compatible format | |
| 2557 | + | |
| 2558 | + (no VBA source code is passed in: this method has no vba_code parameter) | |
| 2559 | + :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content. | |
| 2560 | + :return: list of dict, one per analysis result, with keys 'type', 'keyword' and 'description' | |
| 2561 | + """ | |
| 2562 | + # print a waiting message only if the output is not redirected to a file: | |
| 2563 | + if sys.stdout.isatty(): | |
| 2564 | + print 'Analysis...\r', | |
| 2565 | + sys.stdout.flush() | |
| 2566 | + return [dict(type=kw_type, keyword=keyword, description=description) | |
| 2567 | + for kw_type, keyword, description in self.analyze_macros(show_decoded_strings)] | |
| 2515 | 2568 | |
| 2516 | 2569 | def process_file(self, show_decoded_strings=False, |
| 2517 | 2570 | display_code=True, global_analysis=True, hide_attributes=True, |
| ... | ... | @@ -2592,7 +2645,82 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2592 | 2645 | print '' |
| 2593 | 2646 | |
| 2594 | 2647 | |
| 2595 | - def process_file_triage(self, show_decoded_strings=False, skip_deobfuscate=False): | |
| 2648 | + def process_file_json(self, show_decoded_strings=False, | |
| 2649 | + display_code=True, global_analysis=True, hide_attributes=True, | |
| 2650 | + vba_code_only=False, show_deobfuscated_code=False): | |
| 2651 | + """ | |
| 2652 | + Process a single file | |
| 2653 | + | |
| 2654 | + every "show" or "print" here is to be translated as "add to json" | |
| 2655 | + | |
| 2656 | + (the file itself is not a parameter: its name and container are read from self.filename and self.container) | |
| 2657 | + :param vba_code_only: bool, if True display_code is forced to True and no analysis is performed (default False) | |
| 2658 | + :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content. | |
| 2659 | + :param display_code: bool, if False VBA source code is not displayed (default True) | |
| 2660 | + :param global_analysis: bool, if True all modules are merged for a single analysis (default), | |
| 2661 | + otherwise each module is analyzed separately (old behaviour) | |
| 2662 | + :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default) | |
| 2663 | + """ | |
| 2664 | + #TODO: fix conflicting parameters (?) | |
| 2665 | + | |
| 2666 | + if vba_code_only and not display_code: | |
| 2667 | + display_code = True | |
| 2668 | + | |
| 2669 | + result = {} | |
| 2670 | + | |
| 2671 | + if self.container: | |
| 2672 | + result['container'] = self.container | |
| 2673 | + else: | |
| 2674 | + result['container'] = None | |
| 2675 | + result['file'] = self.filename | |
| 2676 | + result['json_conversion_successful'] = False | |
| 2677 | + result['analysis'] = None | |
| 2678 | + result['code_deobfuscated'] = None | |
| 2679 | + | |
| 2680 | + try: | |
| 2681 | + #TODO: handle olefile errors, when an OLE file is malformed | |
| 2682 | + result['type'] = self.type | |
| 2683 | + macros = [] | |
| 2684 | + if self.detect_vba_macros(): | |
| 2685 | + for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros(): | |
| 2686 | + curr_macro = {} | |
| 2687 | + if hide_attributes: | |
| 2688 | + # hide attribute lines: | |
| 2689 | + vba_code_filtered = filter_vba(vba_code) | |
| 2690 | + else: | |
| 2691 | + vba_code_filtered = vba_code | |
| 2692 | + | |
| 2693 | + curr_macro['vba_filename'] = vba_filename | |
| 2694 | + curr_macro['subfilename'] = subfilename | |
| 2695 | + curr_macro['ole_stream'] = stream_path | |
| 2696 | + if display_code: | |
| 2697 | + curr_macro['code'] = vba_code_filtered.strip() | |
| 2698 | + if not global_analysis and not vba_code_only: | |
| 2699 | + # analyse each module's code, filtered to avoid false positives: | |
| 2700 | + #TODO: remove this option | |
| 2701 | + curr_macro['analysis'] = self.print_analysis_json(show_decoded_strings) | |
| 2702 | + macros.append(curr_macro) | |
| 2703 | + if global_analysis and not vba_code_only: | |
| 2704 | + # analyse the code from all modules at once: | |
| 2705 | + result['analysis'] = self.print_analysis_json(show_decoded_strings) | |
| 2706 | + if show_deobfuscated_code: | |
| 2707 | + result['code_deobfuscated'] = self.reveal() | |
| 2708 | + result['macros'] = macros | |
| 2709 | + result['json_conversion_successful'] = True | |
| 2710 | + except KeyboardInterrupt: | |
| 2711 | + # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2712 | + raise | |
| 2713 | + except: #TypeError: | |
| 2714 | + #raise | |
| 2715 | + #TODO: print more info if debug mode | |
| 2716 | + #print sys.exc_value | |
| 2717 | + # display the exception with full stack trace for debugging, but do not stop: | |
| 2718 | + traceback.print_exc() | |
| 2719 | + | |
| 2720 | + return result | |
| 2721 | + | |
| 2722 | + | |
| 2723 | + def process_file_triage(self, show_decoded_strings=False): | |
| 2596 | 2724 | """ |
| 2597 | 2725 | Process a file in triage mode, showing only summary results on one line. |
| 2598 | 2726 | """ |
| ... | ... | @@ -2691,10 +2819,19 @@ def main(): |
| 2691 | 2819 | help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)') |
| 2692 | 2820 | parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', |
| 2693 | 2821 | help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') |
| 2694 | - parser.add_option("-t", '--triage', action="store_true", dest="triage_mode", | |
| 2695 | - help='triage mode, display results as a summary table (default for multiple files)') | |
| 2696 | - parser.add_option("-d", '--detailed', action="store_true", dest="detailed_mode", | |
| 2697 | - help='detailed mode, display full results (default for single file)') | |
| 2822 | + # output mode; could make this even simpler with add_option(type='choice') but that would make | |
| 2823 | + # cmd line interface incompatible... | |
| 2824 | + modes = optparse.OptionGroup(parser, title='Output mode (mutually exclusive)') | |
| 2825 | + modes.add_option("-t", '--triage', action="store_const", dest="output_mode", | |
| 2826 | + const='triage', default='unspecified', | |
| 2827 | + help='triage mode, display results as a summary table (default for multiple files)') | |
| 2828 | + modes.add_option("-d", '--detailed', action="store_const", dest="output_mode", | |
| 2829 | + const='detailed', default='unspecified', | |
| 2830 | + help='detailed mode, display full results (default for single file)') | |
| 2831 | + modes.add_option("-j", '--json', action="store_const", dest="output_mode", | |
| 2832 | + const='json', default='unspecified', | |
| 2833 | + help='json mode, detailed in json format (never default)') | |
| 2834 | + parser.add_option_group(modes) | |
| 2698 | 2835 | parser.add_option("-a", '--analysis', action="store_false", dest="display_code", default=True, |
| 2699 | 2836 | help='display only analysis results, not the macro source code') |
| 2700 | 2837 | parser.add_option("-c", '--code', action="store_true", dest="vba_code_only", default=False, |
| ... | ... | @@ -2726,8 +2863,13 @@ def main(): |
| 2726 | 2863 | parser.print_help() |
| 2727 | 2864 | sys.exit() |
| 2728 | 2865 | |
| 2729 | - # print banner with version | |
| 2730 | - print 'olevba %s - http://decalage.info/python/oletools' % __version__ | |
| 2866 | + # provide info about tool and its version | |
| 2867 | + if options.output_mode == 'json': | |
| 2868 | + json_results = [dict(script_name='olevba', version=__version__, | |
| 2869 | + url='http://decalage.info/python/oletools', | |
| 2870 | + type='MetaInformation'), ] | |
| 2871 | + else: | |
| 2872 | + print 'olevba %s - http://decalage.info/python/oletools' % __version__ | |
| 2731 | 2873 | |
| 2732 | 2874 | logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') |
| 2733 | 2875 | # enable logging in the modules: |
| ... | ... | @@ -2750,8 +2892,9 @@ def main(): |
| 2750 | 2892 | if options.skip_deobfuscate and options.show_deobfuscated_code: |
| 2751 | 2893 | logging.warning('Ignoring option --reveal since option -n / --no-deobfuscate is present!') |
| 2752 | 2894 | |
| 2753 | - # Column headers (except if detailed mode) | |
| 2754 | - if not options.detailed_mode or options.triage_mode: | |
| 2895 | + # Column headers (do not know how many files there will be yet, so if no output_mode | |
| 2896 | + # was specified, we will print triage for first file --> need these headers) | |
| 2897 | + if options.output_mode in ('triage', 'unspecified'): | |
| 2755 | 2898 | print '%-12s %-65s' % ('Flags', 'Filename') |
| 2756 | 2899 | print '%-12s %-65s' % ('-' * 11, '-' * 65) |
| 2757 | 2900 | |
| ... | ... | @@ -2766,14 +2909,14 @@ def main(): |
| 2766 | 2909 | continue |
| 2767 | 2910 | # Open the file |
| 2768 | 2911 | vba_parser = VBA_Parser_CLI(filename, data=data, container=container) |
| 2769 | - if options.detailed_mode and not options.triage_mode: | |
| 2912 | + if options.output_mode == 'detailed': | |
| 2770 | 2913 | # fully detailed output |
| 2771 | 2914 | vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, |
| 2772 | 2915 | display_code=options.display_code, global_analysis=True, #options.global_analysis, |
| 2773 | 2916 | hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, |
| 2774 | 2917 | show_deobfuscated_code=options.show_deobfuscated_code, |
| 2775 | 2918 | skip_deobfuscate=options.skip_deobfuscate) |
| 2776 | - else: | |
| 2919 | + elif options.output_mode in ('triage', 'unspecified'): | |
| 2777 | 2920 | # print container name when it changes: |
| 2778 | 2921 | if container != previous_container: |
| 2779 | 2922 | if container is not None: |
| ... | ... | @@ -2782,20 +2925,59 @@ def main(): |
| 2782 | 2925 | # summarized output for triage: |
| 2783 | 2926 | vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, |
| 2784 | 2927 | skip_deobfuscate=options.skip_deobfuscate) |
| 2928 | + elif options.output_mode == 'json': | |
| 2929 | + json_results.append( | |
| 2930 | + vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, | |
| 2931 | + display_code=options.display_code, global_analysis=True, #options.global_analysis, | |
| 2932 | + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | |
| 2933 | + show_deobfuscated_code=options.show_deobfuscated_code)) | |
| 2934 | + else: # (should be impossible) | |
| 2935 | + raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode)) | |
| 2785 | 2936 | count += 1 |
| 2786 | - if not options.detailed_mode or options.triage_mode: | |
| 2937 | + if options.output_mode == 'triage': | |
| 2787 | 2938 | print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ |
| 2788 | 2939 | 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ |
| 2789 | 2940 | 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' |
| 2790 | 2941 | |
| 2791 | - if count == 1 and not options.triage_mode and not options.detailed_mode: | |
| 2792 | - # if options -t and -d were not specified and it's a single file, print details: | |
| 2942 | + if count == 1 and options.output_mode == 'unspecified': | |
| 2943 | + # if options -t, -d and -j were not specified and it's a single file, print details: | |
| 2793 | 2944 | vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, |
| 2794 | 2945 | display_code=options.display_code, global_analysis=True, #options.global_analysis, |
| 2795 | 2946 | hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, |
| 2796 | 2947 | show_deobfuscated_code=options.show_deobfuscated_code, |
| 2797 | 2948 | skip_deobfuscate=options.skip_deobfuscate) |
| 2798 | 2949 | |
| 2950 | + if options.output_mode == 'json': | |
| 2951 | + json_options = dict(check_circular=False, indent=4, ensure_ascii=False) | |
| 2952 | + | |
| 2953 | + # json.dump[s] cannot deal with unicode objects that are not properly | |
| 2954 | + # encoded --> encode in own function: | |
| 2955 | + json_results = json2ascii(json_results) | |
| 2956 | + #print_json(json_results) | |
| 2957 | + | |
| 2958 | + if False: # options.outfile: # (option currently commented out) | |
| 2959 | + with open(outfile, 'w') as write_handle: | |
| 2960 | + json.dump(write_handle, **json_options) | |
| 2961 | + else: | |
| 2962 | + print json.dumps(json_results, **json_options) | |
| 2963 | + | |
| 2964 | + | |
| 2965 | +def print_json(j): | |
| 2966 | + if isinstance(j, dict): | |
| 2967 | + for key, val in j.items(): | |
| 2968 | + print_json(key) | |
| 2969 | + print_json(val) | |
| 2970 | + elif isinstance(j, list): | |
| 2971 | + for elem in j: | |
| 2972 | + print_json(elem) | |
| 2973 | + else: | |
| 2974 | + try: | |
| 2975 | + if len(j) > 20: | |
| 2976 | + print type(j), repr(j[:20]), '...(len {0})'.format(len(j)) | |
| 2977 | + else: | |
| 2978 | + print type(j), repr(j) | |
| 2979 | + except TypeError: | |
| 2980 | + print type(j), repr(j) | |
| 2799 | 2981 | |
| 2800 | 2982 | if __name__ == '__main__': |
| 2801 | 2983 | main() | ... | ... |
oletools/rtfobj.py
| ... | ... | @@ -15,7 +15,7 @@ http://www.decalage.info/python/oletools |
| 15 | 15 | |
| 16 | 16 | #=== LICENSE ================================================================= |
| 17 | 17 | |
| 18 | -# rtfobj is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info) | |
| 18 | +# rtfobj is copyright (c) 2012-2016, Philippe Lagadec (http://www.decalage.info) | |
| 19 | 19 | # All rights reserved. |
| 20 | 20 | # |
| 21 | 21 | # Redistribution and use in source and binary forms, with or without modification, |
| ... | ... | @@ -46,8 +46,9 @@ http://www.decalage.info/python/oletools |
| 46 | 46 | # 2015-12-09 v0.03 PL: - configurable logging, CLI options |
| 47 | 47 | # - extract OLE 1.0 objects |
| 48 | 48 | # - extract files from OLE Package objects |
| 49 | +# 2016-04-01 v0.04 PL: - fixed logging output to use stdout instead of stderr | |
| 49 | 50 | |
| 50 | -__version__ = '0.03' | |
| 51 | +__version__ = '0.04' | |
| 51 | 52 | |
| 52 | 53 | #------------------------------------------------------------------------------ |
| 53 | 54 | # TODO: |
| ... | ... | @@ -338,8 +339,11 @@ if __name__ == '__main__': |
| 338 | 339 | parser.print_help() |
| 339 | 340 | sys.exit() |
| 340 | 341 | |
| 341 | - # setup logging to the console | |
| 342 | - logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') | |
| 342 | + # Setup logging to the console: | |
| 343 | + # here we use stdout instead of stderr by default, so that the output | |
| 344 | + # can be redirected properly. | |
| 345 | + logging.basicConfig(level=LOG_LEVELS[options.loglevel], stream=sys.stdout, | |
| 346 | + format='%(levelname)-8s %(message)s') | |
| 343 | 347 | # enable logging in the modules: |
| 344 | 348 | log.setLevel(logging.NOTSET) |
| 345 | 349 | oleobj.log.setLevel(logging.NOTSET) | ... | ... |
setup.py
| ... | ... | @@ -38,7 +38,7 @@ import sys, os, fnmatch |
| 38 | 38 | #--- METADATA ----------------------------------------------------------------- |
| 39 | 39 | |
| 40 | 40 | name = "oletools" |
| 41 | -version = '0.43' | |
| 41 | +version = '0.44' | |
| 42 | 42 | desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" |
| 43 | 43 | long_desc = open('oletools/README.rst').read() |
| 44 | 44 | author ="Philippe Lagadec" | ... | ... |