Commit 20e6670e5587fff790b904623520d9fd2f513bdc
olevba: added JSON output (by Christian Herdtweck)
rtfobj: fixed logging output to use stdout instead of stderr
Showing
2 changed files
with
203 additions
and
18 deletions
oletools/olevba.py
| ... | ... | @@ -164,8 +164,9 @@ https://github.com/unixfreak0037/officeparser |
| 164 | 164 | # 2016-02-20 v0.43 PL: - fixed issue #34 in the VBA parser and vba_chr |
| 165 | 165 | # 2016-02-29 PL: - added Workbook_Activate to suspicious keywords |
| 166 | 166 | # 2016-03-08 v0.44 PL: - added VBA Form strings extraction and analysis |
| 167 | +# 2016-03-04 v0.45 CH: - added JSON output (by Christian Herdtweck) | |
| 167 | 168 | |
| 168 | -__version__ = '0.44' | |
| 169 | +__version__ = '0.45' | |
| 169 | 170 | |
| 170 | 171 | #------------------------------------------------------------------------------ |
| 171 | 172 | # TODO: |
| ... | ... | @@ -215,6 +216,7 @@ import traceback |
| 215 | 216 | import zlib |
| 216 | 217 | import email # for MHTML parsing |
| 217 | 218 | import string # for printable |
| 219 | +import json # for json output mode (argument --json) | |
| 218 | 220 | |
| 219 | 221 | # import lxml or ElementTree for XML parsing: |
| 220 | 222 | try: |
| ... | ... | @@ -1655,6 +1657,42 @@ def detect_vba_strings(vba_code): |
| 1655 | 1657 | return results |
| 1656 | 1658 | |
| 1657 | 1659 | |
def json2ascii(json_obj, encoding='utf8', errors='replace'):
    """
    Recursively convert every string in a JSON-like structure to a byte
    string that is valid in the given encoding.

    Byte strings are decoded and re-encoded, so that invalid sequences are
    handled according to `errors`; unicode strings are encoded. Dict values
    and list items are converted in place, tuples are rebuilt. Dict keys are
    left untouched. None, bool, int and float are returned unchanged, as is
    any object of an unexpected type.

    :param json_obj: None, bool, int, float, str, unicode, dict, list or tuple
    :param encoding: str, name of the target encoding (default 'utf8')
    :param errors: str, codec error handler used for decoding and encoding
                   (default 'replace')
    :return: the converted object (the same object for dicts and lists)
    """
    # bytes is an alias of str on Python 2.6+, and type(u'') is unicode there,
    # so these two names select the same types as the str/unicode pair.
    text_type = type(u'')

    if json_obj is None or isinstance(json_obj, (bool, int, float)):
        return json_obj

    if isinstance(json_obj, bytes):
        dencoded = json_obj.decode(encoding, errors).encode(encoding, errors)
        # only report strings that were actually modified by the round trip
        if dencoded != json_obj:
            logging.info('json2ascii: replaced: {0} (len {1})'
                         .format(json_obj, len(json_obj)))
            logging.info('json2ascii: with:     {0} (len {1})'
                         .format(dencoded, len(dencoded)))
        return dencoded

    if isinstance(json_obj, text_type):
        encoded = json_obj.encode(encoding, errors)
        # the original unicode object cannot safely be put into the logger
        logging.info('json2ascii: replaced: {0}'.format(encoded))
        return encoded

    if isinstance(json_obj, dict):
        # re-assigning existing keys does not change the dict size, so this
        # is safe while iterating
        for key in json_obj:
            json_obj[key] = json2ascii(json_obj[key], encoding, errors)
        return json_obj

    if isinstance(json_obj, list):
        # slice assignment keeps the caller's list object, like the dict case
        json_obj[:] = [json2ascii(item, encoding, errors) for item in json_obj]
        return json_obj

    if isinstance(json_obj, tuple):
        # tuples are immutable: callers must use the returned value
        return tuple(json2ascii(item, encoding, errors) for item in json_obj)

    logging.debug('unexpected type in json2ascii: {0} -- leave as is'
                  .format(type(json_obj)))
    return json_obj
| 1694 | + | |
| 1695 | + | |
| 1658 | 1696 | class VBA_Scanner(object): |
| 1659 | 1697 | """ |
| 1660 | 1698 | Class to scan the source code of a VBA module to find obfuscated strings, |
| ... | ... | @@ -2506,6 +2544,20 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2506 | 2544 | else: |
| 2507 | 2545 | print 'No suspicious keyword or IOC found.' |
| 2508 | 2546 | |
def print_analysis_json(self, show_decoded_strings=False):
    """
    Analyze the VBA macros of this file and return the results as a list
    suitable for JSON output. Despite its name, this method prints nothing
    except a transient progress message when stdout is a terminal.

    :param show_decoded_strings: bool, passed on to self.analyze_macros
    :return: list of dicts, one per result returned by self.analyze_macros,
             each with the keys 'type', 'keyword' and 'description'
    """
    # print a waiting message only if the output is not redirected to a file;
    # the trailing carriage return lets later output overwrite it:
    if sys.stdout.isatty():
        sys.stdout.write('Analysis...\r')
        sys.stdout.flush()
    return [dict(type=kw_type, keyword=keyword, description=description)
            for kw_type, keyword, description in self.analyze_macros(show_decoded_strings)]
| 2509 | 2561 | |
| 2510 | 2562 | def process_file(self, show_decoded_strings=False, |
| 2511 | 2563 | display_code=True, global_analysis=True, hide_attributes=True, |
| ... | ... | @@ -2584,6 +2636,81 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2584 | 2636 | print '' |
| 2585 | 2637 | |
| 2586 | 2638 | |
def process_file_json(self, show_decoded_strings=False,
                      display_code=True, global_analysis=True, hide_attributes=True,
                      vba_code_only=False, show_deobfuscated_code=False):
    """
    Process a single file and collect the results in a dict for JSON output.

    Counterpart of process_file: everything that method would show or print
    is added to the returned dict instead.

    :param show_decoded_strings: bool, passed on to the analysis
    :param display_code: bool, if False VBA source code is not included (default True)
    :param global_analysis: bool, if True all modules are merged for a single analysis (default),
                            otherwise an analysis is attached to each module (old behaviour)
    :param hide_attributes: bool, if True the VBA code is passed through filter_vba
                            to hide attribute lines (default)
    :param vba_code_only: bool, if True no analysis is run and display_code is forced to True
    :param show_deobfuscated_code: bool, if True the output of self.reveal() is included
    :return: dict with the keys 'container', 'file', 'json_conversion_successful',
             'analysis' and 'code_deobfuscated', plus 'type' and 'macros' once they
             have been determined. 'json_conversion_successful' stays False if an
             exception interrupted the processing.
    """
    #TODO: fix conflicting parameters (?)

    if vba_code_only and not display_code:
        display_code = True

    result = {
        'container': self.container or None,
        'file': self.filename,
        'json_conversion_successful': False,
        'analysis': None,
        'code_deobfuscated': None,
    }

    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        result['type'] = self.type
        macros = []
        if self.detect_vba_macros():
            for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
                if hide_attributes:
                    # hide attribute lines:
                    vba_code_filtered = filter_vba(vba_code)
                else:
                    vba_code_filtered = vba_code

                curr_macro = {
                    'vba_filename': vba_filename,
                    'subfilename': subfilename,
                    'ole_stream': stream_path,
                }
                if display_code:
                    curr_macro['code'] = vba_code_filtered.strip()
                if not global_analysis and not vba_code_only:
                    #TODO: remove this option
                    # NOTE: print_analysis_json receives no module code, so
                    # this attaches the analysis of the whole file to every
                    # module rather than a per-module analysis.
                    curr_macro['analysis'] = self.print_analysis_json(show_decoded_strings)
                macros.append(curr_macro)
            if global_analysis and not vba_code_only:
                # analyse the code from all modules at once:
                result['analysis'] = self.print_analysis_json(show_decoded_strings)
            if show_deobfuscated_code:
                result['code_deobfuscated'] = self.reveal()
        result['macros'] = macros
        result['json_conversion_successful'] = True
    except Exception:
        # Exception does not cover KeyboardInterrupt or SystemExit, so Ctrl+C
        # and sys.exit() still propagate to the caller.
        #TODO: print more info if debug mode
        # display the exception with full stack trace for debugging, but do not stop:
        traceback.print_exc()

    return result
| 2712 | + | |
| 2713 | + | |
| 2587 | 2714 | def process_file_triage(self, show_decoded_strings=False): |
| 2588 | 2715 | """ |
| 2589 | 2716 | Process a file in triage mode, showing only summary results on one line. |
| ... | ... | @@ -2682,10 +2809,19 @@ def main(): |
| 2682 | 2809 | help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)') |
| 2683 | 2810 | parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', |
| 2684 | 2811 | help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') |
| 2685 | - parser.add_option("-t", '--triage', action="store_true", dest="triage_mode", | |
| 2686 | - help='triage mode, display results as a summary table (default for multiple files)') | |
| 2687 | - parser.add_option("-d", '--detailed', action="store_true", dest="detailed_mode", | |
| 2688 | - help='detailed mode, display full results (default for single file)') | |
| 2812 | + # output mode; could make this even simpler with add_option(type='choice') but that would make | |
| 2813 | + # cmd line interface incompatible... | |
| 2814 | + modes = optparse.OptionGroup(parser, title='Output mode (mutually exclusive)') | |
| 2815 | + modes.add_option("-t", '--triage', action="store_const", dest="output_mode", | |
| 2816 | + const='triage', default='unspecified', | |
| 2817 | + help='triage mode, display results as a summary table (default for multiple files)') | |
| 2818 | + modes.add_option("-d", '--detailed', action="store_const", dest="output_mode", | |
| 2819 | + const='detailed', default='unspecified', | |
| 2820 | + help='detailed mode, display full results (default for single file)') | |
| 2821 | + modes.add_option("-j", '--json', action="store_const", dest="output_mode", | |
| 2822 | + const='json', default='unspecified', | |
| 2823 | + help='json mode, detailed in json format (never default)') | |
| 2824 | + parser.add_option_group(modes) | |
| 2689 | 2825 | parser.add_option("-a", '--analysis', action="store_false", dest="display_code", default=True, |
| 2690 | 2826 | help='display only analysis results, not the macro source code') |
| 2691 | 2827 | parser.add_option("-c", '--code', action="store_true", dest="vba_code_only", default=False, |
| ... | ... | @@ -2715,8 +2851,13 @@ def main(): |
| 2715 | 2851 | parser.print_help() |
| 2716 | 2852 | sys.exit() |
| 2717 | 2853 | |
| 2718 | - # print banner with version | |
| 2719 | - print 'olevba %s - http://decalage.info/python/oletools' % __version__ | |
| 2854 | + # provide info about tool and its version | |
| 2855 | + if options.output_mode == 'json': | |
| 2856 | + json_results = [dict(script_name='olevba', version=__version__, | |
| 2857 | + url='http://decalage.info/python/oletools', | |
| 2858 | + type='MetaInformation'), ] | |
| 2859 | + else: | |
| 2860 | + print 'olevba %s - http://decalage.info/python/oletools' % __version__ | |
| 2720 | 2861 | |
| 2721 | 2862 | logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') |
| 2722 | 2863 | # enable logging in the modules: |
| ... | ... | @@ -2735,8 +2876,9 @@ def main(): |
| 2735 | 2876 | # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr') |
| 2736 | 2877 | # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7) |
| 2737 | 2878 | |
| 2738 | - # Column headers (except if detailed mode) | |
| 2739 | - if not options.detailed_mode or options.triage_mode: | |
| 2879 | + # Column headers (do not know how many files there will be yet, so if no output_mode | |
| 2880 | + # was specified, we will print triage for first file --> need these headers) | |
| 2881 | + if options.output_mode in ('triage', 'unspecified'): | |
| 2740 | 2882 | print '%-12s %-65s' % ('Flags', 'Filename') |
| 2741 | 2883 | print '%-12s %-65s' % ('-' * 11, '-' * 65) |
| 2742 | 2884 | |
| ... | ... | @@ -2751,13 +2893,13 @@ def main(): |
| 2751 | 2893 | continue |
| 2752 | 2894 | # Open the file |
| 2753 | 2895 | vba_parser = VBA_Parser_CLI(filename, data=data, container=container) |
| 2754 | - if options.detailed_mode and not options.triage_mode: | |
| 2896 | + if options.output_mode == 'detailed': | |
| 2755 | 2897 | # fully detailed output |
| 2756 | 2898 | vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, |
| 2757 | 2899 | display_code=options.display_code, global_analysis=True, #options.global_analysis, |
| 2758 | 2900 | hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, |
| 2759 | 2901 | show_deobfuscated_code=options.show_deobfuscated_code) |
| 2760 | - else: | |
| 2902 | + elif options.output_mode in ('triage', 'unspecified'): | |
| 2761 | 2903 | # print container name when it changes: |
| 2762 | 2904 | if container != previous_container: |
| 2763 | 2905 | if container is not None: |
| ... | ... | @@ -2765,19 +2907,58 @@ def main(): |
| 2765 | 2907 | previous_container = container |
| 2766 | 2908 | # summarized output for triage: |
| 2767 | 2909 | vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings) |
| 2910 | + elif options.output_mode == 'json': | |
| 2911 | + json_results.append( | |
| 2912 | + vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, | |
| 2913 | + display_code=options.display_code, global_analysis=True, #options.global_analysis, | |
| 2914 | + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | |
| 2915 | + show_deobfuscated_code=options.show_deobfuscated_code)) | |
| 2916 | + else: # (should be impossible) | |
| 2917 | + raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode)) | |
| 2768 | 2918 | count += 1 |
| 2769 | - if not options.detailed_mode or options.triage_mode: | |
| 2919 | + if options.output_mode == 'triage': | |
| 2770 | 2920 | print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ |
| 2771 | 2921 | 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ |
| 2772 | 2922 | 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' |
| 2773 | 2923 | |
| 2774 | - if count == 1 and not options.triage_mode and not options.detailed_mode: | |
| 2775 | - # if options -t and -d were not specified and it's a single file, print details: | |
| 2924 | + if count == 1 and options.output_mode == 'unspecified': | |
| 2925 | + # if options -t, -d and -j were not specified and it's a single file, print details: | |
| 2776 | 2926 | vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, |
| 2777 | 2927 | display_code=options.display_code, global_analysis=True, #options.global_analysis, |
| 2778 | 2928 | hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, |
| 2779 | 2929 | show_deobfuscated_code=options.show_deobfuscated_code) |
| 2780 | 2930 | |
| 2931 | + if options.output_mode == 'json': | |
| 2932 | + json_options = dict(check_circular=False, indent=4, ensure_ascii=False) | |
| 2933 | + | |
| 2934 | + # json.dump[s] cannot deal with unicode objects that are not properly | |
| 2935 | + # encoded --> encode in own function: | |
| 2936 | + json_results = json2ascii(json_results) | |
| 2937 | + #print_json(json_results) | |
| 2938 | + | |
| 2939 | + if False: # options.outfile: # (option currently commented out) | |
| 2940 | + with open(outfile, 'w') as write_handle: | |
| 2941 | + json.dump(write_handle, **json_options) | |
| 2942 | + else: | |
| 2943 | + print json.dumps(json_results, **json_options) | |
| 2944 | + | |
| 2945 | + | |
def print_json(j):
    """
    Debugging helper: print the type and repr of every leaf of a JSON-like
    structure, one leaf per line.

    Dicts (keys and values), lists and tuples are traversed recursively.
    For any other object with a length above 20, only the first 20 elements
    are shown, followed by the total length.

    :param j: object to inspect (dict, list, tuple or any leaf value)
    """
    if isinstance(j, dict):
        for key, val in j.items():
            print_json(key)
            print_json(val)
    elif isinstance(j, (list, tuple)):
        for elem in j:
            print_json(elem)
    else:
        try:
            if len(j) > 20:
                summary = '{0} ...(len {1})'.format(repr(j[:20]), len(j))
            else:
                summary = repr(j)
        except TypeError:
            # no length (e.g. numbers, None) or not sliceable: show in full
            summary = repr(j)
        # one pre-formatted argument prints identically with the Python 2
        # print statement and the Python 3 print function
        print('{0} {1}'.format(type(j), summary))
| 2781 | 2962 | |
| 2782 | 2963 | if __name__ == '__main__': |
| 2783 | 2964 | main() | ... | ... |
oletools/rtfobj.py
| ... | ... | @@ -15,7 +15,7 @@ http://www.decalage.info/python/oletools |
| 15 | 15 | |
| 16 | 16 | #=== LICENSE ================================================================= |
| 17 | 17 | |
| 18 | -# rtfobj is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info) | |
| 18 | +# rtfobj is copyright (c) 2012-2016, Philippe Lagadec (http://www.decalage.info) | |
| 19 | 19 | # All rights reserved. |
| 20 | 20 | # |
| 21 | 21 | # Redistribution and use in source and binary forms, with or without modification, |
| ... | ... | @@ -46,8 +46,9 @@ http://www.decalage.info/python/oletools |
| 46 | 46 | # 2015-12-09 v0.03 PL: - configurable logging, CLI options |
| 47 | 47 | # - extract OLE 1.0 objects |
| 48 | 48 | # - extract files from OLE Package objects |
| 49 | +# 2016-04-01 v0.04 PL: - fixed logging output to use stdout instead of stderr | |
| 49 | 50 | |
| 50 | -__version__ = '0.03' | |
| 51 | +__version__ = '0.04' | |
| 51 | 52 | |
| 52 | 53 | #------------------------------------------------------------------------------ |
| 53 | 54 | # TODO: |
| ... | ... | @@ -338,8 +339,11 @@ if __name__ == '__main__': |
| 338 | 339 | parser.print_help() |
| 339 | 340 | sys.exit() |
| 340 | 341 | |
| 341 | - # setup logging to the console | |
| 342 | - logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') | |
| 342 | + # Setup logging to the console: | |
| 343 | + # here we use stdout instead of stderr by default, so that the output | |
| 344 | + # can be redirected properly. | |
| 345 | + logging.basicConfig(level=LOG_LEVELS[options.loglevel], stream=sys.stdout, | |
| 346 | + format='%(levelname)-8s %(message)s') | |
| 343 | 347 | # enable logging in the modules: |
| 344 | 348 | log.setLevel(logging.NOTSET) |
| 345 | 349 | oleobj.log.setLevel(logging.NOTSET) | ... | ... |