Commit 4539d6b15509fe2ac9982269f05a0d9b7dea482c

Authored by Philippe Lagadec
2 parents 3ac3fd00 20e6670e

olevba: added option --no-deobfuscate (temporary)

oletools/olevba.py
... ... @@ -164,8 +164,10 @@ https://github.com/unixfreak0037/officeparser
164 164 # 2016-02-20 v0.43 PL: - fixed issue #34 in the VBA parser and vba_chr
165 165 # 2016-02-29 PL: - added Workbook_Activate to suspicious keywords
166 166 # 2016-03-08 v0.44 PL: - added VBA Form strings extraction and analysis
  167 +# 2016-03-04 v0.45 CH: - added JSON output (by Christian Herdtweck)
  168 +# 2016-03-16 CH: - added option --no-deobfuscate (temporary)
167 169  
168   -__version__ = '0.44'
  170 +__version__ = '0.45'
169 171  
170 172 #------------------------------------------------------------------------------
171 173 # TODO:
... ... @@ -215,6 +217,7 @@ import traceback
215 217 import zlib
216 218 import email # for MHTML parsing
217 219 import string # for printable
  220 +import json # for json output mode (argument --json)
218 221  
219 222 # import lxml or ElementTree for XML parsing:
220 223 try:
... ... @@ -1655,6 +1658,42 @@ def detect_vba_strings(vba_code):
1655 1658 return results
1656 1659  
1657 1660  
def json2ascii(json_obj, encoding='utf8', errors='replace'):
    """
    Ensure a JSON-like structure only contains byte strings that decode cleanly.

    Works recursively: every byte string is decoded and re-encoded with the
    given codec, and every unicode string is encoded, so that there will be
    no trouble in dumping the result to JSON and loading it again.

    :param json_obj: None, bool, int, float, str, unicode, dict, list or tuple
                     (containers may be nested). Any other type is left as is.
    :param encoding: str, codec used to decode/encode the strings (default utf8)
    :param errors: str, codec error handling scheme (default 'replace')
    :return: the sanitized object. A dict or list is updated in place and
             returned, a tuple is returned as a new tuple. Only dict values
             are converted, dict keys are left untouched.
    """
    if json_obj is None or isinstance(json_obj, (bool, int, float)):
        return json_obj

    # bytes is the very same type as str on Python 2.6+
    if isinstance(json_obj, bytes):
        dencoded = json_obj.decode(encoding, errors).encode(encoding, errors)
        # only report strings that were actually modified by the round trip:
        if dencoded != json_obj:
            logging.info('json2ascii: replaced: {0} (len {1})'
                         .format(json_obj, len(json_obj)))
            logging.info('json2ascii: with: {0} (len {1})'
                         .format(dencoded, len(dencoded)))
        return dencoded

    if isinstance(json_obj, dict):
        for key in json_obj:
            json_obj[key] = json2ascii(json_obj[key], encoding, errors)
        return json_obj

    if isinstance(json_obj, list):
        # slice assignment stores the converted items back into the same
        # list, so that other references to it see the sanitized content
        json_obj[:] = [json2ascii(item, encoding, errors) for item in json_obj]
        return json_obj

    if isinstance(json_obj, tuple):
        # tuples are immutable: build a new one with the converted items
        return tuple(json2ascii(item, encoding, errors) for item in json_obj)

    if isinstance(json_obj, unicode):
        encoded = json_obj.encode(encoding, errors)
        # cannot put the original unicode object into the logger
        logging.info('json2ascii: replaced: {0}'.format(encoded))
        return encoded

    logging.debug('unexpected type in json2ascii: {0} -- leave as is'
                  .format(type(json_obj)))
    return json_obj
  1695 +
  1696 +
1658 1697 class VBA_Scanner(object):
1659 1698 """
1660 1699 Class to scan the source code of a VBA module to find obfuscated strings,
... ... @@ -2512,6 +2551,20 @@ class VBA_Parser_CLI(VBA_Parser):
2512 2551 else:
2513 2552 print 'No suspicious keyword or IOC found.'
2514 2553  
  2554 + def print_analysis_json(self, show_decoded_strings=False):
  2555 + """
  2556 + Analyze the provided VBA code, and return the results in json format
  2557 +
  2558 + :param vba_code: str, VBA source code to be analyzed
  2559 + :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
  2560 + :return: dict
  2561 + """
  2562 + # print a waiting message only if the output is not redirected to a file:
  2563 + if sys.stdout.isatty():
  2564 + print 'Analysis...\r',
  2565 + sys.stdout.flush()
  2566 + return [dict(type=kw_type, keyword=keyword, description=description)
  2567 + for kw_type, keyword, description in self.analyze_macros(show_decoded_strings)]
2515 2568  
2516 2569 def process_file(self, show_decoded_strings=False,
2517 2570 display_code=True, global_analysis=True, hide_attributes=True,
... ... @@ -2592,7 +2645,82 @@ class VBA_Parser_CLI(VBA_Parser):
2592 2645 print ''
2593 2646  
2594 2647  
2595   - def process_file_triage(self, show_decoded_strings=False, skip_deobfuscate=False):
  2648 + def process_file_json(self, show_decoded_strings=False,
  2649 + display_code=True, global_analysis=True, hide_attributes=True,
  2650 + vba_code_only=False, show_deobfuscated_code=False):
  2651 + """
  2652 + Process a single file
  2653 +
  2654 + every "show" or "print" here is to be translated as "add to json"
  2655 +
  2656 + :param filename: str, path and filename of file on disk, or within the container.
  2657 + :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
  2658 + :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
  2659 + :param display_code: bool, if False VBA source code is not displayed (default True)
  2660 + :param global_analysis: bool, if True all modules are merged for a single analysis (default),
  2661 + otherwise each module is analyzed separately (old behaviour)
  2662 + :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
  2663 + """
  2664 + #TODO: fix conflicting parameters (?)
  2665 +
  2666 + if vba_code_only and not display_code:
  2667 + display_code = True
  2668 +
  2669 + result = {}
  2670 +
  2671 + if self.container:
  2672 + result['container'] = self.container
  2673 + else:
  2674 + result['container'] = None
  2675 + result['file'] = self.filename
  2676 + result['json_conversion_successful'] = False
  2677 + result['analysis'] = None
  2678 + result['code_deobfuscated'] = None
  2679 +
  2680 + try:
  2681 + #TODO: handle olefile errors, when an OLE file is malformed
  2682 + result['type'] = self.type
  2683 + macros = []
  2684 + if self.detect_vba_macros():
  2685 + for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
  2686 + curr_macro = {}
  2687 + if hide_attributes:
  2688 + # hide attribute lines:
  2689 + vba_code_filtered = filter_vba(vba_code)
  2690 + else:
  2691 + vba_code_filtered = vba_code
  2692 +
  2693 + curr_macro['vba_filename'] = vba_filename
  2694 + curr_macro['subfilename'] = subfilename
  2695 + curr_macro['ole_stream'] = stream_path
  2696 + if display_code:
  2697 + curr_macro['code'] = vba_code_filtered.strip()
  2698 + if not global_analysis and not vba_code_only:
  2699 + # analyse each module's code, filtered to avoid false positives:
  2700 + #TODO: remove this option
  2701 + curr_macro['analysis'] = self.print_analysis_json(show_decoded_strings)
  2702 + macros.append(curr_macro)
  2703 + if global_analysis and not vba_code_only:
  2704 + # analyse the code from all modules at once:
  2705 + result['analysis'] = self.print_analysis_json(show_decoded_strings)
  2706 + if show_deobfuscated_code:
  2707 + result['code_deobfuscated'] = self.reveal()
  2708 + result['macros'] = macros
  2709 + result['json_conversion_successful'] = True
  2710 + except KeyboardInterrupt:
  2711 + # do not ignore exceptions when the user presses Ctrl+C/Pause:
  2712 + raise
  2713 + except: #TypeError:
  2714 + #raise
  2715 + #TODO: print more info if debug mode
  2716 + #print sys.exc_value
  2717 + # display the exception with full stack trace for debugging, but do not stop:
  2718 + traceback.print_exc()
  2719 +
  2720 + return result
  2721 +
  2722 +
  2723 + def process_file_triage(self, show_decoded_strings=False):
2596 2724 """
2597 2725 Process a file in triage mode, showing only summary results on one line.
2598 2726 """
... ... @@ -2691,10 +2819,19 @@ def main():
2691 2819 help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
2692 2820 parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
2693 2821 help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
2694   - parser.add_option("-t", '--triage', action="store_true", dest="triage_mode",
2695   - help='triage mode, display results as a summary table (default for multiple files)')
2696   - parser.add_option("-d", '--detailed', action="store_true", dest="detailed_mode",
2697   - help='detailed mode, display full results (default for single file)')
  2822 + # output mode; could make this even simpler with add_option(type='choice') but that would make
  2823 + # cmd line interface incompatible...
  2824 + modes = optparse.OptionGroup(parser, title='Output mode (mutually exclusive)')
  2825 + modes.add_option("-t", '--triage', action="store_const", dest="output_mode",
  2826 + const='triage', default='unspecified',
  2827 + help='triage mode, display results as a summary table (default for multiple files)')
  2828 + modes.add_option("-d", '--detailed', action="store_const", dest="output_mode",
  2829 + const='detailed', default='unspecified',
  2830 + help='detailed mode, display full results (default for single file)')
  2831 + modes.add_option("-j", '--json', action="store_const", dest="output_mode",
  2832 + const='json', default='unspecified',
  2833 + help='json mode, detailed in json format (never default)')
  2834 + parser.add_option_group(modes)
2698 2835 parser.add_option("-a", '--analysis', action="store_false", dest="display_code", default=True,
2699 2836 help='display only analysis results, not the macro source code')
2700 2837 parser.add_option("-c", '--code', action="store_true", dest="vba_code_only", default=False,
... ... @@ -2726,8 +2863,13 @@ def main():
2726 2863 parser.print_help()
2727 2864 sys.exit()
2728 2865  
2729   - # print banner with version
2730   - print 'olevba %s - http://decalage.info/python/oletools' % __version__
  2866 + # provide info about tool and its version
  2867 + if options.output_mode == 'json':
  2868 + json_results = [dict(script_name='olevba', version=__version__,
  2869 + url='http://decalage.info/python/oletools',
  2870 + type='MetaInformation'), ]
  2871 + else:
  2872 + print 'olevba %s - http://decalage.info/python/oletools' % __version__
2731 2873  
2732 2874 logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
2733 2875 # enable logging in the modules:
... ... @@ -2750,8 +2892,9 @@ def main():
2750 2892 if options.skip_deobfuscate and options.show_deobfuscated_code:
2751 2893 logging.warning('Ignoring option --reveal since option -n / --no-deobfuscate is present!')
2752 2894  
2753   - # Column headers (except if detailed mode)
2754   - if not options.detailed_mode or options.triage_mode:
  2895 + # Column headers (do not know how many files there will be yet, so if no output_mode
  2896 + # was specified, we will print triage for first file --> need these headers)
  2897 + if options.output_mode in ('triage', 'unspecified'):
2755 2898 print '%-12s %-65s' % ('Flags', 'Filename')
2756 2899 print '%-12s %-65s' % ('-' * 11, '-' * 65)
2757 2900  
... ... @@ -2766,14 +2909,14 @@ def main():
2766 2909 continue
2767 2910 # Open the file
2768 2911 vba_parser = VBA_Parser_CLI(filename, data=data, container=container)
2769   - if options.detailed_mode and not options.triage_mode:
  2912 + if options.output_mode == 'detailed':
2770 2913 # fully detailed output
2771 2914 vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
2772 2915 display_code=options.display_code, global_analysis=True, #options.global_analysis,
2773 2916 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
2774 2917 show_deobfuscated_code=options.show_deobfuscated_code,
2775 2918 skip_deobfuscate=options.skip_deobfuscate)
2776   - else:
  2919 + elif options.output_mode in ('triage', 'unspecified'):
2777 2920 # print container name when it changes:
2778 2921 if container != previous_container:
2779 2922 if container is not None:
... ... @@ -2782,20 +2925,59 @@ def main():
2782 2925 # summarized output for triage:
2783 2926 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
2784 2927 skip_deobfuscate=options.skip_deobfuscate)
  2928 + elif options.output_mode == 'json':
  2929 + json_results.append(
  2930 + vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
  2931 + display_code=options.display_code, global_analysis=True, #options.global_analysis,
  2932 + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
  2933 + show_deobfuscated_code=options.show_deobfuscated_code))
  2934 + else: # (should be impossible)
  2935 + raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
2785 2936 count += 1
2786   - if not options.detailed_mode or options.triage_mode:
  2937 + if options.output_mode == 'triage':
2787 2938 print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \
2788 2939 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \
2789 2940 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n'
2790 2941  
2791   - if count == 1 and not options.triage_mode and not options.detailed_mode:
2792   - # if options -t and -d were not specified and it's a single file, print details:
  2942 + if count == 1 and options.output_mode == 'unspecified':
  2943 + # if options -t, -d and -j were not specified and it's a single file, print details:
2793 2944 vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
2794 2945 display_code=options.display_code, global_analysis=True, #options.global_analysis,
2795 2946 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
2796 2947 show_deobfuscated_code=options.show_deobfuscated_code,
2797 2948 skip_deobfuscate=options.skip_deobfuscate)
2798 2949  
  2950 + if options.output_mode == 'json':
  2951 + json_options = dict(check_circular=False, indent=4, ensure_ascii=False)
  2952 +
  2953 + # json.dump[s] cannot deal with unicode objects that are not properly
  2954 + # encoded --> encode in own function:
  2955 + json_results = json2ascii(json_results)
  2956 + #print_json(json_results)
  2957 +
  2958 + if False: # options.outfile: # (option currently commented out)
  2959 + with open(outfile, 'w') as write_handle:
  2960 + json.dump(write_handle, **json_options)
  2961 + else:
  2962 + print json.dumps(json_results, **json_options)
  2963 +
  2964 +
  2965 +def print_json(j):
  2966 + if isinstance(j, dict):
  2967 + for key, val in j.items():
  2968 + print_json(key)
  2969 + print_json(val)
  2970 + elif isinstance(j, list):
  2971 + for elem in j:
  2972 + print_json(elem)
  2973 + else:
  2974 + try:
  2975 + if len(j) > 20:
  2976 + print type(j), repr(j[:20]), '...(len {0})'.format(len(j))
  2977 + else:
  2978 + print type(j), repr(j)
  2979 + except TypeError:
  2980 + print type(j), repr(j)
2799 2981  
2800 2982 if __name__ == '__main__':
2801 2983 main()
... ...
oletools/rtfobj.py
... ... @@ -15,7 +15,7 @@ http://www.decalage.info/python/oletools
15 15  
16 16 #=== LICENSE =================================================================
17 17  
18   -# rtfobj is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info)
  18 +# rtfobj is copyright (c) 2012-2016, Philippe Lagadec (http://www.decalage.info)
19 19 # All rights reserved.
20 20 #
21 21 # Redistribution and use in source and binary forms, with or without modification,
... ... @@ -46,8 +46,9 @@ http://www.decalage.info/python/oletools
46 46 # 2015-12-09 v0.03 PL: - configurable logging, CLI options
47 47 # - extract OLE 1.0 objects
48 48 # - extract files from OLE Package objects
  49 +# 2016-04-01 v0.04 PL: - fixed logging output to use stdout instead of stderr
49 50  
50   -__version__ = '0.03'
  51 +__version__ = '0.04'
51 52  
52 53 #------------------------------------------------------------------------------
53 54 # TODO:
... ... @@ -338,8 +339,11 @@ if __name__ == '__main__':
338 339 parser.print_help()
339 340 sys.exit()
340 341  
341   - # setup logging to the console
342   - logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
  342 + # Setup logging to the console:
  343 + # here we use stdout instead of stderr by default, so that the output
  344 + # can be redirected properly.
  345 + logging.basicConfig(level=LOG_LEVELS[options.loglevel], stream=sys.stdout,
  346 + format='%(levelname)-8s %(message)s')
343 347 # enable logging in the modules:
344 348 log.setLevel(logging.NOTSET)
345 349 oleobj.log.setLevel(logging.NOTSET)
... ...
setup.py
... ... @@ -38,7 +38,7 @@ import sys, os, fnmatch
38 38 #--- METADATA -----------------------------------------------------------------
39 39  
40 40 name = "oletools"
41   -version = '0.43'
  41 +version = '0.44'
42 42 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR"
43 43 long_desc = open('oletools/README.rst').read()
44 44 author ="Philippe Lagadec"
... ...