Commit 4539d6b15509fe2ac9982269f05a0d9b7dea482c

Authored by Philippe Lagadec
2 parents 3ac3fd00 20e6670e

olevba: added option --no-deobfuscate (temporary)

oletools/olevba.py
@@ -164,8 +164,10 @@ https://github.com/unixfreak0037/officeparser @@ -164,8 +164,10 @@ https://github.com/unixfreak0037/officeparser
164 # 2016-02-20 v0.43 PL: - fixed issue #34 in the VBA parser and vba_chr 164 # 2016-02-20 v0.43 PL: - fixed issue #34 in the VBA parser and vba_chr
165 # 2016-02-29 PL: - added Workbook_Activate to suspicious keywords 165 # 2016-02-29 PL: - added Workbook_Activate to suspicious keywords
166 # 2016-03-08 v0.44 PL: - added VBA Form strings extraction and analysis 166 # 2016-03-08 v0.44 PL: - added VBA Form strings extraction and analysis
  167 +# 2016-03-04 v0.45 CH: - added JSON output (by Christian Herdtweck)
  168 +# 2016-03-16 CH: - added option --no-deobfuscate (temporary)
167 169
168 -__version__ = '0.44' 170 +__version__ = '0.45'
169 171
170 #------------------------------------------------------------------------------ 172 #------------------------------------------------------------------------------
171 # TODO: 173 # TODO:
@@ -215,6 +217,7 @@ import traceback @@ -215,6 +217,7 @@ import traceback
215 import zlib 217 import zlib
216 import email # for MHTML parsing 218 import email # for MHTML parsing
217 import string # for printable 219 import string # for printable
  220 +import json # for json output mode (argument --json)
218 221
219 # import lxml or ElementTree for XML parsing: 222 # import lxml or ElementTree for XML parsing:
220 try: 223 try:
@@ -1655,6 +1658,42 @@ def detect_vba_strings(vba_code): @@ -1655,6 +1658,42 @@ def detect_vba_strings(vba_code):
1655 return results 1658 return results
1656 1659
1657 1660
def json2ascii(json_obj, encoding='utf8', errors='replace'):
    """
    Recursively replace every string in a JSON-like object by an encoded
    byte string, so that dumping the result cannot fail on badly encoded text.

    Byte strings are decoded and re-encoded (invalid sequences are handled
    according to `errors`), unicode strings are encoded. Dicts and lists are
    updated in place and returned; tuples are immutable, so a new tuple is
    returned for them. Dict keys are left untouched. None, bool, int and
    float are returned unchanged, and so is any other type (a debug message
    is logged for those).

    :param json_obj: object to sanitize (None, bool, int, float, str, unicode,
                     dict, list, tuple, or nested combinations of those)
    :param encoding: str, codec used for decoding and encoding (default utf8)
    :param errors: str, codec error handler (default 'replace')
    :return: the sanitized object
    """
    # Python 2 has a separate unicode type; fall back to str where it does
    # not exist so that the function stays importable there as well.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    if json_obj is None or isinstance(json_obj, (bool, int, float)):
        return json_obj

    if isinstance(json_obj, text_type):
        encoded = json_obj.encode(encoding, errors)
        # the original unicode object cannot safely be put into the logger
        logging.info('json2ascii: replaced: {0}'.format(encoded))
        return encoded

    if isinstance(json_obj, bytes):     # bytes is str on Python 2
        dencoded = json_obj.decode(encoding, errors).encode(encoding, errors)
        # only report strings that were actually altered by the round trip
        if dencoded != json_obj:
            logging.info('json2ascii: replaced: {0} (len {1})'
                         .format(json_obj, len(json_obj)))
            logging.info('json2ascii: with:     {0} (len {1})'
                         .format(dencoded, len(dencoded)))
        return dencoded

    if isinstance(json_obj, dict):
        for key in json_obj:
            json_obj[key] = json2ascii(json_obj[key], encoding, errors)
        return json_obj

    if isinstance(json_obj, list):
        # slice assignment stores the converted items in the same list object
        json_obj[:] = [json2ascii(item, encoding, errors) for item in json_obj]
        return json_obj

    if isinstance(json_obj, tuple):
        return tuple(json2ascii(item, encoding, errors) for item in json_obj)

    logging.debug('unexpected type in json2ascii: {0} -- leave as is'
                  .format(type(json_obj)))
    return json_obj
  1695 +
  1696 +
1658 class VBA_Scanner(object): 1697 class VBA_Scanner(object):
1659 """ 1698 """
1660 Class to scan the source code of a VBA module to find obfuscated strings, 1699 Class to scan the source code of a VBA module to find obfuscated strings,
@@ -2512,6 +2551,20 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2512,6 +2551,20 @@ class VBA_Parser_CLI(VBA_Parser):
2512 else: 2551 else:
2513 print 'No suspicious keyword or IOC found.' 2552 print 'No suspicious keyword or IOC found.'
2514 2553
  2554 + def print_analysis_json(self, show_decoded_strings=False):
  2555 + """
  2556 + Analyze the provided VBA code, and return the results in json format
  2557 +
  2558 + :param vba_code: str, VBA source code to be analyzed
  2559 + :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
  2560 + :return: dict
  2561 + """
  2562 + # print a waiting message only if the output is not redirected to a file:
  2563 + if sys.stdout.isatty():
  2564 + print 'Analysis...\r',
  2565 + sys.stdout.flush()
  2566 + return [dict(type=kw_type, keyword=keyword, description=description)
  2567 + for kw_type, keyword, description in self.analyze_macros(show_decoded_strings)]
2515 2568
2516 def process_file(self, show_decoded_strings=False, 2569 def process_file(self, show_decoded_strings=False,
2517 display_code=True, global_analysis=True, hide_attributes=True, 2570 display_code=True, global_analysis=True, hide_attributes=True,
@@ -2592,7 +2645,82 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2592,7 +2645,82 @@ class VBA_Parser_CLI(VBA_Parser):
2592 print '' 2645 print ''
2593 2646
2594 2647
2595 - def process_file_triage(self, show_decoded_strings=False, skip_deobfuscate=False): 2648 + def process_file_json(self, show_decoded_strings=False,
  2649 + display_code=True, global_analysis=True, hide_attributes=True,
  2650 + vba_code_only=False, show_deobfuscated_code=False):
  2651 + """
  2652 + Process a single file
  2653 +
  2654 + every "show" or "print" here is to be translated as "add to json"
  2655 +
  2656 + :param filename: str, path and filename of file on disk, or within the container.
  2657 + :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
  2658 + :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
  2659 + :param display_code: bool, if False VBA source code is not displayed (default True)
  2660 + :param global_analysis: bool, if True all modules are merged for a single analysis (default),
  2661 + otherwise each module is analyzed separately (old behaviour)
  2662 + :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
  2663 + """
  2664 + #TODO: fix conflicting parameters (?)
  2665 +
  2666 + if vba_code_only and not display_code:
  2667 + display_code = True
  2668 +
  2669 + result = {}
  2670 +
  2671 + if self.container:
  2672 + result['container'] = self.container
  2673 + else:
  2674 + result['container'] = None
  2675 + result['file'] = self.filename
  2676 + result['json_conversion_successful'] = False
  2677 + result['analysis'] = None
  2678 + result['code_deobfuscated'] = None
  2679 +
  2680 + try:
  2681 + #TODO: handle olefile errors, when an OLE file is malformed
  2682 + result['type'] = self.type
  2683 + macros = []
  2684 + if self.detect_vba_macros():
  2685 + for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
  2686 + curr_macro = {}
  2687 + if hide_attributes:
  2688 + # hide attribute lines:
  2689 + vba_code_filtered = filter_vba(vba_code)
  2690 + else:
  2691 + vba_code_filtered = vba_code
  2692 +
  2693 + curr_macro['vba_filename'] = vba_filename
  2694 + curr_macro['subfilename'] = subfilename
  2695 + curr_macro['ole_stream'] = stream_path
  2696 + if display_code:
  2697 + curr_macro['code'] = vba_code_filtered.strip()
  2698 + if not global_analysis and not vba_code_only:
  2699 + # analyse each module's code, filtered to avoid false positives:
  2700 + #TODO: remove this option
  2701 + curr_macro['analysis'] = self.print_analysis_json(show_decoded_strings)
  2702 + macros.append(curr_macro)
  2703 + if global_analysis and not vba_code_only:
  2704 + # analyse the code from all modules at once:
  2705 + result['analysis'] = self.print_analysis_json(show_decoded_strings)
  2706 + if show_deobfuscated_code:
  2707 + result['code_deobfuscated'] = self.reveal()
  2708 + result['macros'] = macros
  2709 + result['json_conversion_successful'] = True
  2710 + except KeyboardInterrupt:
  2711 + # do not ignore exceptions when the user presses Ctrl+C/Pause:
  2712 + raise
  2713 + except: #TypeError:
  2714 + #raise
  2715 + #TODO: print more info if debug mode
  2716 + #print sys.exc_value
  2717 + # display the exception with full stack trace for debugging, but do not stop:
  2718 + traceback.print_exc()
  2719 +
  2720 + return result
  2721 +
  2722 +
  2723 + def process_file_triage(self, show_decoded_strings=False):
2596 """ 2724 """
2597 Process a file in triage mode, showing only summary results on one line. 2725 Process a file in triage mode, showing only summary results on one line.
2598 """ 2726 """
@@ -2691,10 +2819,19 @@ def main(): @@ -2691,10 +2819,19 @@ def main():
2691 help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)') 2819 help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
2692 parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', 2820 parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
2693 help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') 2821 help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
2694 - parser.add_option("-t", '--triage', action="store_true", dest="triage_mode",  
2695 - help='triage mode, display results as a summary table (default for multiple files)')  
2696 - parser.add_option("-d", '--detailed', action="store_true", dest="detailed_mode",  
2697 - help='detailed mode, display full results (default for single file)') 2822 + # output mode; could make this even simpler with add_option(type='choice') but that would make
  2823 + # cmd line interface incompatible...
  2824 + modes = optparse.OptionGroup(parser, title='Output mode (mutually exclusive)')
  2825 + modes.add_option("-t", '--triage', action="store_const", dest="output_mode",
  2826 + const='triage', default='unspecified',
  2827 + help='triage mode, display results as a summary table (default for multiple files)')
  2828 + modes.add_option("-d", '--detailed', action="store_const", dest="output_mode",
  2829 + const='detailed', default='unspecified',
  2830 + help='detailed mode, display full results (default for single file)')
  2831 + modes.add_option("-j", '--json', action="store_const", dest="output_mode",
  2832 + const='json', default='unspecified',
  2833 + help='json mode, detailed in json format (never default)')
  2834 + parser.add_option_group(modes)
2698 parser.add_option("-a", '--analysis', action="store_false", dest="display_code", default=True, 2835 parser.add_option("-a", '--analysis', action="store_false", dest="display_code", default=True,
2699 help='display only analysis results, not the macro source code') 2836 help='display only analysis results, not the macro source code')
2700 parser.add_option("-c", '--code', action="store_true", dest="vba_code_only", default=False, 2837 parser.add_option("-c", '--code', action="store_true", dest="vba_code_only", default=False,
@@ -2726,8 +2863,13 @@ def main(): @@ -2726,8 +2863,13 @@ def main():
2726 parser.print_help() 2863 parser.print_help()
2727 sys.exit() 2864 sys.exit()
2728 2865
2729 - # print banner with version  
2730 - print 'olevba %s - http://decalage.info/python/oletools' % __version__ 2866 + # provide info about tool and its version
  2867 + if options.output_mode == 'json':
  2868 + json_results = [dict(script_name='olevba', version=__version__,
  2869 + url='http://decalage.info/python/oletools',
  2870 + type='MetaInformation'), ]
  2871 + else:
  2872 + print 'olevba %s - http://decalage.info/python/oletools' % __version__
2731 2873
2732 logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') 2874 logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
2733 # enable logging in the modules: 2875 # enable logging in the modules:
@@ -2750,8 +2892,9 @@ def main(): @@ -2750,8 +2892,9 @@ def main():
2750 if options.skip_deobfuscate and options.show_deobfuscated_code: 2892 if options.skip_deobfuscate and options.show_deobfuscated_code:
2751 logging.warning('Ignoring option --reveal since option -n / --no-deobfuscate is present!') 2893 logging.warning('Ignoring option --reveal since option -n / --no-deobfuscate is present!')
2752 2894
2753 - # Column headers (except if detailed mode)  
2754 - if not options.detailed_mode or options.triage_mode: 2895 + # Column headers (do not know how many files there will be yet, so if no output_mode
  2896 + # was specified, we will print triage for first file --> need these headers)
  2897 + if options.output_mode in ('triage', 'unspecified'):
2755 print '%-12s %-65s' % ('Flags', 'Filename') 2898 print '%-12s %-65s' % ('Flags', 'Filename')
2756 print '%-12s %-65s' % ('-' * 11, '-' * 65) 2899 print '%-12s %-65s' % ('-' * 11, '-' * 65)
2757 2900
@@ -2766,14 +2909,14 @@ def main(): @@ -2766,14 +2909,14 @@ def main():
2766 continue 2909 continue
2767 # Open the file 2910 # Open the file
2768 vba_parser = VBA_Parser_CLI(filename, data=data, container=container) 2911 vba_parser = VBA_Parser_CLI(filename, data=data, container=container)
2769 - if options.detailed_mode and not options.triage_mode: 2912 + if options.output_mode == 'detailed':
2770 # fully detailed output 2913 # fully detailed output
2771 vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, 2914 vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
2772 display_code=options.display_code, global_analysis=True, #options.global_analysis, 2915 display_code=options.display_code, global_analysis=True, #options.global_analysis,
2773 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, 2916 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
2774 show_deobfuscated_code=options.show_deobfuscated_code, 2917 show_deobfuscated_code=options.show_deobfuscated_code,
2775 skip_deobfuscate=options.skip_deobfuscate) 2918 skip_deobfuscate=options.skip_deobfuscate)
2776 - else: 2919 + elif options.output_mode in ('triage', 'unspecified'):
2777 # print container name when it changes: 2920 # print container name when it changes:
2778 if container != previous_container: 2921 if container != previous_container:
2779 if container is not None: 2922 if container is not None:
@@ -2782,20 +2925,59 @@ def main(): @@ -2782,20 +2925,59 @@ def main():
2782 # summarized output for triage: 2925 # summarized output for triage:
2783 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, 2926 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
2784 skip_deobfuscate=options.skip_deobfuscate) 2927 skip_deobfuscate=options.skip_deobfuscate)
  2928 + elif options.output_mode == 'json':
  2929 + json_results.append(
  2930 + vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
  2931 + display_code=options.display_code, global_analysis=True, #options.global_analysis,
  2932 + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
  2933 + show_deobfuscated_code=options.show_deobfuscated_code))
  2934 + else: # (should be impossible)
  2935 + raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
2785 count += 1 2936 count += 1
2786 - if not options.detailed_mode or options.triage_mode: 2937 + if options.output_mode == 'triage':
2787 print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ 2938 print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \
2788 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ 2939 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \
2789 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' 2940 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n'
2790 2941
2791 - if count == 1 and not options.triage_mode and not options.detailed_mode:  
2792 - # if options -t and -d were not specified and it's a single file, print details: 2942 + if count == 1 and options.output_mode == 'unspecified':
  2943 + # if options -t, -d and -j were not specified and it's a single file, print details:
2793 vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, 2944 vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
2794 display_code=options.display_code, global_analysis=True, #options.global_analysis, 2945 display_code=options.display_code, global_analysis=True, #options.global_analysis,
2795 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, 2946 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
2796 show_deobfuscated_code=options.show_deobfuscated_code, 2947 show_deobfuscated_code=options.show_deobfuscated_code,
2797 skip_deobfuscate=options.skip_deobfuscate) 2948 skip_deobfuscate=options.skip_deobfuscate)
2798 2949
  2950 + if options.output_mode == 'json':
  2951 + json_options = dict(check_circular=False, indent=4, ensure_ascii=False)
  2952 +
  2953 + # json.dump[s] cannot deal with unicode objects that are not properly
  2954 + # encoded --> encode in own function:
  2955 + json_results = json2ascii(json_results)
  2956 + #print_json(json_results)
  2957 +
  2958 + if False: # options.outfile: # (option currently commented out)
  2959 + with open(outfile, 'w') as write_handle:
  2960 + json.dump(write_handle, **json_options)
  2961 + else:
  2962 + print json.dumps(json_results, **json_options)
  2963 +
  2964 +
  2965 +def print_json(j):
  2966 + if isinstance(j, dict):
  2967 + for key, val in j.items():
  2968 + print_json(key)
  2969 + print_json(val)
  2970 + elif isinstance(j, list):
  2971 + for elem in j:
  2972 + print_json(elem)
  2973 + else:
  2974 + try:
  2975 + if len(j) > 20:
  2976 + print type(j), repr(j[:20]), '...(len {0})'.format(len(j))
  2977 + else:
  2978 + print type(j), repr(j)
  2979 + except TypeError:
  2980 + print type(j), repr(j)
2799 2981
2800 if __name__ == '__main__': 2982 if __name__ == '__main__':
2801 main() 2983 main()
oletools/rtfobj.py
@@ -15,7 +15,7 @@ http://www.decalage.info/python/oletools @@ -15,7 +15,7 @@ http://www.decalage.info/python/oletools
15 15
16 #=== LICENSE ================================================================= 16 #=== LICENSE =================================================================
17 17
18 -# rtfobj is copyright (c) 2012-2015, Philippe Lagadec (http://www.decalage.info) 18 +# rtfobj is copyright (c) 2012-2016, Philippe Lagadec (http://www.decalage.info)
19 # All rights reserved. 19 # All rights reserved.
20 # 20 #
21 # Redistribution and use in source and binary forms, with or without modification, 21 # Redistribution and use in source and binary forms, with or without modification,
@@ -46,8 +46,9 @@ http://www.decalage.info/python/oletools @@ -46,8 +46,9 @@ http://www.decalage.info/python/oletools
46 # 2015-12-09 v0.03 PL: - configurable logging, CLI options 46 # 2015-12-09 v0.03 PL: - configurable logging, CLI options
47 # - extract OLE 1.0 objects 47 # - extract OLE 1.0 objects
48 # - extract files from OLE Package objects 48 # - extract files from OLE Package objects
  49 +# 2016-04-01 v0.04 PL: - fixed logging output to use stdout instead of stderr
49 50
50 -__version__ = '0.03' 51 +__version__ = '0.04'
51 52
52 #------------------------------------------------------------------------------ 53 #------------------------------------------------------------------------------
53 # TODO: 54 # TODO:
@@ -338,8 +339,11 @@ if __name__ == '__main__': @@ -338,8 +339,11 @@ if __name__ == '__main__':
338 parser.print_help() 339 parser.print_help()
339 sys.exit() 340 sys.exit()
340 341
341 - # setup logging to the console  
342 - logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') 342 + # Setup logging to the console:
  343 + # here we use stdout instead of stderr by default, so that the output
  344 + # can be redirected properly.
  345 + logging.basicConfig(level=LOG_LEVELS[options.loglevel], stream=sys.stdout,
  346 + format='%(levelname)-8s %(message)s')
343 # enable logging in the modules: 347 # enable logging in the modules:
344 log.setLevel(logging.NOTSET) 348 log.setLevel(logging.NOTSET)
345 oleobj.log.setLevel(logging.NOTSET) 349 oleobj.log.setLevel(logging.NOTSET)
setup.py
@@ -38,7 +38,7 @@ import sys, os, fnmatch @@ -38,7 +38,7 @@ import sys, os, fnmatch
38 #--- METADATA ----------------------------------------------------------------- 38 #--- METADATA -----------------------------------------------------------------
39 39
40 name = "oletools" 40 name = "oletools"
41 -version = '0.43' 41 +version = '0.44'
42 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" 42 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR"
43 long_desc = open('oletools/README.rst').read() 43 long_desc = open('oletools/README.rst').read()
44 author ="Philippe Lagadec" 44 author ="Philippe Lagadec"