Commit 08c23bdecbf9c7ceae6046cf521e91d75f5703ef

Authored by Philippe Lagadec
1 parent 99fcb31b

olevba: new option --deobf instead of --no-deobfuscate (no deobfuscation by default)

Showing 2 changed files with 53 additions and 64 deletions
oletools/olevba.py
@@ -166,12 +166,12 @@ https://github.com/unixfreak0037/officeparser @@ -166,12 +166,12 @@ https://github.com/unixfreak0037/officeparser
166 # 2016-03-08 v0.44 PL: - added VBA Form strings extraction and analysis 166 # 2016-03-08 v0.44 PL: - added VBA Form strings extraction and analysis
167 # 2016-03-04 v0.45 CH: - added JSON output (by Christian Herdtweck) 167 # 2016-03-04 v0.45 CH: - added JSON output (by Christian Herdtweck)
168 # 2016-03-16 CH: - added option --no-deobfuscate (temporary) 168 # 2016-03-16 CH: - added option --no-deobfuscate (temporary)
  169 +# 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate
169 170
170 -__version__ = '0.45' 171 +__version__ = '0.46'
171 172
172 #------------------------------------------------------------------------------ 173 #------------------------------------------------------------------------------
173 # TODO: 174 # TODO:
174 -# + option --fast to disable VBA expressions parsing  
175 # + setup logging (common with other oletools) 175 # + setup logging (common with other oletools)
176 # + add xor bruteforcing like bbharvest 176 # + add xor bruteforcing like bbharvest
177 # + options -a and -c should imply -d 177 # + options -a and -c should imply -d
@@ -902,6 +902,29 @@ def is_printable(s): @@ -902,6 +902,29 @@ def is_printable(s):
902 return set(s).issubset(_PRINTABLE_SET) 902 return set(s).issubset(_PRINTABLE_SET)
903 903
904 904
  905 +def print_json(j):
  906 + """
  907 + Print a dictionary, a list or any other object to stdout
  908 + :param j: object to be printed
  909 + :return:
  910 + """
  911 + if isinstance(j, dict):
  912 + for key, val in j.items():
  913 + print_json(key)
  914 + print_json(val)
  915 + elif isinstance(j, list):
  916 + for elem in j:
  917 + print_json(elem)
  918 + else:
  919 + try:
  920 + if len(j) > 20:
  921 + print type(j), repr(j[:20]), '...(len {0})'.format(len(j))
  922 + else:
  923 + print type(j), repr(j)
  924 + except TypeError:
  925 + print type(j), repr(j)
  926 +
  927 +
905 def copytoken_help(decompressed_current, decompressed_chunk_start): 928 def copytoken_help(decompressed_current, decompressed_chunk_start):
906 """ 929 """
907 compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help 930 compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help
@@ -1726,14 +1749,14 @@ class VBA_Scanner(object): @@ -1726,14 +1749,14 @@ class VBA_Scanner(object):
1726 self.vba_strings = None 1749 self.vba_strings = None
1727 1750
1728 1751
1729 - def scan(self, include_decoded_strings=False, skip_deobfuscate=False): 1752 + def scan(self, include_decoded_strings=False, deobfuscate=False):
1730 """ 1753 """
1731 Analyze the provided VBA code to detect suspicious keywords, 1754 Analyze the provided VBA code to detect suspicious keywords,
1732 auto-executable macros, IOC patterns, obfuscation patterns 1755 auto-executable macros, IOC patterns, obfuscation patterns
1733 such as hex-encoded strings. 1756 such as hex-encoded strings.
1734 1757
1735 :param include_decoded_strings: bool, if True, all encoded strings will be included with their decoded content. 1758 :param include_decoded_strings: bool, if True, all encoded strings will be included with their decoded content.
1736 - :param skip_deobfuscate: bool, if True do not try to deobfuscate code (faster but less secure) 1759 + :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
1737 :return: list of tuples (type, keyword, description) 1760 :return: list of tuples (type, keyword, description)
1738 (type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String' or 'Dridex String') 1761 (type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String' or 'Dridex String')
1739 """ 1762 """
@@ -1762,10 +1785,10 @@ class VBA_Scanner(object): @@ -1762,10 +1785,10 @@ class VBA_Scanner(object):
1762 for encoded, decoded in self.dridex_strings: 1785 for encoded, decoded in self.dridex_strings:
1763 self.code_dridex += '\n' + decoded 1786 self.code_dridex += '\n' + decoded
1764 # Detect obfuscated strings in VBA expressions 1787 # Detect obfuscated strings in VBA expressions
1765 - if skip_deobfuscate:  
1766 - self.vba_strings = []  
1767 - else: 1788 + if deobfuscate:
1768 self.vba_strings = detect_vba_strings(self.code) 1789 self.vba_strings = detect_vba_strings(self.code)
  1790 + else:
  1791 + self.vba_strings = []
1769 for encoded, decoded in self.vba_strings: 1792 for encoded, decoded in self.vba_strings:
1770 self.code_vba += '\n' + decoded 1793 self.code_vba += '\n' + decoded
1771 results = [] 1794 results = []
@@ -1849,7 +1872,7 @@ class VBA_Scanner(object): @@ -1849,7 +1872,7 @@ class VBA_Scanner(object):
1849 len(self.dridex_strings), len(self.vba_strings)) 1872 len(self.dridex_strings), len(self.vba_strings))
1850 1873
1851 1874
1852 -def scan_vba(vba_code, include_decoded_strings, skip_deobfuscate=False): 1875 +def scan_vba(vba_code, include_decoded_strings, deobfuscate=False):
1853 """ 1876 """
1854 Analyze the provided VBA code to detect suspicious keywords, 1877 Analyze the provided VBA code to detect suspicious keywords,
1855 auto-executable macros, IOC patterns, obfuscation patterns 1878 auto-executable macros, IOC patterns, obfuscation patterns
@@ -1858,11 +1881,11 @@ def scan_vba(vba_code, include_decoded_strings, skip_deobfuscate=False): @@ -1858,11 +1881,11 @@ def scan_vba(vba_code, include_decoded_strings, skip_deobfuscate=False):
1858 1881
1859 :param vba_code: str, VBA source code to be analyzed 1882 :param vba_code: str, VBA source code to be analyzed
1860 :param include_decoded_strings: bool, if True all encoded strings will be included with their decoded content. 1883 :param include_decoded_strings: bool, if True all encoded strings will be included with their decoded content.
1861 - :param skip_deobfuscate: do not deobfuscate code; much faster but less secure 1884 + :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
1862 :return: list of tuples (type, keyword, description) 1885 :return: list of tuples (type, keyword, description)
1863 (type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String' or 'Dridex String') 1886 (type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String' or 'Dridex String')
1864 """ 1887 """
1865 - return VBA_Scanner(vba_code).scan(include_decoded_strings, skip_deobfuscate) 1888 + return VBA_Scanner(vba_code).scan(include_decoded_strings, deobfuscate)
1866 1889
1867 1890
1868 #=== CLASSES ================================================================= 1891 #=== CLASSES =================================================================
@@ -2338,7 +2361,7 @@ class VBA_Parser(object): @@ -2338,7 +2361,7 @@ class VBA_Parser(object):
2338 2361
2339 2362
2340 2363
2341 - def analyze_macros(self, show_decoded_strings=False, skip_deobfuscate=False): 2364 + def analyze_macros(self, show_decoded_strings=False, deobfuscate=False):
2342 """ 2365 """
2343 runs extract_macros and analyze the source code of all VBA macros 2366 runs extract_macros and analyze the source code of all VBA macros
2344 found in the file. 2367 found in the file.
@@ -2357,7 +2380,7 @@ class VBA_Parser(object): @@ -2357,7 +2380,7 @@ class VBA_Parser(object):
2357 self.vba_code_all_modules += form_string + '\n' 2380 self.vba_code_all_modules += form_string + '\n'
2358 # Analyze the whole code at once: 2381 # Analyze the whole code at once:
2359 scanner = VBA_Scanner(self.vba_code_all_modules) 2382 scanner = VBA_Scanner(self.vba_code_all_modules)
2360 - self.analysis_results = scanner.scan(show_decoded_strings, skip_deobfuscate) 2383 + self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate)
2361 autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = scanner.scan_summary() 2384 autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = scanner.scan_summary()
2362 self.nb_autoexec += autoexec 2385 self.nb_autoexec += autoexec
2363 self.nb_suspicious += suspicious 2386 self.nb_suspicious += suspicious
@@ -2520,20 +2543,20 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2520,20 +2543,20 @@ class VBA_Parser_CLI(VBA_Parser):
2520 pass 2543 pass
2521 2544
2522 2545
2523 - def print_analysis(self, show_decoded_strings=False, skip_deobfuscate=False): 2546 + def print_analysis(self, show_decoded_strings=False, deobfuscate=False):
2524 """ 2547 """
2525 Analyze the provided VBA code, and print the results in a table 2548 Analyze the provided VBA code, and print the results in a table
2526 2549
2527 :param vba_code: str, VBA source code to be analyzed 2550 :param vba_code: str, VBA source code to be analyzed
2528 :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content. 2551 :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
2529 - :param skip_deobfuscate: bool, if True do not try to deobfuscate code (faster but less secure) 2552 + :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
2530 :return: None 2553 :return: None
2531 """ 2554 """
2532 # print a waiting message only if the output is not redirected to a file: 2555 # print a waiting message only if the output is not redirected to a file:
2533 if sys.stdout.isatty(): 2556 if sys.stdout.isatty():
2534 print 'Analysis...\r', 2557 print 'Analysis...\r',
2535 sys.stdout.flush() 2558 sys.stdout.flush()
2536 - results = self.analyze_macros(show_decoded_strings, skip_deobfuscate) 2559 + results = self.analyze_macros(show_decoded_strings, deobfuscate)
2537 if results: 2560 if results:
2538 t = prettytable.PrettyTable(('Type', 'Keyword', 'Description')) 2561 t = prettytable.PrettyTable(('Type', 'Keyword', 'Description'))
2539 t.align = 'l' 2562 t.align = 'l'
@@ -2569,7 +2592,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2569,7 +2592,7 @@ class VBA_Parser_CLI(VBA_Parser):
2569 def process_file(self, show_decoded_strings=False, 2592 def process_file(self, show_decoded_strings=False,
2570 display_code=True, global_analysis=True, hide_attributes=True, 2593 display_code=True, global_analysis=True, hide_attributes=True,
2571 vba_code_only=False, show_deobfuscated_code=False, 2594 vba_code_only=False, show_deobfuscated_code=False,
2572 - skip_deobfuscate=False): 2595 + deobfuscate=False):
2573 """ 2596 """
2574 Process a single file 2597 Process a single file
2575 2598
@@ -2580,7 +2603,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2580,7 +2603,7 @@ class VBA_Parser_CLI(VBA_Parser):
2580 :param global_analysis: bool, if True all modules are merged for a single analysis (default), 2603 :param global_analysis: bool, if True all modules are merged for a single analysis (default),
2581 otherwise each module is analyzed separately (old behaviour) 2604 otherwise each module is analyzed separately (old behaviour)
2582 :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default) 2605 :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
2583 - :param skip_deobfuscate: bool, if True do not try to deobfuscate code (faster but less secure) 2606 + :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
2584 """ 2607 """
2585 #TODO: replace print by writing to a provided output file (sys.stdout by default) 2608 #TODO: replace print by writing to a provided output file (sys.stdout by default)
2586 # fix conflicting parameters: 2609 # fix conflicting parameters:
@@ -2619,7 +2642,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2619,7 +2642,7 @@ class VBA_Parser_CLI(VBA_Parser):
2619 print '- ' * 39 2642 print '- ' * 39
2620 print 'ANALYSIS:' 2643 print 'ANALYSIS:'
2621 # analyse each module's code, filtered to avoid false positives: 2644 # analyse each module's code, filtered to avoid false positives:
2622 - self.print_analysis(show_decoded_strings, skip_deobfuscate) 2645 + self.print_analysis(show_decoded_strings, deobfuscate)
2623 for (subfilename, stream_path, form_string) in self.extract_form_strings(): 2646 for (subfilename, stream_path, form_string) in self.extract_form_strings():
2624 print '-' * 79 2647 print '-' * 79
2625 print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path) 2648 print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)
@@ -2627,7 +2650,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2627,7 +2650,7 @@ class VBA_Parser_CLI(VBA_Parser):
2627 print form_string 2650 print form_string
2628 if global_analysis and not vba_code_only: 2651 if global_analysis and not vba_code_only:
2629 # analyse the code from all modules at once: 2652 # analyse the code from all modules at once:
2630 - self.print_analysis(show_decoded_strings, skip_deobfuscate) 2653 + self.print_analysis(show_decoded_strings, deobfuscate)
2631 if show_deobfuscated_code: 2654 if show_deobfuscated_code:
2632 print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n' 2655 print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n'
2633 print self.reveal() 2656 print self.reveal()
@@ -2720,7 +2743,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2720,7 +2743,7 @@ class VBA_Parser_CLI(VBA_Parser):
2720 return result 2743 return result
2721 2744
2722 2745
2723 - def process_file_triage(self, show_decoded_strings=False, skip_deobfuscate=False): 2746 + def process_file_triage(self, show_decoded_strings=False, deobfuscate=False):
2724 """ 2747 """
2725 Process a file in triage mode, showing only summary results on one line. 2748 Process a file in triage mode, showing only summary results on one line.
2726 """ 2749 """
@@ -2735,7 +2758,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2735,7 +2758,7 @@ class VBA_Parser_CLI(VBA_Parser):
2735 print 'Analysis...\r', 2758 print 'Analysis...\r',
2736 sys.stdout.flush() 2759 sys.stdout.flush()
2737 self.analyze_macros(show_decoded_strings=show_decoded_strings, 2760 self.analyze_macros(show_decoded_strings=show_decoded_strings,
2738 - skip_deobfuscate=skip_deobfuscate) 2761 + deobfuscate=deobfuscate)
2739 flags = TYPE2TAG[self.type] 2762 flags = TYPE2TAG[self.type]
2740 macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-' 2763 macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-'
2741 if self.contains_macros: macros = 'M' 2764 if self.contains_macros: macros = 'M'
@@ -2844,16 +2867,8 @@ def main(): @@ -2844,16 +2867,8 @@ def main():
2844 help='display the macro source code after replacing all the obfuscated strings by their decoded content.') 2867 help='display the macro source code after replacing all the obfuscated strings by their decoded content.')
2845 parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL, 2868 parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
2846 help="logging level debug/info/warning/error/critical (default=%default)") 2869 help="logging level debug/info/warning/error/critical (default=%default)")
2847 - parser.add_option('-n', '--no-deobfuscate', dest="skip_deobfuscate", action="store_true", default=False,  
2848 - help="skip deobfuscation (much faster but less secure)")  
2849 -  
2850 - # Disabled options:  
2851 - # parser.add_option("--each", action="store_false", dest="global_analysis", default=True,  
2852 - # help='analyze each VBA module separately')  
2853 - # parser.add_option("-i", "--input", dest='input', type='str', default=None,  
2854 - # help='input file containing VBA source code to be analyzed (no parsing)')  
2855 -  
2856 - # TODO: --novba to disable VBA expressions parsing 2870 + parser.add_option('--deobf', dest="deobfuscate", action="store_true", default=False,
  2871 + help="Attempt to deobfuscate VBA expressions (slow)")
2857 2872
2858 (options, args) = parser.parse_args() 2873 (options, args) = parser.parse_args()
2859 2874
@@ -2875,22 +2890,13 @@ def main(): @@ -2875,22 +2890,13 @@ def main():
2875 # enable logging in the modules: 2890 # enable logging in the modules:
2876 log.setLevel(logging.NOTSET) 2891 log.setLevel(logging.NOTSET)
2877 2892
2878 - # if options.input:  
2879 - # #TODO: remove this option  
2880 - # raise NotImplementedError  
2881 - # # input file provided with VBA source code to be analyzed directly:  
2882 - # print 'Analysis of VBA source code from %s:' % options.input  
2883 - # vba_code = open(options.input).read()  
2884 - # print_analysis(vba_code, show_decoded_strings=options.show_decoded_strings)  
2885 - # skip_deobfuscate=options.skip_deobfuscate)  
2886 - # sys.exit()  
2887 -  
2888 # Old display with number of items detected: 2893 # Old display with number of items detected:
2889 # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr') 2894 # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr')
2890 # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7) 2895 # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7)
2891 2896
2892 - if options.skip_deobfuscate and options.show_deobfuscated_code:  
2893 - logging.warning('Ignoring option --reveal since option -n / --no-deobfuscate is present!') 2897 + # with the option --reveal, make sure --deobf is also enabled:
  2898 + if options.show_deobfuscated_code and not options.deobfuscate:
  2899 + options.deobfuscate = True
2894 2900
2895 # Column headers (do not know how many files there will be yet, so if no output_mode 2901 # Column headers (do not know how many files there will be yet, so if no output_mode
2896 # was specified, we will print triage for first file --> need these headers) 2902 # was specified, we will print triage for first file --> need these headers)
@@ -2915,7 +2921,7 @@ def main(): @@ -2915,7 +2921,7 @@ def main():
2915 display_code=options.display_code, global_analysis=True, #options.global_analysis, 2921 display_code=options.display_code, global_analysis=True, #options.global_analysis,
2916 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, 2922 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
2917 show_deobfuscated_code=options.show_deobfuscated_code, 2923 show_deobfuscated_code=options.show_deobfuscated_code,
2918 - skip_deobfuscate=options.skip_deobfuscate) 2924 + deobfuscate=options.deobfuscate)
2919 elif options.output_mode in ('triage', 'unspecified'): 2925 elif options.output_mode in ('triage', 'unspecified'):
2920 # print container name when it changes: 2926 # print container name when it changes:
2921 if container != previous_container: 2927 if container != previous_container:
@@ -2924,7 +2930,7 @@ def main(): @@ -2924,7 +2930,7 @@ def main():
2924 previous_container = container 2930 previous_container = container
2925 # summarized output for triage: 2931 # summarized output for triage:
2926 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, 2932 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
2927 - skip_deobfuscate=options.skip_deobfuscate) 2933 + deobfuscate=options.deobfuscate)
2928 elif options.output_mode == 'json': 2934 elif options.output_mode == 'json':
2929 json_results.append( 2935 json_results.append(
2930 vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, 2936 vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
@@ -2945,7 +2951,7 @@ def main(): @@ -2945,7 +2951,7 @@ def main():
2945 display_code=options.display_code, global_analysis=True, #options.global_analysis, 2951 display_code=options.display_code, global_analysis=True, #options.global_analysis,
2946 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, 2952 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
2947 show_deobfuscated_code=options.show_deobfuscated_code, 2953 show_deobfuscated_code=options.show_deobfuscated_code,
2948 - skip_deobfuscate=options.skip_deobfuscate) 2954 + deobfuscate=options.deobfuscate)
2949 2955
2950 if options.output_mode == 'json': 2956 if options.output_mode == 'json':
2951 json_options = dict(check_circular=False, indent=4, ensure_ascii=False) 2957 json_options = dict(check_circular=False, indent=4, ensure_ascii=False)
@@ -2962,23 +2968,6 @@ def main(): @@ -2962,23 +2968,6 @@ def main():
2962 print json.dumps(json_results, **json_options) 2968 print json.dumps(json_results, **json_options)
2963 2969
2964 2970
2965 -def print_json(j):  
2966 - if isinstance(j, dict):  
2967 - for key, val in j.items():  
2968 - print_json(key)  
2969 - print_json(val)  
2970 - elif isinstance(j, list):  
2971 - for elem in j:  
2972 - print_json(elem)  
2973 - else:  
2974 - try:  
2975 - if len(j) > 20:  
2976 - print type(j), repr(j[:20]), '...(len {0})'.format(len(j))  
2977 - else:  
2978 - print type(j), repr(j)  
2979 - except TypeError:  
2980 - print type(j), repr(j)  
2981 -  
2982 if __name__ == '__main__': 2971 if __name__ == '__main__':
2983 main() 2972 main()
2984 2973
setup.py 100644 → 100755
@@ -38,7 +38,7 @@ import sys, os, fnmatch @@ -38,7 +38,7 @@ import sys, os, fnmatch
38 #--- METADATA ----------------------------------------------------------------- 38 #--- METADATA -----------------------------------------------------------------
39 39
40 name = "oletools" 40 name = "oletools"
41 -version = '0.45' 41 +version = '0.46'
42 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR" 42 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR"
43 long_desc = open('oletools/README.rst').read() 43 long_desc = open('oletools/README.rst').read()
44 author ="Philippe Lagadec" 44 author ="Philippe Lagadec"