Commit 08c23bdecbf9c7ceae6046cf521e91d75f5703ef

Authored by Philippe Lagadec
1 parent 99fcb31b

olevba: new option --deobf instead of --no-deobfuscate (no deobfuscation by default)

Showing 2 changed files with 53 additions and 64 deletions
oletools/olevba.py
... ... @@ -166,12 +166,12 @@ https://github.com/unixfreak0037/officeparser
166 166 # 2016-03-08 v0.44 PL: - added VBA Form strings extraction and analysis
167 167 # 2016-03-04 v0.45 CH: - added JSON output (by Christian Herdtweck)
168 168 # 2016-03-16 CH: - added option --no-deobfuscate (temporary)
  169 +# 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate
169 170  
170   -__version__ = '0.45'
  171 +__version__ = '0.46'
171 172  
172 173 #------------------------------------------------------------------------------
173 174 # TODO:
174   -# + option --fast to disable VBA expressions parsing
175 175 # + setup logging (common with other oletools)
176 176 # + add xor bruteforcing like bbharvest
177 177 # + options -a and -c should imply -d
... ... @@ -902,6 +902,29 @@ def is_printable(s):
902 902 return set(s).issubset(_PRINTABLE_SET)
903 903  
904 904  
  905 +def print_json(j):
  906 + """
  907 + Print a dictionary, a list or any other object to stdout
  908 + :param j: object to be printed
  909 + :return:
  910 + """
  911 + if isinstance(j, dict):
  912 + for key, val in j.items():
  913 + print_json(key)
  914 + print_json(val)
  915 + elif isinstance(j, list):
  916 + for elem in j:
  917 + print_json(elem)
  918 + else:
  919 + try:
  920 + if len(j) > 20:
  921 + print type(j), repr(j[:20]), '...(len {0})'.format(len(j))
  922 + else:
  923 + print type(j), repr(j)
  924 + except TypeError:
  925 + print type(j), repr(j)
  926 +
  927 +
905 928 def copytoken_help(decompressed_current, decompressed_chunk_start):
906 929 """
907 930 compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help
... ... @@ -1726,14 +1749,14 @@ class VBA_Scanner(object):
1726 1749 self.vba_strings = None
1727 1750  
1728 1751  
1729   - def scan(self, include_decoded_strings=False, skip_deobfuscate=False):
  1752 + def scan(self, include_decoded_strings=False, deobfuscate=False):
1730 1753 """
1731 1754 Analyze the provided VBA code to detect suspicious keywords,
1732 1755 auto-executable macros, IOC patterns, obfuscation patterns
1733 1756 such as hex-encoded strings.
1734 1757  
1735 1758 :param include_decoded_strings: bool, if True, all encoded strings will be included with their decoded content.
1736   - :param skip_deobfuscate: bool, if True do not try to deobfuscate code (faster but less secure)
  1759 + :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
1737 1760 :return: list of tuples (type, keyword, description)
1738 1761 (type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String' or 'Dridex String')
1739 1762 """
... ... @@ -1762,10 +1785,10 @@ class VBA_Scanner(object):
1762 1785 for encoded, decoded in self.dridex_strings:
1763 1786 self.code_dridex += '\n' + decoded
1764 1787 # Detect obfuscated strings in VBA expressions
1765   - if skip_deobfuscate:
1766   - self.vba_strings = []
1767   - else:
  1788 + if deobfuscate:
1768 1789 self.vba_strings = detect_vba_strings(self.code)
  1790 + else:
  1791 + self.vba_strings = []
1769 1792 for encoded, decoded in self.vba_strings:
1770 1793 self.code_vba += '\n' + decoded
1771 1794 results = []
... ... @@ -1849,7 +1872,7 @@ class VBA_Scanner(object):
1849 1872 len(self.dridex_strings), len(self.vba_strings))
1850 1873  
1851 1874  
1852   -def scan_vba(vba_code, include_decoded_strings, skip_deobfuscate=False):
  1875 +def scan_vba(vba_code, include_decoded_strings, deobfuscate=False):
1853 1876 """
1854 1877 Analyze the provided VBA code to detect suspicious keywords,
1855 1878 auto-executable macros, IOC patterns, obfuscation patterns
... ... @@ -1858,11 +1881,11 @@ def scan_vba(vba_code, include_decoded_strings, skip_deobfuscate=False):
1858 1881  
1859 1882 :param vba_code: str, VBA source code to be analyzed
1860 1883 :param include_decoded_strings: bool, if True all encoded strings will be included with their decoded content.
1861   - :param skip_deobfuscate: do not deobfuscate code; much faster but less secure
  1884 + :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
1862 1885 :return: list of tuples (type, keyword, description)
1863 1886 (type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String' or 'Dridex String')
1864 1887 """
1865   - return VBA_Scanner(vba_code).scan(include_decoded_strings, skip_deobfuscate)
  1888 + return VBA_Scanner(vba_code).scan(include_decoded_strings, deobfuscate)
1866 1889  
1867 1890  
1868 1891 #=== CLASSES =================================================================
... ... @@ -2338,7 +2361,7 @@ class VBA_Parser(object):
2338 2361  
2339 2362  
2340 2363  
2341   - def analyze_macros(self, show_decoded_strings=False, skip_deobfuscate=False):
  2364 + def analyze_macros(self, show_decoded_strings=False, deobfuscate=False):
2342 2365 """
2343 2366 runs extract_macros and analyze the source code of all VBA macros
2344 2367 found in the file.
... ... @@ -2357,7 +2380,7 @@ class VBA_Parser(object):
2357 2380 self.vba_code_all_modules += form_string + '\n'
2358 2381 # Analyze the whole code at once:
2359 2382 scanner = VBA_Scanner(self.vba_code_all_modules)
2360   - self.analysis_results = scanner.scan(show_decoded_strings, skip_deobfuscate)
  2383 + self.analysis_results = scanner.scan(show_decoded_strings, deobfuscate)
2361 2384 autoexec, suspicious, iocs, hexstrings, base64strings, dridex, vbastrings = scanner.scan_summary()
2362 2385 self.nb_autoexec += autoexec
2363 2386 self.nb_suspicious += suspicious
... ... @@ -2520,20 +2543,20 @@ class VBA_Parser_CLI(VBA_Parser):
2520 2543 pass
2521 2544  
2522 2545  
2523   - def print_analysis(self, show_decoded_strings=False, skip_deobfuscate=False):
  2546 + def print_analysis(self, show_decoded_strings=False, deobfuscate=False):
2524 2547 """
2525 2548 Analyze the provided VBA code, and print the results in a table
2526 2549  
2527 2550 :param vba_code: str, VBA source code to be analyzed
2528 2551 :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
2529   - :param skip_deobfuscate: bool, if True do not try to deobfuscate code (faster but less secure)
  2552 + :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
2530 2553 :return: None
2531 2554 """
2532 2555 # print a waiting message only if the output is not redirected to a file:
2533 2556 if sys.stdout.isatty():
2534 2557 print 'Analysis...\r',
2535 2558 sys.stdout.flush()
2536   - results = self.analyze_macros(show_decoded_strings, skip_deobfuscate)
  2559 + results = self.analyze_macros(show_decoded_strings, deobfuscate)
2537 2560 if results:
2538 2561 t = prettytable.PrettyTable(('Type', 'Keyword', 'Description'))
2539 2562 t.align = 'l'
... ... @@ -2569,7 +2592,7 @@ class VBA_Parser_CLI(VBA_Parser):
2569 2592 def process_file(self, show_decoded_strings=False,
2570 2593 display_code=True, global_analysis=True, hide_attributes=True,
2571 2594 vba_code_only=False, show_deobfuscated_code=False,
2572   - skip_deobfuscate=False):
  2595 + deobfuscate=False):
2573 2596 """
2574 2597 Process a single file
2575 2598  
... ... @@ -2580,7 +2603,7 @@ class VBA_Parser_CLI(VBA_Parser):
2580 2603 :param global_analysis: bool, if True all modules are merged for a single analysis (default),
2581 2604 otherwise each module is analyzed separately (old behaviour)
2582 2605 :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
2583   - :param skip_deobfuscate: bool, if True do not try to deobfuscate code (faster but less secure)
  2606 + :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
2584 2607 """
2585 2608 #TODO: replace print by writing to a provided output file (sys.stdout by default)
2586 2609 # fix conflicting parameters:
... ... @@ -2619,7 +2642,7 @@ class VBA_Parser_CLI(VBA_Parser):
2619 2642 print '- ' * 39
2620 2643 print 'ANALYSIS:'
2621 2644 # analyse each module's code, filtered to avoid false positives:
2622   - self.print_analysis(show_decoded_strings, skip_deobfuscate)
  2645 + self.print_analysis(show_decoded_strings, deobfuscate)
2623 2646 for (subfilename, stream_path, form_string) in self.extract_form_strings():
2624 2647 print '-' * 79
2625 2648 print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)
... ... @@ -2627,7 +2650,7 @@ class VBA_Parser_CLI(VBA_Parser):
2627 2650 print form_string
2628 2651 if global_analysis and not vba_code_only:
2629 2652 # analyse the code from all modules at once:
2630   - self.print_analysis(show_decoded_strings, skip_deobfuscate)
  2653 + self.print_analysis(show_decoded_strings, deobfuscate)
2631 2654 if show_deobfuscated_code:
2632 2655 print 'MACRO SOURCE CODE WITH DEOBFUSCATED VBA STRINGS (EXPERIMENTAL):\n\n'
2633 2656 print self.reveal()
... ... @@ -2720,7 +2743,7 @@ class VBA_Parser_CLI(VBA_Parser):
2720 2743 return result
2721 2744  
2722 2745  
2723   - def process_file_triage(self, show_decoded_strings=False, skip_deobfuscate=False):
  2746 + def process_file_triage(self, show_decoded_strings=False, deobfuscate=False):
2724 2747 """
2725 2748 Process a file in triage mode, showing only summary results on one line.
2726 2749 """
... ... @@ -2735,7 +2758,7 @@ class VBA_Parser_CLI(VBA_Parser):
2735 2758 print 'Analysis...\r',
2736 2759 sys.stdout.flush()
2737 2760 self.analyze_macros(show_decoded_strings=show_decoded_strings,
2738   - skip_deobfuscate=skip_deobfuscate)
  2761 + deobfuscate=deobfuscate)
2739 2762 flags = TYPE2TAG[self.type]
2740 2763 macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-'
2741 2764 if self.contains_macros: macros = 'M'
... ... @@ -2844,16 +2867,8 @@ def main():
2844 2867 help='display the macro source code after replacing all the obfuscated strings by their decoded content.')
2845 2868 parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
2846 2869 help="logging level debug/info/warning/error/critical (default=%default)")
2847   - parser.add_option('-n', '--no-deobfuscate', dest="skip_deobfuscate", action="store_true", default=False,
2848   - help="skip deobfuscation (much faster but less secure)")
2849   -
2850   - # Disabled options:
2851   - # parser.add_option("--each", action="store_false", dest="global_analysis", default=True,
2852   - # help='analyze each VBA module separately')
2853   - # parser.add_option("-i", "--input", dest='input', type='str', default=None,
2854   - # help='input file containing VBA source code to be analyzed (no parsing)')
2855   -
2856   - # TODO: --novba to disable VBA expressions parsing
  2870 + parser.add_option('--deobf', dest="deobfuscate", action="store_true", default=False,
  2871 + help="Attempt to deobfuscate VBA expressions (slow)")
2857 2872  
2858 2873 (options, args) = parser.parse_args()
2859 2874  
... ... @@ -2875,22 +2890,13 @@ def main():
2875 2890 # enable logging in the modules:
2876 2891 log.setLevel(logging.NOTSET)
2877 2892  
2878   - # if options.input:
2879   - # #TODO: remove this option
2880   - # raise NotImplementedError
2881   - # # input file provided with VBA source code to be analyzed directly:
2882   - # print 'Analysis of VBA source code from %s:' % options.input
2883   - # vba_code = open(options.input).read()
2884   - # print_analysis(vba_code, show_decoded_strings=options.show_decoded_strings)
2885   - # skip_deobfuscate=options.skip_deobfuscate)
2886   - # sys.exit()
2887   -
2888 2893 # Old display with number of items detected:
2889 2894 # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr')
2890 2895 # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7)
2891 2896  
2892   - if options.skip_deobfuscate and options.show_deobfuscated_code:
2893   - logging.warning('Ignoring option --reveal since option -n / --no-deobfuscate is present!')
  2897 + # with the option --reveal, make sure --deobf is also enabled:
  2898 + if options.show_deobfuscated_code and not options.deobfuscate:
  2899 + options.deobfuscate = True
2894 2900  
2895 2901 # Column headers (do not know how many files there will be yet, so if no output_mode
2896 2902 # was specified, we will print triage for first file --> need these headers)
... ... @@ -2915,7 +2921,7 @@ def main():
2915 2921 display_code=options.display_code, global_analysis=True, #options.global_analysis,
2916 2922 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
2917 2923 show_deobfuscated_code=options.show_deobfuscated_code,
2918   - skip_deobfuscate=options.skip_deobfuscate)
  2924 + deobfuscate=options.deobfuscate)
2919 2925 elif options.output_mode in ('triage', 'unspecified'):
2920 2926 # print container name when it changes:
2921 2927 if container != previous_container:
... ... @@ -2924,7 +2930,7 @@ def main():
2924 2930 previous_container = container
2925 2931 # summarized output for triage:
2926 2932 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
2927   - skip_deobfuscate=options.skip_deobfuscate)
  2933 + deobfuscate=options.deobfuscate)
2928 2934 elif options.output_mode == 'json':
2929 2935 json_results.append(
2930 2936 vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
... ... @@ -2945,7 +2951,7 @@ def main():
2945 2951 display_code=options.display_code, global_analysis=True, #options.global_analysis,
2946 2952 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
2947 2953 show_deobfuscated_code=options.show_deobfuscated_code,
2948   - skip_deobfuscate=options.skip_deobfuscate)
  2954 + deobfuscate=options.deobfuscate)
2949 2955  
2950 2956 if options.output_mode == 'json':
2951 2957 json_options = dict(check_circular=False, indent=4, ensure_ascii=False)
... ... @@ -2962,23 +2968,6 @@ def main():
2962 2968 print json.dumps(json_results, **json_options)
2963 2969  
2964 2970  
2965   -def print_json(j):
2966   - if isinstance(j, dict):
2967   - for key, val in j.items():
2968   - print_json(key)
2969   - print_json(val)
2970   - elif isinstance(j, list):
2971   - for elem in j:
2972   - print_json(elem)
2973   - else:
2974   - try:
2975   - if len(j) > 20:
2976   - print type(j), repr(j[:20]), '...(len {0})'.format(len(j))
2977   - else:
2978   - print type(j), repr(j)
2979   - except TypeError:
2980   - print type(j), repr(j)
2981   -
2982 2971 if __name__ == '__main__':
2983 2972 main()
2984 2973  
... ...
setup.py 100644 → 100755
... ... @@ -38,7 +38,7 @@ import sys, os, fnmatch
38 38 #--- METADATA -----------------------------------------------------------------
39 39  
40 40 name = "oletools"
41   -version = '0.45'
  41 +version = '0.46'
42 42 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR"
43 43 long_desc = open('oletools/README.rst').read()
44 44 author ="Philippe Lagadec"
... ...