Commit 2813b67da7278aa434940d816c2989804086a97b

Authored by Christian Herdtweck
1 parent aee53f45

Implemented json output for olevba, trying to stay as close as reasonable to original functions

Showing 1 changed file with 116 additions and 5 deletions
oletools/olevba.py
@@ -161,6 +161,7 @@ https://github.com/unixfreak0037/officeparser @@ -161,6 +161,7 @@ https://github.com/unixfreak0037/officeparser
161 # 2016-01-31 PL: - fixed issue #31 in VBA_Parser.open_mht 161 # 2016-01-31 PL: - fixed issue #31 in VBA_Parser.open_mht
162 # - fixed issue #32 by monkeypatching email.feedparser 162 # - fixed issue #32 by monkeypatching email.feedparser
163 # 2016-02-07 PL: - KeyboardInterrupt is now raised properly 163 # 2016-02-07 PL: - KeyboardInterrupt is now raised properly
  164 +# 2016-02-26 CH: - Add json output
164 165
165 __version__ = '0.42' 166 __version__ = '0.42'
166 167
@@ -212,6 +213,7 @@ import traceback @@ -212,6 +213,7 @@ import traceback
212 import zlib 213 import zlib
213 import email # for MHTML parsing 214 import email # for MHTML parsing
214 import string # for printable 215 import string # for printable
  216 +import json # for json output mode (argument --json)
215 217
216 # import lxml or ElementTree for XML parsing: 218 # import lxml or ElementTree for XML parsing:
217 try: 219 try:
@@ -2349,6 +2351,20 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2349,6 +2351,20 @@ class VBA_Parser_CLI(VBA_Parser):
2349 else: 2351 else:
2350 print 'No suspicious keyword or IOC found.' 2352 print 'No suspicious keyword or IOC found.'
2351 2353
  2354 + def print_analysis_json(self, show_decoded_strings=False):
  2355 + """
  2356 + Analyze the provided VBA code, and return the results in json format
  2357 +
  2358 + :param vba_code: str, VBA source code to be analyzed
  2359 + :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
  2360 + :return: dict
  2361 + """
  2362 + # print a waiting message only if the output is not redirected to a file:
  2363 + if sys.stdout.isatty():
  2364 + print 'Analysis...\r',
  2365 + sys.stdout.flush()
  2366 + return [dict(type=kw_type, keyword=keyword, description=description)
  2367 + for kw_type, keyword, description in self.analyze_macros(show_decoded_strings)]
2352 2368
2353 def process_file(self, show_decoded_strings=False, 2369 def process_file(self, show_decoded_strings=False,
2354 display_code=True, global_analysis=True, hide_attributes=True, 2370 display_code=True, global_analysis=True, hide_attributes=True,
@@ -2422,6 +2438,81 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2422,6 +2438,81 @@ class VBA_Parser_CLI(VBA_Parser):
2422 print '' 2438 print ''
2423 2439
2424 2440
  2441 + def process_file_json(self, show_decoded_strings=False,
  2442 + display_code=True, global_analysis=True, hide_attributes=True,
  2443 + vba_code_only=False, show_deobfuscated_code=False):
  2444 + """
  2445 + Process a single file
  2446 +
  2447 + every "show" or "print" here is to be translated as "add to json"
  2448 +
  2449 + :param filename: str, path and filename of file on disk, or within the container.
  2450 + :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
  2451 + :param show_decoded_strings: bool, if True hex-encoded strings will be displayed with their decoded content.
  2452 + :param display_code: bool, if False VBA source code is not displayed (default True)
  2453 + :param global_analysis: bool, if True all modules are merged for a single analysis (default),
  2454 + otherwise each module is analyzed separately (old behaviour)
  2455 + :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
  2456 + """
  2457 + #TODO: fix conflicting parameters (?)
  2458 +
  2459 + if vba_code_only and not display_code:
  2460 + display_code = True
  2461 +
  2462 + result = {}
  2463 +
  2464 + if self.container:
  2465 + result['container'] = self.container
  2466 + else:
  2467 + result['container'] = None
  2468 + result['file'] = self.filename
  2469 + result['json_conversion_successful'] = False
  2470 + result['analysis'] = None
  2471 + result['code_deobfuscated'] = None
  2472 +
  2473 + try:
  2474 + #TODO: handle olefile errors, when an OLE file is malformed
  2475 + result['type'] = self.type
  2476 + macros = []
  2477 + if self.detect_vba_macros():
  2478 + for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
  2479 + curr_macro = {}
  2480 + if hide_attributes:
  2481 + # hide attribute lines:
  2482 + vba_code_filtered = filter_vba(vba_code)
  2483 + else:
  2484 + vba_code_filtered = vba_code
  2485 +
  2486 + curr_macro['vba_filename'] = vba_filename
  2487 + curr_macro['subfilename'] = subfilename
  2488 + curr_macro['ole_stream'] = stream_path
  2489 + if display_code:
  2490 + curr_macro['code'] = vba_code_filtered.strip()
  2491 + if not global_analysis and not vba_code_only:
  2492 + # analyse each module's code, filtered to avoid false positives:
  2493 + #TODO: remove this option
  2494 + curr_macro['analysis'] = self.print_analysis_json(show_decoded_strings)
  2495 + macros.append(curr_macro)
  2496 + if global_analysis and not vba_code_only:
  2497 + # analyse the code from all modules at once:
  2498 + result['analysis'] = self.print_analysis_json(show_decoded_strings)
  2499 + if show_deobfuscated_code:
  2500 + result['code_deobfuscated'] = self.reveal()
  2501 + result['macros'] = macros
  2502 + result['json_conversion_successful'] = True
  2503 + except KeyboardInterrupt:
  2504 + # do not ignore exceptions when the user presses Ctrl+C/Pause:
  2505 + raise
  2506 + except: #TypeError:
  2507 + #raise
  2508 + #TODO: print more info if debug mode
  2509 + #print sys.exc_value
  2510 + # display the exception with full stack trace for debugging, but do not stop:
  2511 + traceback.print_exc()
  2512 +
  2513 + return result
  2514 +
  2515 +
2425 def process_file_triage(self, show_decoded_strings=False): 2516 def process_file_triage(self, show_decoded_strings=False):
2426 """ 2517 """
2427 Process a file in triage mode, showing only summary results on one line. 2518 Process a file in triage mode, showing only summary results on one line.
@@ -2555,8 +2646,6 @@ def main(): @@ -2555,8 +2646,6 @@ def main():
2555 # TODO: --novba to disable VBA expressions parsing 2646 # TODO: --novba to disable VBA expressions parsing
2556 2647
2557 (options, args) = parser.parse_args() 2648 (options, args) = parser.parse_args()
2558 - print options.output_mode  
2559 - sys.exit()  
2560 2649
2561 # Print help if no arguments are passed 2650 # Print help if no arguments are passed
2562 if len(args) == 0: 2651 if len(args) == 0:
@@ -2564,8 +2653,13 @@ def main(): @@ -2564,8 +2653,13 @@ def main():
2564 parser.print_help() 2653 parser.print_help()
2565 sys.exit() 2654 sys.exit()
2566 2655
2567 - # print banner with version  
2568 - print 'olevba %s - http://decalage.info/python/oletools' % __version__ 2656 + # provide info about tool and its version
  2657 + if options.output_mode == 'json':
  2658 + json_results = [dict(script_name='olevba', version=__version__,
  2659 + url='http://decalage.info/python/oletools',
  2660 + type='MetaInformation'), ]
  2661 + else:
  2662 + print 'olevba %s - http://decalage.info/python/oletools' % __version__
2569 2663
2570 logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') 2664 logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')
2571 # enable logging in the modules: 2665 # enable logging in the modules:
@@ -2616,7 +2710,11 @@ def main(): @@ -2616,7 +2710,11 @@ def main():
2616 # summarized output for triage: 2710 # summarized output for triage:
2617 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings) 2711 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings)
2618 elif options.output_mode == 'json': 2712 elif options.output_mode == 'json':
2619 - raise NotImplementedError('about to add json output!') 2713 + json_results.append(
  2714 + vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
  2715 + display_code=options.display_code, global_analysis=True, #options.global_analysis,
  2716 + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
  2717 + show_deobfuscated_code=options.show_deobfuscated_code))
2620 else: # (should be impossible) 2718 else: # (should be impossible)
2621 raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode)) 2719 raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
2622 count += 1 2720 count += 1
@@ -2632,6 +2730,19 @@ def main(): @@ -2632,6 +2730,19 @@ def main():
2632 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, 2730 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
2633 show_deobfuscated_code=options.show_deobfuscated_code) 2731 show_deobfuscated_code=options.show_deobfuscated_code)
2634 2732
  2733 + if options.output_mode == 'json':
  2734 + json_options = dict(check_circular=False, indent=4, ensure_ascii=False)
  2735 + # from python json doc for ensure_ascii=False: "unless [target for json
  2736 + # output] explicitly understands unicode (as in codecs.getwriter())
  2737 + # this is likely to cause an error."
  2738 + # If option --decode is given, data is likely to contain non-ascii data
  2739 +
  2740 + if False: # options.outfile: # (option currently commented out)
  2741 + with open(outfile, 'w') as write_handle:
  2742 + json.dump(write_handle, **json_options)
  2743 + else:
  2744 + print json.dumps(json_results, **json_options)
  2745 +
2635 2746
2636 if __name__ == '__main__': 2747 if __name__ == '__main__':
2637 main() 2748 main()