Commit 2813b67da7278aa434940d816c2989804086a97b
1 parent
aee53f45
Implemented json output for olevba, trying to stay as close as reasonable to original functions
Showing
1 changed file
with
116 additions
and
5 deletions
oletools/olevba.py
| ... | ... | @@ -161,6 +161,7 @@ https://github.com/unixfreak0037/officeparser |
| 161 | 161 | # 2016-01-31 PL: - fixed issue #31 in VBA_Parser.open_mht |
| 162 | 162 | # - fixed issue #32 by monkeypatching email.feedparser |
| 163 | 163 | # 2016-02-07 PL: - KeyboardInterrupt is now raised properly |
| 164 | +# 2016-02-26 CH: - Add json output | |
| 164 | 165 | |
| 165 | 166 | __version__ = '0.42' |
| 166 | 167 | |
| ... | ... | @@ -212,6 +213,7 @@ import traceback |
| 212 | 213 | import zlib |
| 213 | 214 | import email # for MHTML parsing |
| 214 | 215 | import string # for printable |
| 216 | +import json # for json output mode (argument --json) | |
| 215 | 217 | |
| 216 | 218 | # import lxml or ElementTree for XML parsing: |
| 217 | 219 | try: |
| ... | ... | @@ -2349,6 +2351,20 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2349 | 2351 | else: |
| 2350 | 2352 | print 'No suspicious keyword or IOC found.' |
| 2351 | 2353 | |
| 2354 | + def print_analysis_json(self, show_decoded_strings=False): | |
| 2355 | + """ | |
| 2356 | + Analyze the VBA macros of this file and return the results as a list of dicts (nothing but a waiting message is printed) | |
| 2357 | + | |
| 2358 | + :param show_decoded_strings: bool, passed on unchanged to self.analyze_macros | |
| 2359 | + (presumably: if True hex-encoded strings are reported with their decoded content - confirm in analyze_macros) | |
| 2360 | + :return: list of dicts with keys 'type', 'keyword' and 'description', one per result of self.analyze_macros | |
| 2361 | + """ | |
| 2362 | + # print a waiting message only if stdout is a terminal (NOTE(review): main prints the json to stdout as well): | |
| 2363 | + if sys.stdout.isatty(): | |
| 2364 | + print 'Analysis...\r', | |
| 2365 | + sys.stdout.flush() | |
| 2366 | + return [dict(type=kw_type, keyword=keyword, description=description) | |
| 2367 | + for kw_type, keyword, description in self.analyze_macros(show_decoded_strings)] | |
| 2352 | 2368 | |
| 2353 | 2369 | def process_file(self, show_decoded_strings=False, |
| 2354 | 2370 | display_code=True, global_analysis=True, hide_attributes=True, |
| ... | ... | @@ -2422,6 +2438,81 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2422 | 2438 | print '' |
| 2423 | 2439 | |
| 2424 | 2440 | |
| 2441 | + def process_file_json(self, show_decoded_strings=False, | |
| 2442 | + display_code=True, global_analysis=True, hide_attributes=True, | |
| 2443 | + vba_code_only=False, show_deobfuscated_code=False): | |
| 2444 | + """ | |
| 2445 | + Process a single file and collect the results in a dict (to be dumped as json by the caller) | |
| 2446 | + | |
| 2447 | + every "show" or "print" here is to be translated as "add to json" | |
| 2448 | + | |
| 2449 | + :param show_decoded_strings: bool, passed on unchanged to self.print_analysis_json | |
| 2450 | + :param display_code: bool, if False the 'code' key is not added to the macro entries (default True) | |
| 2451 | + :param global_analysis: bool, if True one analysis is stored in result['analysis'] (default), otherwise one per macro entry (old behaviour) | |
| 2452 | + :param hide_attributes: bool, if True the code is passed through filter_vba before being stored (default) | |
| 2453 | + :param vba_code_only: bool, if True no analysis is added to the result and display_code is forced to True | |
| 2454 | + :param show_deobfuscated_code: bool, if True the result of self.reveal() is stored in result['code_deobfuscated'] | |
| 2455 | + :return: dict with keys 'container', 'file', 'type', 'macros', 'analysis', 'code_deobfuscated' and 'json_conversion_successful' | |
| 2456 | + """ | |
| 2457 | + #TODO: fix conflicting parameters (?) | |
| 2458 | + | |
| 2459 | + if vba_code_only and not display_code: | |
| 2460 | + display_code = True | |
| 2461 | + | |
| 2462 | + result = {} | |
| 2463 | + | |
| 2464 | + if self.container: | |
| 2465 | + result['container'] = self.container | |
| 2466 | + else: | |
| 2467 | + result['container'] = None | |
| 2468 | + result['file'] = self.filename | |
| 2469 | + result['json_conversion_successful'] = False | |
| 2470 | + result['analysis'] = None | |
| 2471 | + result['code_deobfuscated'] = None | |
| 2472 | + | |
| 2473 | + try: | |
| 2474 | + #TODO: handle olefile errors, when an OLE file is malformed | |
| 2475 | + result['type'] = self.type | |
| 2476 | + macros = [] | |
| 2477 | + if self.detect_vba_macros(): | |
| 2478 | + for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros(): | |
| 2479 | + curr_macro = {} | |
| 2480 | + if hide_attributes: | |
| 2481 | + # hide attribute lines: | |
| 2482 | + vba_code_filtered = filter_vba(vba_code) | |
| 2483 | + else: | |
| 2484 | + vba_code_filtered = vba_code | |
| 2485 | + | |
| 2486 | + curr_macro['vba_filename'] = vba_filename | |
| 2487 | + curr_macro['subfilename'] = subfilename | |
| 2488 | + curr_macro['ole_stream'] = stream_path | |
| 2489 | + if display_code: | |
| 2490 | + curr_macro['code'] = vba_code_filtered.strip() | |
| 2491 | + if not global_analysis and not vba_code_only: | |
| 2492 | + # NOTE(review): print_analysis_json gets no module code, so this looks like the whole-file analysis repeated per module - confirm | |
| 2493 | + #TODO: remove this option | |
| 2494 | + curr_macro['analysis'] = self.print_analysis_json(show_decoded_strings) | |
| 2495 | + macros.append(curr_macro) | |
| 2496 | + if global_analysis and not vba_code_only: | |
| 2497 | + # analyse the code from all modules at once: | |
| 2498 | + result['analysis'] = self.print_analysis_json(show_decoded_strings) | |
| 2499 | + if show_deobfuscated_code: | |
| 2500 | + result['code_deobfuscated'] = self.reveal() | |
| 2501 | + result['macros'] = macros | |
| 2502 | + result['json_conversion_successful'] = True | |
| 2503 | + except KeyboardInterrupt: | |
| 2504 | + # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2505 | + raise | |
| 2506 | + except: #TypeError: | |
| 2507 | + #raise | |
| 2508 | + #TODO: print more info if debug mode | |
| 2509 | + #print sys.exc_value | |
| 2510 | + # print the full stack trace for debugging, but do not stop: the partial result (json_conversion_successful still False) is returned | |
| 2511 | + traceback.print_exc() | |
| 2512 | + | |
| 2513 | + return result | |
| 2514 | + | |
| 2515 | + | |
| 2425 | 2516 | def process_file_triage(self, show_decoded_strings=False): |
| 2426 | 2517 | """ |
| 2427 | 2518 | Process a file in triage mode, showing only summary results on one line. |
| ... | ... | @@ -2555,8 +2646,6 @@ def main(): |
| 2555 | 2646 | # TODO: --novba to disable VBA expressions parsing |
| 2556 | 2647 | |
| 2557 | 2648 | (options, args) = parser.parse_args() |
| 2558 | - print options.output_mode | |
| 2559 | - sys.exit() | |
| 2560 | 2649 | |
| 2561 | 2650 | # Print help if no arguments are passed |
| 2562 | 2651 | if len(args) == 0: |
| ... | ... | @@ -2564,8 +2653,13 @@ def main(): |
| 2564 | 2653 | parser.print_help() |
| 2565 | 2654 | sys.exit() |
| 2566 | 2655 | |
| 2567 | - # print banner with version | |
| 2568 | - print 'olevba %s - http://decalage.info/python/oletools' % __version__ | |
| 2656 | + # provide info about tool and its version | |
| 2657 | + if options.output_mode == 'json': | |
| 2658 | + json_results = [dict(script_name='olevba', version=__version__, | |
| 2659 | + url='http://decalage.info/python/oletools', | |
| 2660 | + type='MetaInformation'), ] | |
| 2661 | + else: | |
| 2662 | + print 'olevba %s - http://decalage.info/python/oletools' % __version__ | |
| 2569 | 2663 | |
| 2570 | 2664 | logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') |
| 2571 | 2665 | # enable logging in the modules: |
| ... | ... | @@ -2616,7 +2710,11 @@ def main(): |
| 2616 | 2710 | # summarized output for triage: |
| 2617 | 2711 | vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings) |
| 2618 | 2712 | elif options.output_mode == 'json': |
| 2619 | - raise NotImplementedError('about to add json output!') | |
| 2713 | + json_results.append( | |
| 2714 | + vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, | |
| 2715 | + display_code=options.display_code, global_analysis=True, #options.global_analysis, | |
| 2716 | + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | |
| 2717 | + show_deobfuscated_code=options.show_deobfuscated_code)) | |
| 2620 | 2718 | else: # (should be impossible) |
| 2621 | 2719 | raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode)) |
| 2622 | 2720 | count += 1 |
| ... | ... | @@ -2632,6 +2730,19 @@ def main(): |
| 2632 | 2730 | hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, |
| 2633 | 2731 | show_deobfuscated_code=options.show_deobfuscated_code) |
| 2634 | 2732 | |
| 2733 | + if options.output_mode == 'json': | |
| 2734 | + json_options = dict(check_circular=False, indent=4, ensure_ascii=False) | |
| 2735 | + # from python json doc for ensure_ascii=False: "unless [target for json | |
| 2736 | + # output] explicitly understands unicode (as in codecs.getwriter()) | |
| 2737 | + # this is likely to cause an error." | |
| 2738 | + # If option --decode is given, data is likely to contain non-ascii data | |
| 2739 | + | |
| 2740 | + if False: # options.outfile: # (option currently commented out) | |
| 2741 | + with open(outfile, 'w') as write_handle: | |
| 2742 | + json.dump(write_handle, **json_options) | |
| 2743 | + else: | |
| 2744 | + print json.dumps(json_results, **json_options) | |
| 2745 | + | |
| 2635 | 2746 | |
| 2636 | 2747 | if __name__ == '__main__': |
| 2637 | 2748 | main() | ... | ... |