Commit 4c1151eac83b4c71633f55cdd765c2821b191eb9
1 parent
82503a9e
iterative json printing: print json data for each file;
re-use the old debug function print_json for its proper purpose now; copied from commits 99d5d56d65ee624022f8f9f5577f680f9f386660 and ccb7870314db6fbf1b9104606888ba67ab535a32 on a different branch and even improved on them (no dict() necessary in print_json args)
Showing
1 changed file
with
54 additions
and
48 deletions
oletools/olevba.py
| @@ -951,29 +951,6 @@ def is_printable(s): | @@ -951,29 +951,6 @@ def is_printable(s): | ||
| 951 | return set(s).issubset(_PRINTABLE_SET) | 951 | return set(s).issubset(_PRINTABLE_SET) |
| 952 | 952 | ||
| 953 | 953 | ||
| 954 | -def print_json(j): | ||
| 955 | - """ | ||
| 956 | - Print a dictionary, a list or any other object to stdout | ||
| 957 | - :param j: object to be printed | ||
| 958 | - :return: | ||
| 959 | - """ | ||
| 960 | - if isinstance(j, dict): | ||
| 961 | - for key, val in j.items(): | ||
| 962 | - print_json(key) | ||
| 963 | - print_json(val) | ||
| 964 | - elif isinstance(j, list): | ||
| 965 | - for elem in j: | ||
| 966 | - print_json(elem) | ||
| 967 | - else: | ||
| 968 | - try: | ||
| 969 | - if len(j) > 20: | ||
| 970 | - print type(j), repr(j[:20]), '...(len {0})'.format(len(j)) | ||
| 971 | - else: | ||
| 972 | - print type(j), repr(j) | ||
| 973 | - except TypeError: | ||
| 974 | - print type(j), repr(j) | ||
| 975 | - | ||
| 976 | - | ||
| 977 | def copytoken_help(decompressed_current, decompressed_chunk_start): | 954 | def copytoken_help(decompressed_current, decompressed_chunk_start): |
| 978 | """ | 955 | """ |
| 979 | compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help | 956 | compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help |
| @@ -1806,6 +1783,45 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): | @@ -1806,6 +1783,45 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): | ||
| 1806 | return json_obj | 1783 | return json_obj |
| 1807 | 1784 | ||
| 1808 | 1785 | ||
| 1786 | +_have_printed_json_start = False | ||
| 1787 | + | ||
| 1788 | +def print_json(json_dict=None, is_last=False, **json_parts): | ||
| 1789 | + """ line-wise print of json.dumps(json2ascii(..)) with options and indent+1 | ||
| 1790 | + | ||
| 1791 | + can use in two ways: | ||
| 1792 | + (1) print_json(some_dict) | ||
| 1793 | + (2) print_json(key1=value1, key2=value2, ...) | ||
| 1794 | + | ||
| 1795 | + :param bool is_last: set to True only for very last entry to complete | ||
| 1796 | + the top-level json-list | ||
| 1797 | + """ | ||
| 1798 | + global _have_printed_json_start | ||
| 1799 | + | ||
| 1800 | + if json_dict and json_parts: | ||
| 1801 | + raise ValueError('Invalid json argument: want either single dict or ' | ||
| 1802 | + 'key=value parts but got both)') | ||
| 1803 | + elif (json_dict is not None) and (not isinstance(json_dict, dict)): | ||
| 1804 | + raise ValueError('Invalid json argument: want either single dict or ' | ||
| 1805 | + 'key=value parts but got {} instead of dict)' | ||
| 1806 | + .format(type(json_dict))) | ||
| 1807 | + if json_parts: | ||
| 1808 | + json_dict = json_parts | ||
| 1809 | + | ||
| 1810 | + if not _have_printed_json_start: | ||
| 1811 | + print '[' | ||
| 1812 | + _have_printed_json_start = True | ||
| 1813 | + | ||
| 1814 | + lines = json.dumps(json2ascii(json_dict), check_circular=False, | ||
| 1815 | + indent=4, ensure_ascii=False).splitlines() | ||
| 1816 | + for line in lines[:-1]: | ||
| 1817 | + print ' {}'.format(line) | ||
| 1818 | + if is_last: | ||
| 1819 | + print ' {}'.format(lines[-1]) # print last line without comma | ||
| 1820 | + print ']' | ||
| 1821 | + else: | ||
| 1822 | + print ' {},'.format(lines[-1]) # print last line with comma | ||
| 1823 | + | ||
| 1824 | + | ||
| 1809 | class VBA_Scanner(object): | 1825 | class VBA_Scanner(object): |
| 1810 | """ | 1826 | """ |
| 1811 | Class to scan the source code of a VBA module to find obfuscated strings, | 1827 | Class to scan the source code of a VBA module to find obfuscated strings, |
| @@ -2903,9 +2919,10 @@ def main(): | @@ -2903,9 +2919,10 @@ def main(): | ||
| 2903 | 2919 | ||
| 2904 | # provide info about tool and its version | 2920 | # provide info about tool and its version |
| 2905 | if options.output_mode == 'json': | 2921 | if options.output_mode == 'json': |
| 2906 | - json_results = [dict(script_name='olevba', version=__version__, | ||
| 2907 | - url='http://decalage.info/python/oletools', | ||
| 2908 | - type='MetaInformation'), ] | 2922 | + # prints opening [ |
| 2923 | + print_json(script_name='olevba', version=__version__, | ||
| 2924 | + url='http://decalage.info/python/oletools', | ||
| 2925 | + type='MetaInformation') | ||
| 2909 | else: | 2926 | else: |
| 2910 | print 'olevba %s - http://decalage.info/python/oletools' % __version__ | 2927 | print 'olevba %s - http://decalage.info/python/oletools' % __version__ |
| 2911 | 2928 | ||
| @@ -2957,9 +2974,8 @@ def main(): | @@ -2957,9 +2974,8 @@ def main(): | ||
| 2957 | return_code = RETURN_XGLOB_ERR if return_code == 0 \ | 2974 | return_code = RETURN_XGLOB_ERR if return_code == 0 \ |
| 2958 | else RETURN_SEVERAL_ERRS | 2975 | else RETURN_SEVERAL_ERRS |
| 2959 | if options.output_mode == 'json': | 2976 | if options.output_mode == 'json': |
| 2960 | - json_results.append(dict(file=filename, type='error', | ||
| 2961 | - error=type(data).__name__, | ||
| 2962 | - message=str(data))) | 2977 | + print_json(file=filename, type='error', |
| 2978 | + error=type(data).__name__, message=str(data)) | ||
| 2963 | continue | 2979 | continue |
| 2964 | 2980 | ||
| 2965 | try: | 2981 | try: |
| @@ -2983,7 +2999,7 @@ def main(): | @@ -2983,7 +2999,7 @@ def main(): | ||
| 2983 | vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, | 2999 | vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, |
| 2984 | deobfuscate=options.deobfuscate) | 3000 | deobfuscate=options.deobfuscate) |
| 2985 | elif options.output_mode == 'json': | 3001 | elif options.output_mode == 'json': |
| 2986 | - json_results.append( | 3002 | + print_json( |
| 2987 | vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, | 3003 | vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, |
| 2988 | display_code=options.display_code, | 3004 | display_code=options.display_code, |
| 2989 | hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | 3005 | hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, |
| @@ -2998,9 +3014,8 @@ def main(): | @@ -2998,9 +3014,8 @@ def main(): | ||
| 2998 | else: | 3014 | else: |
| 2999 | log.exception('Failed to open %s -- probably not supported!' % filename) | 3015 | log.exception('Failed to open %s -- probably not supported!' % filename) |
| 3000 | if options.output_mode == 'json': | 3016 | if options.output_mode == 'json': |
| 3001 | - json_results.append(dict(file=filename, type='error', | ||
| 3002 | - error=type(exc).__name__, | ||
| 3003 | - message=str(exc))) | 3017 | + print_json(file=filename, type='error', |
| 3018 | + error=type(exc).__name__, message=str(exc)) | ||
| 3004 | return_code = RETURN_OPEN_ERROR if return_code == 0 \ | 3019 | return_code = RETURN_OPEN_ERROR if return_code == 0 \ |
| 3005 | else RETURN_SEVERAL_ERRS | 3020 | else RETURN_SEVERAL_ERRS |
| 3006 | except ProcessingError as exc: | 3021 | except ProcessingError as exc: |
| @@ -3010,9 +3025,9 @@ def main(): | @@ -3010,9 +3025,9 @@ def main(): | ||
| 3010 | log.exception('Error processing file %s (%s)!' | 3025 | log.exception('Error processing file %s (%s)!' |
| 3011 | % (filename, exc.orig_exception)) | 3026 | % (filename, exc.orig_exception)) |
| 3012 | if options.output_mode == 'json': | 3027 | if options.output_mode == 'json': |
| 3013 | - json_results.append(dict(file=filename, type='error', | ||
| 3014 | - error=type(exc).__name__, | ||
| 3015 | - message=str(exc.orig_exception))) | 3028 | + print_json(file=filename, type='error', |
| 3029 | + error=type(exc).__name__, | ||
| 3030 | + message=str(exc.orig_exception)) | ||
| 3016 | return_code = RETURN_PARSE_ERROR if return_code == 0 \ | 3031 | return_code = RETURN_PARSE_ERROR if return_code == 0 \ |
| 3017 | else RETURN_SEVERAL_ERRS | 3032 | else RETURN_SEVERAL_ERRS |
| 3018 | finally: | 3033 | finally: |
| @@ -3033,18 +3048,9 @@ def main(): | @@ -3033,18 +3048,9 @@ def main(): | ||
| 3033 | deobfuscate=options.deobfuscate) | 3048 | deobfuscate=options.deobfuscate) |
| 3034 | 3049 | ||
| 3035 | if options.output_mode == 'json': | 3050 | if options.output_mode == 'json': |
| 3036 | - json_options = dict(check_circular=False, indent=4, ensure_ascii=False) | ||
| 3037 | - | ||
| 3038 | - # json.dump[s] cannot deal with unicode objects that are not properly | ||
| 3039 | - # encoded --> encode in own function: | ||
| 3040 | - json_results = json2ascii(json_results) | ||
| 3041 | - #print_json(json_results) | ||
| 3042 | - | ||
| 3043 | - # if False: # options.outfile: # (option currently commented out) | ||
| 3044 | - # with open(outfile, 'w') as write_handle: | ||
| 3045 | - # json.dump(write_handle, **json_options) | ||
| 3046 | - # else: | ||
| 3047 | - print json.dumps(json_results, **json_options) | 3051 | + # print last json entry (a last one without a comma) and closing ] |
| 3052 | + print_json(type='MetaInformation', return_code=return_code, | ||
| 3053 | + is_last=True) | ||
| 3048 | 3054 | ||
| 3049 | except Exception as exc: | 3055 | except Exception as exc: |
| 3050 | # some unexpected error, maybe some of the types caught in except clauses | 3056 | # some unexpected error, maybe some of the types caught in except clauses |