Commit 4c1151eac83b4c71633f55cdd765c2821b191eb9

Authored by Christian Herdtweck
1 parent 82503a9e

iterative json printing: print json data for each file

re-use old debug-function print_json for proper purpose now

copied from commits 99d5d56d65ee624022f8f9f5577f680f9f386660 and
ccb7870314db6fbf1b9104606888ba67ab535a32 from a different branch
and even improved on them (no dict() necessary in print_json args)
Showing 1 changed file with 54 additions and 48 deletions
oletools/olevba.py
@@ -951,29 +951,6 @@ def is_printable(s): @@ -951,29 +951,6 @@ def is_printable(s):
951 return set(s).issubset(_PRINTABLE_SET) 951 return set(s).issubset(_PRINTABLE_SET)
952 952
953 953
954 -def print_json(j):  
955 - """  
956 - Print a dictionary, a list or any other object to stdout  
957 - :param j: object to be printed  
958 - :return:  
959 - """  
960 - if isinstance(j, dict):  
961 - for key, val in j.items():  
962 - print_json(key)  
963 - print_json(val)  
964 - elif isinstance(j, list):  
965 - for elem in j:  
966 - print_json(elem)  
967 - else:  
968 - try:  
969 - if len(j) > 20:  
970 - print type(j), repr(j[:20]), '...(len {0})'.format(len(j))  
971 - else:  
972 - print type(j), repr(j)  
973 - except TypeError:  
974 - print type(j), repr(j)  
975 -  
976 -  
977 def copytoken_help(decompressed_current, decompressed_chunk_start): 954 def copytoken_help(decompressed_current, decompressed_chunk_start):
978 """ 955 """
979 compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help 956 compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help
@@ -1806,6 +1783,45 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): @@ -1806,6 +1783,45 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'):
1806 return json_obj 1783 return json_obj
1807 1784
1808 1785
  1786 +_have_printed_json_start = False
  1787 +
  1788 +def print_json(json_dict=None, is_last=False, **json_parts):
  1789 + """ line-wise print of json.dumps(json2ascii(..)) with options and indent+1
  1790 +
  1791 + can use in two ways:
  1792 + (1) print_json(some_dict)
  1793 + (2) print_json(key1=value1, key2=value2, ...)
  1794 +
  1795 + :param bool is_last: set to True only for very last entry to complete
  1796 + the top-level json-list
  1797 + """
  1798 + global _have_printed_json_start
  1799 +
  1800 + if json_dict and json_parts:
  1801 + raise ValueError('Invalid json argument: want either single dict or '
  1802 + 'key=value parts but got both)')
  1803 + elif (json_dict is not None) and (not isinstance(json_dict, dict)):
  1804 + raise ValueError('Invalid json argument: want either single dict or '
  1805 + 'key=value parts but got {} instead of dict)'
  1806 + .format(type(json_dict)))
  1807 + if json_parts:
  1808 + json_dict = json_parts
  1809 +
  1810 + if not _have_printed_json_start:
  1811 + print '['
  1812 + _have_printed_json_start = True
  1813 +
  1814 + lines = json.dumps(json2ascii(json_dict), check_circular=False,
  1815 + indent=4, ensure_ascii=False).splitlines()
  1816 + for line in lines[:-1]:
  1817 + print ' {}'.format(line)
  1818 + if is_last:
  1819 + print ' {}'.format(lines[-1]) # print last line without comma
  1820 + print ']'
  1821 + else:
  1822 + print ' {},'.format(lines[-1]) # print last line with comma
  1823 +
  1824 +
1809 class VBA_Scanner(object): 1825 class VBA_Scanner(object):
1810 """ 1826 """
1811 Class to scan the source code of a VBA module to find obfuscated strings, 1827 Class to scan the source code of a VBA module to find obfuscated strings,
@@ -2903,9 +2919,10 @@ def main(): @@ -2903,9 +2919,10 @@ def main():
2903 2919
2904 # provide info about tool and its version 2920 # provide info about tool and its version
2905 if options.output_mode == 'json': 2921 if options.output_mode == 'json':
2906 - json_results = [dict(script_name='olevba', version=__version__,  
2907 - url='http://decalage.info/python/oletools',  
2908 - type='MetaInformation'), ] 2922 + # prints opening [
  2923 + print_json(script_name='olevba', version=__version__,
  2924 + url='http://decalage.info/python/oletools',
  2925 + type='MetaInformation')
2909 else: 2926 else:
2910 print 'olevba %s - http://decalage.info/python/oletools' % __version__ 2927 print 'olevba %s - http://decalage.info/python/oletools' % __version__
2911 2928
@@ -2957,9 +2974,8 @@ def main(): @@ -2957,9 +2974,8 @@ def main():
2957 return_code = RETURN_XGLOB_ERR if return_code == 0 \ 2974 return_code = RETURN_XGLOB_ERR if return_code == 0 \
2958 else RETURN_SEVERAL_ERRS 2975 else RETURN_SEVERAL_ERRS
2959 if options.output_mode == 'json': 2976 if options.output_mode == 'json':
2960 - json_results.append(dict(file=filename, type='error',  
2961 - error=type(data).__name__,  
2962 - message=str(data))) 2977 + print_json(file=filename, type='error',
  2978 + error=type(data).__name__, message=str(data))
2963 continue 2979 continue
2964 2980
2965 try: 2981 try:
@@ -2983,7 +2999,7 @@ def main(): @@ -2983,7 +2999,7 @@ def main():
2983 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, 2999 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
2984 deobfuscate=options.deobfuscate) 3000 deobfuscate=options.deobfuscate)
2985 elif options.output_mode == 'json': 3001 elif options.output_mode == 'json':
2986 - json_results.append( 3002 + print_json(
2987 vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, 3003 vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
2988 display_code=options.display_code, 3004 display_code=options.display_code,
2989 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, 3005 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
@@ -2998,9 +3014,8 @@ def main(): @@ -2998,9 +3014,8 @@ def main():
2998 else: 3014 else:
2999 log.exception('Failed to open %s -- probably not supported!' % filename) 3015 log.exception('Failed to open %s -- probably not supported!' % filename)
3000 if options.output_mode == 'json': 3016 if options.output_mode == 'json':
3001 - json_results.append(dict(file=filename, type='error',  
3002 - error=type(exc).__name__,  
3003 - message=str(exc))) 3017 + print_json(file=filename, type='error',
  3018 + error=type(exc).__name__, message=str(exc))
3004 return_code = RETURN_OPEN_ERROR if return_code == 0 \ 3019 return_code = RETURN_OPEN_ERROR if return_code == 0 \
3005 else RETURN_SEVERAL_ERRS 3020 else RETURN_SEVERAL_ERRS
3006 except ProcessingError as exc: 3021 except ProcessingError as exc:
@@ -3010,9 +3025,9 @@ def main(): @@ -3010,9 +3025,9 @@ def main():
3010 log.exception('Error processing file %s (%s)!' 3025 log.exception('Error processing file %s (%s)!'
3011 % (filename, exc.orig_exception)) 3026 % (filename, exc.orig_exception))
3012 if options.output_mode == 'json': 3027 if options.output_mode == 'json':
3013 - json_results.append(dict(file=filename, type='error',  
3014 - error=type(exc).__name__,  
3015 - message=str(exc.orig_exception))) 3028 + print_json(file=filename, type='error',
  3029 + error=type(exc).__name__,
  3030 + message=str(exc.orig_exception))
3016 return_code = RETURN_PARSE_ERROR if return_code == 0 \ 3031 return_code = RETURN_PARSE_ERROR if return_code == 0 \
3017 else RETURN_SEVERAL_ERRS 3032 else RETURN_SEVERAL_ERRS
3018 finally: 3033 finally:
@@ -3033,18 +3048,9 @@ def main(): @@ -3033,18 +3048,9 @@ def main():
3033 deobfuscate=options.deobfuscate) 3048 deobfuscate=options.deobfuscate)
3034 3049
3035 if options.output_mode == 'json': 3050 if options.output_mode == 'json':
3036 - json_options = dict(check_circular=False, indent=4, ensure_ascii=False)  
3037 -  
3038 - # json.dump[s] cannot deal with unicode objects that are not properly  
3039 - # encoded --> encode in own function:  
3040 - json_results = json2ascii(json_results)  
3041 - #print_json(json_results)  
3042 -  
3043 - # if False: # options.outfile: # (option currently commented out)  
3044 - # with open(outfile, 'w') as write_handle:  
3045 - # json.dump(write_handle, **json_options)  
3046 - # else:  
3047 - print json.dumps(json_results, **json_options) 3051 + # print last json entry (a last one without a comma) and closing ]
  3052 + print_json(type='MetaInformation', return_code=return_code,
  3053 + is_last=True)
3048 3054
3049 except Exception as exc: 3055 except Exception as exc:
3050 # some unexpected error, maybe some of the types caught in except clauses 3056 # some unexpected error, maybe some of the types caught in except clauses