Commit 4c1151eac83b4c71633f55cdd765c2821b191eb9

Authored by Christian Herdtweck
1 parent 82503a9e

iterative json printing: print json data for each file

re-use old debug-function print_json for proper purpose now

copied from commits 99d5d56d65ee624022f8f9f5577f680f9f386660 and
ccb7870314db6fbf1b9104606888ba67ab535a32 from a different branch
and even improved on them (no dict() necessary in print_json args)
Showing 1 changed file with 54 additions and 48 deletions
oletools/olevba.py
... ... @@ -951,29 +951,6 @@ def is_printable(s):
951 951 return set(s).issubset(_PRINTABLE_SET)
952 952  
953 953  
954   -def print_json(j):
955   - """
956   - Print a dictionary, a list or any other object to stdout
957   - :param j: object to be printed
958   - :return:
959   - """
960   - if isinstance(j, dict):
961   - for key, val in j.items():
962   - print_json(key)
963   - print_json(val)
964   - elif isinstance(j, list):
965   - for elem in j:
966   - print_json(elem)
967   - else:
968   - try:
969   - if len(j) > 20:
970   - print type(j), repr(j[:20]), '...(len {0})'.format(len(j))
971   - else:
972   - print type(j), repr(j)
973   - except TypeError:
974   - print type(j), repr(j)
975   -
976   -
977 954 def copytoken_help(decompressed_current, decompressed_chunk_start):
978 955 """
979 956 compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help
... ... @@ -1806,6 +1783,45 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'):
1806 1783 return json_obj
1807 1784  
1808 1785  
  1786 +_have_printed_json_start = False
  1787 +
  1788 +def print_json(json_dict=None, is_last=False, **json_parts):
  1789 + """ line-wise print of json.dumps(json2ascii(..)) with options and indent+1
  1790 +
  1791 + can use in two ways:
  1792 + (1) print_json(some_dict)
  1793 + (2) print_json(key1=value1, key2=value2, ...)
  1794 +
  1795 + :param bool is_last: set to True only for very last entry to complete
  1796 + the top-level json-list
  1797 + """
  1798 + global _have_printed_json_start
  1799 +
  1800 + if json_dict and json_parts:
  1801 + raise ValueError('Invalid json argument: want either single dict or '
  1802 + 'key=value parts but got both)')
  1803 + elif (json_dict is not None) and (not isinstance(json_dict, dict)):
  1804 + raise ValueError('Invalid json argument: want either single dict or '
  1805 + 'key=value parts but got {} instead of dict)'
  1806 + .format(type(json_dict)))
  1807 + if json_parts:
  1808 + json_dict = json_parts
  1809 +
  1810 + if not _have_printed_json_start:
  1811 + print '['
  1812 + _have_printed_json_start = True
  1813 +
  1814 + lines = json.dumps(json2ascii(json_dict), check_circular=False,
  1815 + indent=4, ensure_ascii=False).splitlines()
  1816 + for line in lines[:-1]:
  1817 + print ' {}'.format(line)
  1818 + if is_last:
  1819 + print ' {}'.format(lines[-1]) # print last line without comma
  1820 + print ']'
  1821 + else:
  1822 + print ' {},'.format(lines[-1]) # print last line with comma
  1823 +
  1824 +
1809 1825 class VBA_Scanner(object):
1810 1826 """
1811 1827 Class to scan the source code of a VBA module to find obfuscated strings,
... ... @@ -2903,9 +2919,10 @@ def main():
2903 2919  
2904 2920 # provide info about tool and its version
2905 2921 if options.output_mode == 'json':
2906   - json_results = [dict(script_name='olevba', version=__version__,
2907   - url='http://decalage.info/python/oletools',
2908   - type='MetaInformation'), ]
  2922 + # prints opening [
  2923 + print_json(script_name='olevba', version=__version__,
  2924 + url='http://decalage.info/python/oletools',
  2925 + type='MetaInformation')
2909 2926 else:
2910 2927 print 'olevba %s - http://decalage.info/python/oletools' % __version__
2911 2928  
... ... @@ -2957,9 +2974,8 @@ def main():
2957 2974 return_code = RETURN_XGLOB_ERR if return_code == 0 \
2958 2975 else RETURN_SEVERAL_ERRS
2959 2976 if options.output_mode == 'json':
2960   - json_results.append(dict(file=filename, type='error',
2961   - error=type(data).__name__,
2962   - message=str(data)))
  2977 + print_json(file=filename, type='error',
  2978 + error=type(data).__name__, message=str(data))
2963 2979 continue
2964 2980  
2965 2981 try:
... ... @@ -2983,7 +2999,7 @@ def main():
2983 2999 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
2984 3000 deobfuscate=options.deobfuscate)
2985 3001 elif options.output_mode == 'json':
2986   - json_results.append(
  3002 + print_json(
2987 3003 vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
2988 3004 display_code=options.display_code,
2989 3005 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
... ... @@ -2998,9 +3014,8 @@ def main():
2998 3014 else:
2999 3015 log.exception('Failed to open %s -- probably not supported!' % filename)
3000 3016 if options.output_mode == 'json':
3001   - json_results.append(dict(file=filename, type='error',
3002   - error=type(exc).__name__,
3003   - message=str(exc)))
  3017 + print_json(file=filename, type='error',
  3018 + error=type(exc).__name__, message=str(exc))
3004 3019 return_code = RETURN_OPEN_ERROR if return_code == 0 \
3005 3020 else RETURN_SEVERAL_ERRS
3006 3021 except ProcessingError as exc:
... ... @@ -3010,9 +3025,9 @@ def main():
3010 3025 log.exception('Error processing file %s (%s)!'
3011 3026 % (filename, exc.orig_exception))
3012 3027 if options.output_mode == 'json':
3013   - json_results.append(dict(file=filename, type='error',
3014   - error=type(exc).__name__,
3015   - message=str(exc.orig_exception)))
  3028 + print_json(file=filename, type='error',
  3029 + error=type(exc).__name__,
  3030 + message=str(exc.orig_exception))
3016 3031 return_code = RETURN_PARSE_ERROR if return_code == 0 \
3017 3032 else RETURN_SEVERAL_ERRS
3018 3033 finally:
... ... @@ -3033,18 +3048,9 @@ def main():
3033 3048 deobfuscate=options.deobfuscate)
3034 3049  
3035 3050 if options.output_mode == 'json':
3036   - json_options = dict(check_circular=False, indent=4, ensure_ascii=False)
3037   -
3038   - # json.dump[s] cannot deal with unicode objects that are not properly
3039   - # encoded --> encode in own function:
3040   - json_results = json2ascii(json_results)
3041   - #print_json(json_results)
3042   -
3043   - # if False: # options.outfile: # (option currently commented out)
3044   - # with open(outfile, 'w') as write_handle:
3045   - # json.dump(write_handle, **json_options)
3046   - # else:
3047   - print json.dumps(json_results, **json_options)
  3051 + # print last json entry (a last one without a comma) and closing ]
  3052 + print_json(type='MetaInformation', return_code=return_code,
  3053 + is_last=True)
3048 3054  
3049 3055 except Exception as exc:
3050 3056 # some unexpected error, maybe some of the types caught in except clauses
... ...