Commit 1ab73c64b6ee898bbf97b51931c541072ef715bc

Authored by Christian Herdtweck
1 parent 6e5aa20c

Ensure that loading the dumped JSON output causes no encoding trouble, by decoding and re-encoding all strings
Showing 1 changed file with 35 additions and 3 deletions
oletools/olevba.py
@@ -1594,12 +1594,26 @@ def detect_vba_strings(vba_code): @@ -1594,12 +1594,26 @@ def detect_vba_strings(vba_code):
1594 1594
1595 1595
1596 def json2ascii(json_obj, encoding='utf8', errors='replace'): 1596 def json2ascii(json_obj, encoding='utf8', errors='replace'):
  1597 + """ ensure there is no unicode in json and all strings are safe to decode
  1598 +
  1599 + works recursively, decodes and re-encodes every string to/from unicode
  1600 + to ensure there will be no trouble in loading the dumped json output
  1601 + """
1597 if json_obj is None: 1602 if json_obj is None:
1598 pass 1603 pass
1599 - elif isinstance(json_obj, (str, bool, int, float)): 1604 + elif isinstance(json_obj, (bool, int, float)):
1600 pass 1605 pass
  1606 + elif isinstance(json_obj, str):
  1607 + dencoded = json_obj.decode(encoding, errors).encode(encoding, errors)
  1608 + if dencoded != str:
  1609 + logging.info('json2ascii: replaced: {0} (len {1})'
  1610 + .format(json_obj, len(json_obj)))
  1611 + logging.info('json2ascii: with: {0} (len {1})'
  1612 + .format(dencoded, len(dencoded)))
  1613 + return dencoded
1601 elif isinstance(json_obj, unicode): 1614 elif isinstance(json_obj, unicode):
1602 - logging.debug('json2ascii: replaced: {0}'.format(json_obj.encode(encoding, errors))) 1615 + logging.info('json2ascii: replaced: {0}'
  1616 + .format(json_obj.encode(encoding, errors)))
1603 # cannot put original into logger 1617 # cannot put original into logger
1604 # print 'original: ' json_obj 1618 # print 'original: ' json_obj
1605 return json_obj.encode(encoding, errors) 1619 return json_obj.encode(encoding, errors)
@@ -2759,7 +2773,8 @@ def main(): @@ -2759,7 +2773,8 @@ def main():
2759 # json.dump[s] cannot deal with unicode objects that are not properly 2773 # json.dump[s] cannot deal with unicode objects that are not properly
2760 # encoded --> encode in own function: 2774 # encoded --> encode in own function:
2761 json_results = json2ascii(json_results) 2775 json_results = json2ascii(json_results)
2762 - 2776 + #print_json(json_results)
  2777 +
2763 if False: # options.outfile: # (option currently commented out) 2778 if False: # options.outfile: # (option currently commented out)
2764 with open(outfile, 'w') as write_handle: 2779 with open(outfile, 'w') as write_handle:
2765 json.dump(write_handle, **json_options) 2780 json.dump(write_handle, **json_options)
@@ -2767,6 +2782,23 @@ def main(): @@ -2767,6 +2782,23 @@ def main():
2767 print json.dumps(json_results, **json_options) 2782 print json.dumps(json_results, **json_options)
2768 2783
2769 2784
def print_json(j, max_len=20):
    """Debug helper: print type and repr of every leaf of a json structure.

    Dicts are walked entry by entry (the key first, then its value) and
    lists element by element; anything else is treated as a leaf and printed
    on a line of its own as ``<type> <repr>``.  A leaf that has a length
    greater than `max_len` is shortened to its first `max_len` items and its
    full length is appended as ``...(len N)``.

    :param j: nested structure of dicts / lists / leaf values
    :param max_len: longest leaf that is still printed in full (default: 20)
    """
    if isinstance(j, dict):
        for key, val in j.items():
            print_json(key, max_len)
            print_json(val, max_len)
    elif isinstance(j, list):
        for elem in j:
            print_json(elem, max_len)
    else:
        # Build the complete line first so that a TypeError (no len() or no
        # slicing support) can never leave a half-written line behind.
        try:
            if len(j) > max_len:
                text = '{0} {1} ...(len {2})'.format(
                    type(j), repr(j[:max_len]), len(j))
            else:
                text = '{0} {1}'.format(type(j), repr(j))
        except TypeError:
            text = '{0} {1}'.format(type(j), repr(j))
        # single parenthesised argument: same output with the python 2 print
        # statement and with the python 3 print function
        print(text)
  2801 +
2770 if __name__ == '__main__': 2802 if __name__ == '__main__':
2771 main() 2803 main()
2772 2804