Commit 6e5aa20c607825bd9dfed9e5de6e4be3adc74ace

Authored by Christian Herdtweck
1 parent 2813b67d

created json2ascii in olevba.py to encode non-ascii chars before json-export

Showing 1 changed file with 27 additions and 4 deletions
oletools/olevba.py
... ... @@ -162,6 +162,7 @@ https://github.com/unixfreak0037/officeparser
162 162 # - fixed issue #32 by monkeypatching email.feedparser
163 163 # 2016-02-07 PL: - KeyboardInterrupt is now raised properly
164 164 # 2016-02-26 CH: - Add json output
  165 +# 2016-03-04 CH: - convert json to ascii in own function
165 166  
166 167 __version__ = '0.42'
167 168  
... ... @@ -1592,6 +1593,28 @@ def detect_vba_strings(vba_code):
1592 1593 return results
1593 1594  
1594 1595  
def json2ascii(json_obj, encoding='utf8', errors='replace'):
    """
    Recursively encode all unicode strings in a json-like structure.

    Walks through nested dicts, lists and tuples and replaces every unicode
    string value with its byte-string encoding, so the result can be handed
    to json.dump[s] without encoding errors.

    Dicts and lists are modified in place (and returned); tuples are immutable
    and are therefore returned as new plain tuples. Dict keys are left
    untouched, only values are converted.

    :param json_obj: None, a scalar, a string, or a (nested) dict/list/tuple
    :param encoding: name of the codec used to encode unicode strings
    :param errors: error handling scheme passed on to unicode.encode
    :returns: the converted object; callers must use the return value since
              unicode and tuple inputs cannot be changed in place
    """
    # `unicode` only exists on Python 2. On Python 3 use an empty tuple so
    # that the isinstance check below is simply never true.
    try:
        unicode_type = unicode
    except NameError:
        unicode_type = ()

    if json_obj is None or isinstance(json_obj, (str, bool, int, float)):
        return json_obj

    if isinstance(json_obj, unicode_type):
        encoded = json_obj.encode(encoding, errors)
        # only the encoded form is logged: the original text cannot be put
        # into the logger
        logging.debug('json2ascii: replaced: %s', encoded)
        return encoded

    if isinstance(json_obj, dict):
        # assigning to existing keys does not change the dict size, so this
        # is safe while iterating over the keys
        for key in json_obj:
            json_obj[key] = json2ascii(json_obj[key], encoding, errors)
        return json_obj

    if isinstance(json_obj, list):
        # write converted items back by index; rebinding the loop variable
        # alone would leave the list unchanged
        for index, item in enumerate(json_obj):
            json_obj[index] = json2ascii(item, encoding, errors)
        return json_obj

    if isinstance(json_obj, tuple):
        return tuple(json2ascii(item, encoding, errors) for item in json_obj)

    logging.debug('unexpected type in json2ascii: {0} -- leave as is'
                  .format(type(json_obj)))
    return json_obj
  1616 +
  1617 +
1595 1618 class VBA_Scanner(object):
1596 1619 """
1597 1620 Class to scan the source code of a VBA module to find obfuscated strings,
... ... @@ -2732,10 +2755,10 @@ def main():
2732 2755  
2733 2756 if options.output_mode == 'json':
2734 2757 json_options = dict(check_circular=False, indent=4, ensure_ascii=False)
2735   - # from python json doc for ensure_ascii=False: "unless [target for json
2736   - # output] explicitly understands unicode (as in codecs.getwriter())
2737   - # this is likely to cause an error."
2738   - # If option --decode is given, data is likely to contain non-ascii data
  2758 +
  2759 + # json.dump[s] cannot deal with unicode objects that are not properly
  2760 + # encoded --> encode in own function:
  2761 + json_results = json2ascii(json_results)
2739 2762  
2740 2763 if False: # options.outfile: # (option currently commented out)
2741 2764 with open(outfile, 'w') as write_handle:
... ...