Commit 6e5aa20c607825bd9dfed9e5de6e4be3adc74ace
1 parent
2813b67d
created json2ascii in olevba.py to encode non-ascii chars before json-export
Showing
1 changed file
with
27 additions
and
4 deletions
oletools/olevba.py
| @@ -162,6 +162,7 @@ https://github.com/unixfreak0037/officeparser | @@ -162,6 +162,7 @@ https://github.com/unixfreak0037/officeparser | ||
| 162 | # - fixed issue #32 by monkeypatching email.feedparser | 162 | # - fixed issue #32 by monkeypatching email.feedparser |
| 163 | # 2016-02-07 PL: - KeyboardInterrupt is now raised properly | 163 | # 2016-02-07 PL: - KeyboardInterrupt is now raised properly |
| 164 | # 2016-02-26 CH: - Add json output | 164 | # 2016-02-26 CH: - Add json output |
| 165 | +# 2016-03-04 CH: - convert json to ascii in own function | ||
| 165 | 166 | ||
| 166 | __version__ = '0.42' | 167 | __version__ = '0.42' |
| 167 | 168 | ||
| @@ -1592,6 +1593,28 @@ def detect_vba_strings(vba_code): | @@ -1592,6 +1593,28 @@ def detect_vba_strings(vba_code): | ||
| 1592 | return results | 1593 | return results |
| 1593 | 1594 | ||
| 1594 | 1595 | ||
def json2ascii(json_obj, encoding='utf8', errors='replace'):
    """
    Recursively encode all unicode strings found in a JSON-like structure.

    Walks through nested dicts, lists and tuples and replaces every unicode
    string value with its byte-string encoding. Other values (None, str,
    bool, int, float and any unexpected type) are returned unchanged.

    Dicts and lists are modified in place and the same object is returned.
    Tuples are immutable, so a new tuple holding the converted items is
    returned instead. Dict keys are not converted, only dict values.

    :param json_obj: object to convert (scalar, dict, list or tuple)
    :param encoding: codec used to encode unicode strings, applied at every
                     nesting level
    :param errors: error handling scheme passed to the encoder, applied at
                   every nesting level
    :returns: the converted object; always use the return value, since
              scalars and tuples cannot be converted in place
    """
    # `unicode` on Python 2. On Python 3 this is `str`, which is already
    # caught by the plain-scalar check below, so text is left untouched there
    # and no NameError is raised for the missing `unicode` builtin.
    unicode_type = type(u'')

    if json_obj is None or isinstance(json_obj, (str, bool, int, float)):
        return json_obj

    if isinstance(json_obj, unicode_type):
        encoded = json_obj.encode(encoding, errors)
        # only the encoded form is logged; the un-encoded original is
        # deliberately not handed to the logger
        logging.debug('json2ascii: replaced: {0}'.format(encoded))
        return encoded

    if isinstance(json_obj, dict):
        # re-assigning existing keys does not change the dict size, so this
        # is safe while iterating
        for key in json_obj:
            json_obj[key] = json2ascii(json_obj[key], encoding, errors)
        return json_obj

    if isinstance(json_obj, list):
        # write back by index: rebinding the loop variable alone would leave
        # the list contents unconverted
        for index, item in enumerate(json_obj):
            json_obj[index] = json2ascii(item, encoding, errors)
        return json_obj

    if isinstance(json_obj, tuple):
        return tuple(json2ascii(item, encoding, errors) for item in json_obj)

    logging.debug('unexpected type in json2ascii: {0} -- leave as is'
                  .format(type(json_obj)))
    return json_obj
| 1616 | + | ||
| 1617 | + | ||
| 1595 | class VBA_Scanner(object): | 1618 | class VBA_Scanner(object): |
| 1596 | """ | 1619 | """ |
| 1597 | Class to scan the source code of a VBA module to find obfuscated strings, | 1620 | Class to scan the source code of a VBA module to find obfuscated strings, |
| @@ -2732,10 +2755,10 @@ def main(): | @@ -2732,10 +2755,10 @@ def main(): | ||
| 2732 | 2755 | ||
| 2733 | if options.output_mode == 'json': | 2756 | if options.output_mode == 'json': |
| 2734 | json_options = dict(check_circular=False, indent=4, ensure_ascii=False) | 2757 | json_options = dict(check_circular=False, indent=4, ensure_ascii=False) |
| 2735 | - # from python json doc for ensure_ascii=False: "unless [target for json | ||
| 2736 | - # output] explicitly understands unicode (as in codecs.getwriter()) | ||
| 2737 | - # this is likely to cause an error." | ||
| 2738 | - # If option --decode is given, data is likely to contain non-ascii data | 2758 | + |
| 2759 | + # json.dump[s] cannot deal with unicode objects that are not properly | ||
| 2760 | + # encoded --> encode in own function: | ||
| 2761 | + json_results = json2ascii(json_results) | ||
| 2739 | 2762 | ||
| 2740 | if False: # options.outfile: # (option currently commented out) | 2763 | if False: # options.outfile: # (option currently commented out) |
| 2741 | with open(outfile, 'w') as write_handle: | 2764 | with open(outfile, 'w') as write_handle: |