Commit 1ab73c64b6ee898bbf97b51931c541072ef715bc
1 parent
6e5aa20c
ensure loading of dumped json output will cause no encoding trouble by de- and re-encoding of strings
Showing 1 changed file with 35 additions and 3 deletions
oletools/olevba.py
| @@ -1594,12 +1594,26 @@ def detect_vba_strings(vba_code): | @@ -1594,12 +1594,26 @@ def detect_vba_strings(vba_code): | ||
| 1594 | 1594 | ||
| 1595 | 1595 | ||
| 1596 | def json2ascii(json_obj, encoding='utf8', errors='replace'): | 1596 | def json2ascii(json_obj, encoding='utf8', errors='replace'): |
| 1597 | + """ ensure there is no unicode in json and all strings are safe to decode | ||
| 1598 | + | ||
| 1599 | + works recursively, decodes and re-encodes every string to/from unicode | ||
| 1600 | + to ensure there will be no trouble in loading the dumped json output | ||
| 1601 | + """ | ||
| 1597 | if json_obj is None: | 1602 | if json_obj is None: |
| 1598 | pass | 1603 | pass |
| 1599 | - elif isinstance(json_obj, (str, bool, int, float)): | 1604 | + elif isinstance(json_obj, (bool, int, float)): |
| 1600 | pass | 1605 | pass |
| 1606 | + elif isinstance(json_obj, str): | ||
| 1607 | + dencoded = json_obj.decode(encoding, errors).encode(encoding, errors) | ||
| 1608 | + if dencoded != str: | ||
| 1609 | + logging.info('json2ascii: replaced: {0} (len {1})' | ||
| 1610 | + .format(json_obj, len(json_obj))) | ||
| 1611 | + logging.info('json2ascii: with: {0} (len {1})' | ||
| 1612 | + .format(dencoded, len(dencoded))) | ||
| 1613 | + return dencoded | ||
| 1601 | elif isinstance(json_obj, unicode): | 1614 | elif isinstance(json_obj, unicode): |
| 1602 | - logging.debug('json2ascii: replaced: {0}'.format(json_obj.encode(encoding, errors))) | 1615 | + logging.info('json2ascii: replaced: {0}' |
| 1616 | + .format(json_obj.encode(encoding, errors))) | ||
| 1603 | # cannot put original into logger | 1617 | # cannot put original into logger |
| 1604 | # print 'original: ' json_obj | 1618 | # print 'original: ' json_obj |
| 1605 | return json_obj.encode(encoding, errors) | 1619 | return json_obj.encode(encoding, errors) |
| @@ -2759,7 +2773,8 @@ def main(): | @@ -2759,7 +2773,8 @@ def main(): | ||
| 2759 | # json.dump[s] cannot deal with unicode objects that are not properly | 2773 | # json.dump[s] cannot deal with unicode objects that are not properly |
| 2760 | # encoded --> encode in own function: | 2774 | # encoded --> encode in own function: |
| 2761 | json_results = json2ascii(json_results) | 2775 | json_results = json2ascii(json_results) |
| 2762 | - | 2776 | + #print_json(json_results) |
| 2777 | + | ||
| 2763 | if False: # options.outfile: # (option currently commented out) | 2778 | if False: # options.outfile: # (option currently commented out) |
| 2764 | with open(outfile, 'w') as write_handle: | 2779 | with open(outfile, 'w') as write_handle: |
| 2765 | json.dump(write_handle, **json_options) | 2780 | json.dump(write_handle, **json_options) |
| @@ -2767,6 +2782,23 @@ def main(): | @@ -2767,6 +2782,23 @@ def main(): | ||
| 2767 | print json.dumps(json_results, **json_options) | 2782 | print json.dumps(json_results, **json_options) |
| 2768 | 2783 | ||
| 2769 | 2784 | ||
| 2785 | +def print_json(j): | ||
| 2786 | + if isinstance(j, dict): | ||
| 2787 | + for key, val in j.items(): | ||
| 2788 | + print_json(key) | ||
| 2789 | + print_json(val) | ||
| 2790 | + elif isinstance(j, list): | ||
| 2791 | + for elem in j: | ||
| 2792 | + print_json(elem) | ||
| 2793 | + else: | ||
| 2794 | + try: | ||
| 2795 | + if len(j) > 20: | ||
| 2796 | + print type(j), repr(j[:20]), '...(len {0})'.format(len(j)) | ||
| 2797 | + else: | ||
| 2798 | + print type(j), repr(j) | ||
| 2799 | + except TypeError: | ||
| 2800 | + print type(j), repr(j) | ||
| 2801 | + | ||
| 2770 | if __name__ == '__main__': | 2802 | if __name__ == '__main__': |
| 2771 | main() | 2803 | main() |
| 2772 | 2804 |