Commit 1ab73c64b6ee898bbf97b51931c541072ef715bc
1 parent
6e5aa20c
ensure loading of dumped json output will cause no encoding trouble by de- and re-encoding of strings
Showing 1 changed file with 35 additions and 3 deletions
oletools/olevba.py
| ... | ... | @@ -1594,12 +1594,26 @@ def detect_vba_strings(vba_code): |
| 1594 | 1594 | |
| 1595 | 1595 | |
| 1596 | 1596 | def json2ascii(json_obj, encoding='utf8', errors='replace'): |
| 1597 | + """ ensure there is no unicode in json and all strings are safe to decode | |
| 1598 | + | |
| 1599 | + works recursively, decodes and re-encodes every string to/from unicode | |
| 1600 | + to ensure there will be no trouble in loading the dumped json output | |
| 1601 | + """ | |
| 1597 | 1602 | if json_obj is None: |
| 1598 | 1603 | pass |
| 1599 | - elif isinstance(json_obj, (str, bool, int, float)): | |
| 1604 | + elif isinstance(json_obj, (bool, int, float)): | |
| 1600 | 1605 | pass |
| 1606 | + elif isinstance(json_obj, str): | |
| 1607 | + dencoded = json_obj.decode(encoding, errors).encode(encoding, errors) | |
| 1608 | + if dencoded != str: | |
| 1609 | + logging.info('json2ascii: replaced: {0} (len {1})' | |
| 1610 | + .format(json_obj, len(json_obj))) | |
| 1611 | + logging.info('json2ascii: with: {0} (len {1})' | |
| 1612 | + .format(dencoded, len(dencoded))) | |
| 1613 | + return dencoded | |
| 1601 | 1614 | elif isinstance(json_obj, unicode): |
| 1602 | - logging.debug('json2ascii: replaced: {0}'.format(json_obj.encode(encoding, errors))) | |
| 1615 | + logging.info('json2ascii: replaced: {0}' | |
| 1616 | + .format(json_obj.encode(encoding, errors))) | |
| 1603 | 1617 | # cannot put original into logger |
| 1604 | 1618 | # print 'original: ' json_obj |
| 1605 | 1619 | return json_obj.encode(encoding, errors) |
| ... | ... | @@ -2759,7 +2773,8 @@ def main(): |
| 2759 | 2773 | # json.dump[s] cannot deal with unicode objects that are not properly |
| 2760 | 2774 | # encoded --> encode in own function: |
| 2761 | 2775 | json_results = json2ascii(json_results) |
| 2762 | - | |
| 2776 | + #print_json(json_results) | |
| 2777 | + | |
| 2763 | 2778 | if False: # options.outfile: # (option currently commented out) |
| 2764 | 2779 | with open(outfile, 'w') as write_handle: |
| 2765 | 2780 | json.dump(write_handle, **json_options) |
| ... | ... | @@ -2767,6 +2782,23 @@ def main(): |
| 2767 | 2782 | print json.dumps(json_results, **json_options) |
| 2768 | 2783 | |
| 2769 | 2784 | |
| 2785 | +def print_json(j): | |
| 2786 | + if isinstance(j, dict): | |
| 2787 | + for key, val in j.items(): | |
| 2788 | + print_json(key) | |
| 2789 | + print_json(val) | |
| 2790 | + elif isinstance(j, list): | |
| 2791 | + for elem in j: | |
| 2792 | + print_json(elem) | |
| 2793 | + else: | |
| 2794 | + try: | |
| 2795 | + if len(j) > 20: | |
| 2796 | + print type(j), repr(j[:20]), '...(len {0})'.format(len(j)) | |
| 2797 | + else: | |
| 2798 | + print type(j), repr(j) | |
| 2799 | + except TypeError: | |
| 2800 | + print type(j), repr(j) | |
| 2801 | + | |
| 2770 | 2802 | if __name__ == '__main__': |
| 2771 | 2803 | main() |
| 2772 | 2804 | ... | ... |