From 1ab73c64b6ee898bbf97b51931c541072ef715bc Mon Sep 17 00:00:00 2001
From: Christian Herdtweck
Date: Fri, 4 Mar 2016 15:10:23 +0100
Subject: [PATCH] ensure loading of dumped json output will cause no encoding trouble by de- and re-encoding of strings

---
Review notes (this section is ignored by git am):
 * json2ascii compared the re-encoded string against the type `str`, which
   is never equal, so both info messages were logged for every string.
   The check now compares against the original value (json_obj), so the
   messages appear only when re-encoding actually changed the string.
 * This patch was restored from a whitespace-collapsed copy; the leading
   whitespace of context lines is reconstructed. If the context does not
   match, apply with `git am --ignore-whitespace` or
   `git apply --ignore-whitespace`.

 oletools/olevba.py | 38 +++++++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/oletools/olevba.py b/oletools/olevba.py
index 4a9fcc9..b11f72b 100755
--- a/oletools/olevba.py
+++ b/oletools/olevba.py
@@ -1594,12 +1594,26 @@ def detect_vba_strings(vba_code):
 
 
 def json2ascii(json_obj, encoding='utf8', errors='replace'):
+    """ ensure there is no unicode in json and all strings are safe to decode
+
+    works recursively, decodes and re-encodes every string to/from unicode
+    to ensure there will be no trouble in loading the dumped json output
+    """
     if json_obj is None:
         pass
-    elif isinstance(json_obj, (str, bool, int, float)):
+    elif isinstance(json_obj, (bool, int, float)):
         pass
+    elif isinstance(json_obj, str):
+        dencoded = json_obj.decode(encoding, errors).encode(encoding, errors)
+        if dencoded != json_obj:
+            logging.info('json2ascii: replaced: {0} (len {1})'
+                         .format(json_obj, len(json_obj)))
+            logging.info('json2ascii: with: {0} (len {1})'
+                         .format(dencoded, len(dencoded)))
+        return dencoded
     elif isinstance(json_obj, unicode):
-        logging.debug('json2ascii: replaced: {0}'.format(json_obj.encode(encoding, errors)))
+        logging.info('json2ascii: replaced: {0}'
+                     .format(json_obj.encode(encoding, errors)))
         # cannot put original into logger
         # print 'original: ' json_obj
         return json_obj.encode(encoding, errors)
@@ -2759,7 +2773,8 @@ def main():
         # json.dump[s] cannot deal with unicode objects that are not properly
         # encoded --> encode in own function:
         json_results = json2ascii(json_results)
-
+        #print_json(json_results)
+
         if False:  # options.outfile:   # (option currently commented out)
             with open(outfile, 'w') as write_handle:
                 json.dump(write_handle, **json_options)
@@ -2767,6 +2782,23 @@ def main():
             print json.dumps(json_results, **json_options)
 
 
+def print_json(j):
+    if isinstance(j, dict):
+        for key, val in j.items():
+            print_json(key)
+            print_json(val)
+    elif isinstance(j, list):
+        for elem in j:
+            print_json(elem)
+    else:
+        try:
+            if len(j) > 20:
+                print type(j), repr(j[:20]), '...(len {0})'.format(len(j))
+            else:
+                print type(j), repr(j)
+        except TypeError:
+            print type(j), repr(j)
+
 if __name__ == '__main__':
     main()
 
-- 
libgit2 0.21.4