Commit 7d06413c02f30b4781025870d71127de68c0bce0

Authored by decalage2
1 parent 5ec9c149

olevba: fixed json2ascii to support Python 2+3 (issue #106)

Showing 1 changed file with 27 additions and 14 deletions
oletools/olevba.py
@@ -2251,7 +2251,8 @@ def detect_vba_strings(vba_code): @@ -2251,7 +2251,8 @@ def detect_vba_strings(vba_code):
2251 2251
2252 2252
2253 def json2ascii(json_obj, encoding='utf8', errors='replace'): 2253 def json2ascii(json_obj, encoding='utf8', errors='replace'):
2254 - """ ensure there is no unicode in json and all strings are safe to decode 2254 + """
  2255 + ensure there is no unicode in json and all strings are safe to decode
2255 2256
2256 works recursively, decodes and re-encodes every string to/from unicode 2257 works recursively, decodes and re-encodes every string to/from unicode
2257 to ensure there will be no trouble in loading the dumped json output 2258 to ensure there will be no trouble in loading the dumped json output
@@ -2260,21 +2261,33 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): @@ -2260,21 +2261,33 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'):
2260 pass 2261 pass
2261 elif isinstance(json_obj, (bool, int, float)): 2262 elif isinstance(json_obj, (bool, int, float)):
2262 pass 2263 pass
2263 - elif isinstance(json_obj, bytes):  
2264 - # de-code and re-encode  
2265 - dencoded = json_obj.decode(encoding, errors).encode(encoding, errors)  
2266 - if dencoded != json_obj:  
2267 - log.debug('json2ascii: replaced: {0} (len {1})'  
2268 - .format(json_obj, len(json_obj)))  
2269 - log.debug('json2ascii: with: {0} (len {1})'  
2270 - .format(dencoded, len(dencoded)))  
2271 - return dencoded  
2272 - elif isinstance(json_obj, unicode):  
2273 - log.debug('json2ascii: encode unicode: {0}'  
2274 - .format(json_obj.encode(encoding, errors))) 2264 + elif isinstance(json_obj, str):
  2265 + if PYTHON2:
  2266 + # de-code and re-encode
  2267 + dencoded = json_obj.decode(encoding, errors).encode(encoding, errors)
  2268 + if dencoded != json_obj:
  2269 + log.debug('json2ascii: replaced: {0} (len {1})'
  2270 + .format(json_obj, len(json_obj)))
  2271 + log.debug('json2ascii: with: {0} (len {1})'
  2272 + .format(dencoded, len(dencoded)))
  2273 + return dencoded
  2274 + else:
  2275 + # on Python 3, just keep Unicode strings as-is:
  2276 + return json_obj
  2277 + elif isinstance(json_obj, unicode) and PYTHON2:
  2278 + # On Python 2, encode unicode to bytes:
  2279 + json_obj_bytes = json_obj.encode(encoding, errors)
  2280 + log.debug('json2ascii: encode unicode: {0}'.format(json_obj_bytes))
  2281 + # cannot put original into logger
  2282 + # print 'original: ' json_obj
  2283 + return json_obj_bytes
  2284 + elif isinstance(json_obj, bytes) and not PYTHON2:
  2285 + # On Python 3, decode bytes to unicode str
  2286 + json_obj_str = json_obj.decode(encoding, errors)
  2287 + log.debug('json2ascii: encode unicode: {0}'.format(json_obj_str))
2275 # cannot put original into logger 2288 # cannot put original into logger
2276 # print 'original: ' json_obj 2289 # print 'original: ' json_obj
2277 - return json_obj.encode(encoding, errors) 2290 + return json_obj_str
2278 elif isinstance(json_obj, dict): 2291 elif isinstance(json_obj, dict):
2279 for key in json_obj: 2292 for key in json_obj:
2280 json_obj[key] = json2ascii(json_obj[key]) 2293 json_obj[key] = json2ascii(json_obj[key])