Commit e7e7f97bcf14e00ef2788fe63fef67f6832a5ba6

Authored by decalage2
1 parent be57af2f

olevba: enabled relaxed mode by default (issues #477, #593), fixed detect_vba_macros to always return VBA code as unicode on Python 3 (issues #455, #477, #587, #593)
Showing 2 changed files with 17 additions and 7 deletions
oletools/olevba.py
... ... @@ -228,8 +228,11 @@ from __future__ import print_function
228 228 # 2020-03-22 PL: - uses plugin_biff to display DCONN objects and their URL
229 229 # 2020-06-11 PL: - fixed issue #575 when decompressing raw chunks in VBA
230 230 # 2020-09-03 MX: - fixed issue #602 monkeypatch in email package
  231 +# 2020-09-16 PL: - enabled relaxed mode by default (issues #477, #593)
  232 +# - fixed detect_vba_macros to always return VBA code as
  233 +# unicode on Python 3 (issues #455, #477, #587, #593)
231 234  
232   -__version__ = '0.56dev9'
  235 +__version__ = '0.56dev10'
233 236  
234 237 #------------------------------------------------------------------------------
235 238 # TODO:
... ... @@ -1626,7 +1629,7 @@ class VBA_Project(object):
1626 1629 metadata and VBA modules.
1627 1630 """
1628 1631  
1629   - def __init__(self, ole, vba_root, project_path, dir_path, relaxed=False):
  1632 + def __init__(self, ole, vba_root, project_path, dir_path, relaxed=True):
1630 1633 """
1631 1634 Extract VBA macros from an OleFileIO object.
1632 1635  
... ... @@ -2046,7 +2049,7 @@ class VBA_Project(object):
2046 2049  
2047 2050  
2048 2051  
2049   -def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
  2052 +def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=True):
2050 2053 """
2051 2054 Extract VBA macros from an OleFileIO object.
2052 2055 Internal function, do not call directly.
... ... @@ -3361,9 +3364,16 @@ class VBA_Parser(object):
3361 3364 log.debug('Found VBA compressed code at index %X' % start)
3362 3365 compressed_code = data[start:]
3363 3366 try:
3364   - vba_code = decompress_stream(bytearray(compressed_code))
3365   - # TODO vba_code = self.encode_string(vba_code)
3366   - yield (self.filename, d.name, d.name, vba_code)
  3367 + vba_code_bytes = decompress_stream(bytearray(compressed_code))
  3368 + # vba_code_bytes is in bytes, we need to convert it to str
  3369 + # but here we don't know the encoding of the VBA project
  3370 + # (for example code page 1252 or 1251), because it's in the
  3371 + # VBA_Project class and if we're here it may be because
  3372 + # the VBA project parsing failed (e.g. issue #593).
  3373 + # So let's convert using cp1252 as a guess
  3374 + # TODO get the actual encoding from the VBA_Project
  3375 + vba_code_str = bytes2str(vba_code_bytes, encoding='cp1252')
  3376 + yield (self.filename, d.name, d.name, vba_code_str)
3367 3377 except Exception as exc:
3368 3378 # display the exception with full stack trace for debugging
3369 3379 log.debug('Error processing stream %r in file %r (%s)' % (d.name, self.filename, exc))
... ...
setup.py
... ... @@ -52,7 +52,7 @@ import os, fnmatch
52 52 #--- METADATA -----------------------------------------------------------------
53 53  
54 54 name = "oletools"
55   -version = '0.56dev9'
  55 +version = '0.56dev10'
56 56 desc = "Python tools to analyze security characteristics of MS Office and OLE files (also called Structured Storage, Compound File Binary Format or Compound Document File Format), for Malware Analysis and Incident Response #DFIR"
57 57 long_desc = open('oletools/README.rst').read()
58 58 author = "Philippe Lagadec"
... ...