Commit 238d0172ad7f52243c3f6c68cef52d71454c0b9f

Authored by decalage2
1 parent 35d65e6b

olevba: fixed parsing of VBA text files on Python 3 (issue #106)

Showing 1 changed file with 10 additions and 4 deletions
oletools/olevba.py
@@ -2021,9 +2021,13 @@ def vba_collapse_long_lines(vba_code): @@ -2021,9 +2021,13 @@ def vba_collapse_long_lines(vba_code):
2021 :return: str, VBA module code with long lines collapsed 2021 :return: str, VBA module code with long lines collapsed
2022 """ 2022 """
2023 # TODO: use a regex instead, to allow whitespaces after the underscore? 2023 # TODO: use a regex instead, to allow whitespaces after the underscore?
2024 - vba_code = vba_code.replace(' _\r\n', ' ')  
2025 - vba_code = vba_code.replace(' _\r', ' ')  
2026 - vba_code = vba_code.replace(' _\n', ' ') 2024 + try:
  2025 + vba_code = vba_code.replace(' _\r\n', ' ')
  2026 + vba_code = vba_code.replace(' _\r', ' ')
  2027 + vba_code = vba_code.replace(' _\n', ' ')
  2028 + except:
  2029 + log.exception('type(vba_code)=%s' % type(vba_code))
  2030 + raise
2027 return vba_code 2031 return vba_code
2028 2032
2029 2033
@@ -2890,7 +2894,9 @@ class VBA_Parser(object): @@ -2890,7 +2894,9 @@ class VBA_Parser(object):
2890 """ 2894 """
2891 log.info('Opening text file %s' % self.filename) 2895 log.info('Opening text file %s' % self.filename)
2892 # directly store the source code: 2896 # directly store the source code:
2893 - self.vba_code_all_modules = data 2897 + # On Python 2, store it as a raw bytes string
  2898 + # On Python 3, convert it to unicode assuming it was encoded with UTF-8
  2899 + self.vba_code_all_modules = bytes2str(data)
2894 self.contains_macros = True 2900 self.contains_macros = True
2895 # set type only if parsing succeeds 2901 # set type only if parsing succeeds
2896 self.type = TYPE_TEXT 2902 self.type = TYPE_TEXT