Commit fc17c53d6a64cff66106af09393e1fd98d6c06e2

Authored by decalage2
1 parent 3f716009

olevba: VBA_Parser.extract_form_strings now returns a native str on both Python 2 and 3 (issue #106)

Showing 1 changed file with 9 additions and 3 deletions
oletools/olevba.py
@@ -3271,11 +3271,12 @@ class VBA_Parser(object): @@ -3271,11 +3271,12 @@ class VBA_Parser(object):
3271 """ 3271 """
3272 Extract printable strings from each VBA Form found in the file 3272 Extract printable strings from each VBA Form found in the file
3273 3273
3274 - Iterator: yields (filename, stream_path, vba_filename, vba_code) for each VBA macro found 3274 + Iterator: yields (filename, stream_path, form_string) for each printable string found in forms
3275 If the file is OLE, filename is the path of the file. 3275 If the file is OLE, filename is the path of the file.
3276 If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros 3276 If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
3277 within the zip archive, e.g. word/vbaProject.bin. 3277 within the zip archive, e.g. word/vbaProject.bin.
3278 If the file is PPT, result is as for OpenXML but filename is useless 3278 If the file is PPT, result is as for OpenXML but filename is useless
  3279 + Note: form_string is a raw bytes string on Python 2, a unicode str on Python 3
3279 """ 3280 """
3280 if self.ole_file is None: 3281 if self.ole_file is None:
3281 # This may be either an OpenXML/PPT or a text file: 3282 # This may be either an OpenXML/PPT or a text file:
@@ -3298,8 +3299,13 @@ class VBA_Parser(object): @@ -3298,8 +3299,13 @@ class VBA_Parser(object):
3298 # Extract printable strings from the form object stream "o": 3299 # Extract printable strings from the form object stream "o":
3299 for m in re_printable_string.finditer(form_data): 3300 for m in re_printable_string.finditer(form_data):
3300 log.debug('Printable string found in form: %r' % m.group()) 3301 log.debug('Printable string found in form: %r' % m.group())
3301 - if m.group() != b'Tahoma':  
3302 - yield (self.filename, '/'.join(o_stream), m.group()) 3302 + # On Python 3, convert bytes string to unicode str:
  3303 + if PYTHON2:
  3304 + found_str = m.group()
  3305 + else:
  3306 + found_str = m.group().decode('utf8', errors='replace')
  3307 + if found_str != 'Tahoma':
  3308 + yield (self.filename, '/'.join(o_stream), found_str)
3303 3309
3304 def extract_form_strings_extended(self): 3310 def extract_form_strings_extended(self):
3305 if self.ole_file is None: 3311 if self.ole_file is None: