Commit fc17c53d6a64cff66106af09393e1fd98d6c06e2

Authored by decalage2
1 parent 3f716009

olevba: VBA_Parser.extract_form_strings now returns native str both on Python 2 and 3 (issue #106)

Showing 1 changed file with 9 additions and 3 deletions
oletools/olevba.py
... ... @@ -3271,11 +3271,12 @@ class VBA_Parser(object):
3271 3271 """
3272 3272 Extract printable strings from each VBA Form found in the file
3273 3273  
3274   - Iterator: yields (filename, stream_path, vba_filename, vba_code) for each VBA macro found
  3274 + Iterator: yields (filename, stream_path, form_string) for each printable string found in forms
3275 3275 If the file is OLE, filename is the path of the file.
3276 3276 If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
3277 3277 within the zip archive, e.g. word/vbaProject.bin.
3278 3278 If the file is PPT, result is as for OpenXML but filename is useless
  3279 + Note: form_string is a raw bytes string on Python 2, a unicode str on Python 3
3279 3280 """
3280 3281 if self.ole_file is None:
3281 3282 # This may be either an OpenXML/PPT or a text file:
... ... @@ -3298,8 +3299,13 @@ class VBA_Parser(object):
3298 3299 # Extract printable strings from the form object stream "o":
3299 3300 for m in re_printable_string.finditer(form_data):
3300 3301 log.debug('Printable string found in form: %r' % m.group())
3301   - if m.group() != b'Tahoma':
3302   - yield (self.filename, '/'.join(o_stream), m.group())
  3302 + # On Python 3, convert bytes string to unicode str:
  3303 + if PYTHON2:
  3304 + found_str = m.group()
  3305 + else:
  3306 + found_str = m.group().decode('utf8', errors='replace')
  3307 + if found_str != 'Tahoma':
  3308 + yield (self.filename, '/'.join(o_stream), found_str)
3303 3309  
3304 3310 def extract_form_strings_extended(self):
3305 3311 if self.ole_file is None:
... ...