From fc17c53d6a64cff66106af09393e1fd98d6c06e2 Mon Sep 17 00:00:00 2001 From: decalage2 Date: Sun, 10 Feb 2019 20:35:12 +0100 Subject: [PATCH] olevba: VBA_Parser.extract_form_strings now returns native str both on Python 2 and 3 (issue #106) --- oletools/olevba.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/oletools/olevba.py b/oletools/olevba.py index f549ad4..11c00ae 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -3271,11 +3271,12 @@ class VBA_Parser(object): """ Extract printable strings from each VBA Form found in the file - Iterator: yields (filename, stream_path, vba_filename, vba_code) for each VBA macro found + Iterator: yields (filename, stream_path, form_string) for each printable string found in forms If the file is OLE, filename is the path of the file. If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros within the zip archive, e.g. word/vbaProject.bin. If the file is PPT, result is as for OpenXML but filename is useless + Note: form_string is a raw bytes string on Python 2, a unicode str on Python 3 """ if self.ole_file is None: # This may be either an OpenXML/PPT or a text file: @@ -3298,8 +3299,13 @@ class VBA_Parser(object): # Extract printable strings from the form object stream "o": for m in re_printable_string.finditer(form_data): log.debug('Printable string found in form: %r' % m.group()) - if m.group() != b'Tahoma': - yield (self.filename, '/'.join(o_stream), m.group()) + # On Python 3, convert bytes string to unicode str: + if PYTHON2: + found_str = m.group() + else: + found_str = m.group().decode('utf8', errors='replace') + if found_str != 'Tahoma': + yield (self.filename, '/'.join(o_stream), found_str) def extract_form_strings_extended(self): if self.ole_file is None: -- libgit2 0.21.4