diff --git a/oletools/olevba.py b/oletools/olevba.py
index 5d1f71a..439e235 100644
--- a/oletools/olevba.py
+++ b/oletools/olevba.py
@@ -7,14 +7,14 @@ olevba is a script to parse OLE and OpenXML files such as MS Office documents
 and analyze malicious macros.
 
 Supported formats:
-- Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
-- Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
-- PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm)
-- Word/PowerPoint 2007+ XML (aka Flat OPC)
-- Word 2003 XML (.xml)
-- Word/Excel Single File Web Page / MHTML (.mht)
-- Publisher (.pub)
-- raises an error if run with files encrypted using MS Crypto API RC4
+    - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
+    - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
+    - PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm)
+    - Word/PowerPoint 2007+ XML (aka Flat OPC)
+    - Word 2003 XML (.xml)
+    - Word/Excel Single File Web Page / MHTML (.mht)
+    - Publisher (.pub)
+    - raises an error if run with files encrypted using MS Crypto API RC4
 
 Author: Philippe Lagadec - http://www.decalage.info
 License: BSD, see source code or documentation
@@ -328,6 +328,8 @@ if sys.version_info[0] <= 2:
     # to use ord on bytes/bytearray items the same way in Python 2+3
     # on Python 2, just use the normal ord() because items are bytes
     byte_ord = ord
+    #: Default string encoding for the olevba API
+    DEFAULT_API_ENCODING = 'utf8' # on Python 2: UTF-8 (bytes)
 else:
     # Python 3.x+
     # to use ord on bytes/bytearray items the same way in Python 2+3
@@ -338,6 +340,8 @@ else:
     # unichr does not exist anymore, only chr:
     unichr = chr
     from functools import reduce
+    #: Default string encoding for the olevba API
+    DEFAULT_API_ENCODING = None # on Python 3: None (unicode)
 
 
 # === PYTHON 3.0 - 3.4 SUPPORT ======================================================
@@ -1338,7 +1342,7 @@ class VBA_Module(object):
         :param olefile.OleStream dir_stream: olefile.OleStream, file object containing the module record
         :param int module_index: int, index of the module in the VBA project list
         """
-        #: store a reference to the VBA project for later use:
+        #: reference to the VBA project for later use
         self.project = project
         #: VBA project name
         self.name = None
@@ -2423,7 +2427,7 @@ def scan_vba(vba_code, include_decoded_strings, deobfuscate=False):
     :param include_decoded_strings: bool, if True all encoded strings will be included with their decoded content.
     :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
     :return: list of tuples (type, keyword, description)
-    (type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String' or 'Dridex String')
+        with type = 'AutoExec', 'Suspicious', 'IOC', 'Hex String', 'Base64 String' or 'Dridex String'
     """
     return VBA_Scanner(vba_code).scan(include_decoded_strings, deobfuscate)
 
@@ -2433,38 +2437,32 @@ def scan_vba(vba_code, include_decoded_strings, deobfuscate=False):
 class VBA_Parser(object):
     """
     Class to parse MS Office files, to detect VBA macros and extract VBA source code
-    Supported file formats:
-    - Word 97-2003 (.doc, .dot)
-    - Word 2007+ (.docm, .dotm)
-    - Word 2003 XML (.xml)
-    - Word MHT - Single File Web Page / MHTML (.mht)
-    - Excel 97-2003 (.xls)
-    - Excel 2007+ (.xlsm, .xlsb)
-    - PowerPoint 97-2003 (.ppt)
-    - PowerPoint 2007+ (.pptm, .ppsm)
     """
 
-    def __init__(self, filename, data=None, container=None, relaxed=False):
+    def __init__(self, filename, data=None, container=None, relaxed=False, encoding=DEFAULT_API_ENCODING):
         """
         Constructor for VBA_Parser
 
-        :param filename: filename or path of file to parse, or file-like object
+        :param str filename: filename or path of file to parse, or file-like object
 
-        :param data: None or bytes str, if None the file will be read from disk (or from the file-like object).
-        If data is provided as a bytes string, it will be parsed as the content of the file in memory,
-        and not read from disk. Note: files must be read in binary mode, i.e. open(f, 'rb').
+        :param bytes data: None or bytes str. If None, the file will be read from disk (or from the file-like object).
+            If data is provided as a bytes string, it will be parsed as the content of the file in memory,
+            and not read from disk. Note: files must be read in binary mode, i.e. open(f, 'rb').
 
-        :param container: str, path and filename of container if the file is within
-        a zip archive, None otherwise.
+        :param str container: str, path and filename of container if the file is within
+            a zip archive, None otherwise.
 
-        :param relaxed: if True, treat mal-formed documents and missing streams more like MS office:
-                        do nothing; if False (default), raise errors in these cases
+        :param bool relaxed: if True, treat malformed documents and missing streams more like MS Office:
+            do nothing; if False (default), raise errors in these cases
 
-        raises a FileOpenError if all attemps to interpret the data header failed
+        :param str encoding: encoding for VBA source code and strings.
+            Default: UTF-8 bytes strings on Python 2, unicode strings on Python 3 (None)
+
+        raises a FileOpenError if all attempts to interpret the data header failed.
         """
-        #TODO: filename should only be a string, data should be used for the file-like object
-        #TODO: filename should be mandatory, optional data is a string or file-like object
-        #TODO: also support olefile and zipfile as input
+        # TODO: filename should only be a string, data should be used for the file-like object
+        # TODO: filename should be mandatory, optional data is a string or file-like object
+        # TODO: also support olefile and zipfile as input
         if data is None:
             # open file from disk:
             _file = filename
@@ -2495,6 +2493,8 @@ class VBA_Parser(object):
         self.nb_base64strings = 0
         self.nb_dridexstrings = 0
         self.nb_vbastrings = 0
+        #: Encoding for VBA source code and strings returned by all methods
+        self.encoding = encoding
 
         # if filename is None:
         #     if isinstance(_file, basestring):
@@ -3000,6 +3000,19 @@ class VBA_Parser(object):
                         raise SubstreamOpenError(self.filename, d.name, exc)
         return self.contains_macros
 
+    def encode_string(self, unicode_str):
+        """
+        Encode a unicode string with the encoding set for this VBA_Parser
+        (returned unchanged if the encoding is None). By default, the result is
+        UTF-8 bytes on Python 2, and a normal unicode string on Python 3.
+        :param str unicode_str: string to be encoded
+        :return: encoded string
+        """
+        if self.encoding is None:
+            return unicode_str
+        else:
+            return unicode_str.encode(self.encoding, errors='replace')
+
     def extract_macros(self):
         """
         Extract and decompress source code for each VBA macro found in the file
@@ -3062,6 +3075,7 @@ class VBA_Parser(object):
                         compressed_code = data[start:]
                         try:
                             vba_code = decompress_stream(bytearray(compressed_code))
+                            # TODO vba_code = self.encode_string(vba_code)
                             yield (self.filename, d.name, d.name, vba_code)
                         except Exception as exc:
                             # display the exception with full stack trace for debugging