From e2a52831884deb9f060e98026fd7e1a8696d3e30 Mon Sep 17 00:00:00 2001 From: decalage2 Date: Wed, 16 Jan 2019 22:01:38 +0100 Subject: [PATCH] olevba: added name_str and others to VBA_Module for issue #383, a few PEP8 fixes --- oletools/olevba.py | 76 ++++++++++++++++++++++++++++++++++++++++++---------------------------------- requirements.txt | 1 + 2 files changed, 43 insertions(+), 34 deletions(-) diff --git a/oletools/olevba.py b/oletools/olevba.py index 7e2c651..6c68403 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -239,14 +239,14 @@ __version__ = '0.54dev7' # - extract_macros: use combined struct.unpack instead of many calls # - all except clauses should target specific exceptions -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # REFERENCES: # - [MS-OVBA]: Microsoft Office VBA File Format Structure # http://msdn.microsoft.com/en-us/library/office/cc313094%28v=office.12%29.aspx # - officeparser: https://github.com/unixfreak0037/officeparser -#--- IMPORTS ------------------------------------------------------------------ +# --- IMPORTS ------------------------------------------------------------------ import sys import os @@ -263,7 +263,6 @@ import zlib import email # for MHTML parsing import string # for printable import json # for json output mode (argument --json) -import codecs # import lxml or ElementTree for XML parsing: try: @@ -298,7 +297,7 @@ _thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) # print('_thismodule_dir = %r' % _thismodule_dir) _parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..')) # print('_parent_dir = %r' % _thirdparty_dir) -if not _parent_dir in sys.path: +if _parent_dir not in sys.path: sys.path.insert(0, _parent_dir) import olefile @@ -330,38 +329,35 @@ if sys.version_info[0] <= 2: # on Python 2, just use the normal ord() because items are bytes byte_ord = ord #: Default string encoding for the olevba API - DEFAULT_API_ENCODING = 'utf8' # on Python 2: UTF-8 (bytes) + DEFAULT_API_ENCODING = 'utf8' # on Python 2: UTF-8 (bytes) else: # Python 3.x+ PYTHON2 = False + # to use ord on bytes/bytearray items the same way in Python 2+3 # on Python 3, items are int, so just return the item - byte_ord = lambda x: x + def byte_ord(x): + return x # xrange is now called range: xrange = range # unichr does not exist anymore, only chr: unichr = chr from functools import reduce #: Default string encoding for the olevba API - DEFAULT_API_ENCODING = None # on Python 3: None (unicode) - - -# === PYTHON 3.0 - 3.4 SUPPORT ====================================================== - -# From https://gist.github.com/ynkdir/867347/c5e188a4886bc2dd71876c7e069a7b00b6c16c61 + DEFAULT_API_ENCODING = None # on Python 3: None (unicode) + # Python 3.0 - 3.4 support: + # From https://gist.github.com/ynkdir/867347/c5e188a4886bc2dd71876c7e069a7b00b6c16c61 + if sys.version_info < (3, 5): + import codecs + _backslashreplace_errors = codecs.lookup_error("backslashreplace") -if sys.version_info >= (3, 0) and sys.version_info < (3, 5): - import codecs + def backslashreplace_errors(exc): + if isinstance(exc, UnicodeDecodeError): + u = "".join("\\x{0:02x}".format(c) for c in exc.object[exc.start:exc.end]) + return u, exc.end + return _backslashreplace_errors(exc) - _backslashreplace_errors = codecs.lookup_error("backslashreplace") - - def backslashreplace_errors(exc): - if isinstance(exc, UnicodeDecodeError): - u = "".join("\\x{0:02x}".format(c) for c in exc.object[exc.start:exc.end]) - return (u, exc.end) - return _backslashreplace_errors(exc) - - codecs.register_error("backslashreplace", backslashreplace_errors) + codecs.register_error("backslashreplace", backslashreplace_errors) # === LOGGING ================================================================= @@ -378,7 +374,7 @@ def get_logger(name, level=logging.CRITICAL+1): # First, test if there is already a logger with the same name, else it # will generate duplicate messages (due to duplicate handlers): if name in logging.Logger.manager.loggerDict: - #NOTE: another less intrusive but more "hackish" solution would be to + # NOTE: another less intrusive but more "hackish" solution would be to # use getLogger then test if its effective level is not default. logger = logging.getLogger(name) # make sure level is OK: @@ -1346,12 +1342,16 @@ class VBA_Module(object): """ #: reference to the VBA project for later use (VBA_Project) self.project = project - #: VBA project name (unicode str) + #: VBA module name (unicode str) self.name = None - #: VBA project name, unicode copy (unicode str) + #: VBA module name as a native str (utf8 bytes on py2, str on py3) + self.name_str = None + #: VBA module name, unicode copy (unicode str) self._name_unicode = None - #: Stream name containing the VBA project (unicode str) + #: Stream name containing the VBA module (unicode str) self.streamname = None + #: Stream name containing the VBA module as a native str (utf8 bytes on py2, str on py3) + self.streamname_str = None self._streamname_unicode = None self.docstring = None self._docstring_unicode = None @@ -1376,6 +1376,10 @@ class VBA_Module(object): modulename_bytes = dir_stream.read(size) # Module name always stored as Unicode: self.name = project.decode_bytes(modulename_bytes) + if PYTHON2: + self.name_str = self.name.encode('utf8', errors='replace') + else: + self.name_str = self.name # account for optional sections # TODO: shouldn't this be a loop? (check MS-OVBA) section_id = struct.unpack("=2.2.0 olefile>=0.45 +colorclass -- libgit2 0.21.4