Commit e2a52831884deb9f060e98026fd7e1a8696d3e30

Authored by decalage2
1 parent a410b68b

olevba: added name_str and others to VBA_Module for issue #383, a few PEP8 fixes

oletools/olevba.py
... ... @@ -239,14 +239,14 @@ __version__ = '0.54dev7'
239 239 # - extract_macros: use combined struct.unpack instead of many calls
240 240 # - all except clauses should target specific exceptions
241 241  
242   -#------------------------------------------------------------------------------
  242 +# ------------------------------------------------------------------------------
243 243 # REFERENCES:
244 244 # - [MS-OVBA]: Microsoft Office VBA File Format Structure
245 245 # http://msdn.microsoft.com/en-us/library/office/cc313094%28v=office.12%29.aspx
246 246 # - officeparser: https://github.com/unixfreak0037/officeparser
247 247  
248 248  
249   -#--- IMPORTS ------------------------------------------------------------------
  249 +# --- IMPORTS ------------------------------------------------------------------
250 250  
251 251 import sys
252 252 import os
... ... @@ -263,7 +263,6 @@ import zlib
263 263 import email # for MHTML parsing
264 264 import string # for printable
265 265 import json # for json output mode (argument --json)
266   -import codecs
267 266  
268 267 # import lxml or ElementTree for XML parsing:
269 268 try:
... ... @@ -298,7 +297,7 @@ _thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
298 297 # print('_thismodule_dir = %r' % _thismodule_dir)
299 298 _parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
300 299 # print('_parent_dir = %r' % _parent_dir)
301   -if not _parent_dir in sys.path:
  300 +if _parent_dir not in sys.path:
302 301 sys.path.insert(0, _parent_dir)
303 302  
304 303 import olefile
... ... @@ -330,38 +329,35 @@ if sys.version_info[0] <= 2:
330 329 # on Python 2, just use the normal ord() because items are bytes
331 330 byte_ord = ord
332 331 #: Default string encoding for the olevba API
333   - DEFAULT_API_ENCODING = 'utf8' # on Python 2: UTF-8 (bytes)
  332 + DEFAULT_API_ENCODING = 'utf8' # on Python 2: UTF-8 (bytes)
334 333 else:
335 334 # Python 3.x+
336 335 PYTHON2 = False
  336 +
337 337 # to use ord on bytes/bytearray items the same way in Python 2+3
338 338 # on Python 3, items are int, so just return the item
339   - byte_ord = lambda x: x
  339 + def byte_ord(x):
  340 + return x
340 341 # xrange is now called range:
341 342 xrange = range
342 343 # unichr does not exist anymore, only chr:
343 344 unichr = chr
344 345 from functools import reduce
345 346 #: Default string encoding for the olevba API
346   - DEFAULT_API_ENCODING = None # on Python 3: None (unicode)
347   -
348   -
349   -# === PYTHON 3.0 - 3.4 SUPPORT ======================================================
350   -
351   -# From https://gist.github.com/ynkdir/867347/c5e188a4886bc2dd71876c7e069a7b00b6c16c61
  347 + DEFAULT_API_ENCODING = None # on Python 3: None (unicode)
  348 + # Python 3.0 - 3.4 support:
  349 + # From https://gist.github.com/ynkdir/867347/c5e188a4886bc2dd71876c7e069a7b00b6c16c61
  350 + if sys.version_info < (3, 5):
  351 + import codecs
  352 + _backslashreplace_errors = codecs.lookup_error("backslashreplace")
352 353  
353   -if sys.version_info >= (3, 0) and sys.version_info < (3, 5):
354   - import codecs
  354 + def backslashreplace_errors(exc):
  355 + if isinstance(exc, UnicodeDecodeError):
  356 + u = "".join("\\x{0:02x}".format(c) for c in exc.object[exc.start:exc.end])
  357 + return u, exc.end
  358 + return _backslashreplace_errors(exc)
355 359  
356   - _backslashreplace_errors = codecs.lookup_error("backslashreplace")
357   -
358   - def backslashreplace_errors(exc):
359   - if isinstance(exc, UnicodeDecodeError):
360   - u = "".join("\\x{0:02x}".format(c) for c in exc.object[exc.start:exc.end])
361   - return (u, exc.end)
362   - return _backslashreplace_errors(exc)
363   -
364   - codecs.register_error("backslashreplace", backslashreplace_errors)
  360 + codecs.register_error("backslashreplace", backslashreplace_errors)
365 361  
366 362  
367 363 # === LOGGING =================================================================
... ... @@ -378,7 +374,7 @@ def get_logger(name, level=logging.CRITICAL+1):
378 374 # First, test if there is already a logger with the same name, else it
379 375 # will generate duplicate messages (due to duplicate handlers):
380 376 if name in logging.Logger.manager.loggerDict:
381   - #NOTE: another less intrusive but more "hackish" solution would be to
  377 + # NOTE: another less intrusive but more "hackish" solution would be to
382 378 # use getLogger then test if its effective level is not default.
383 379 logger = logging.getLogger(name)
384 380 # make sure level is OK:
... ... @@ -1346,12 +1342,16 @@ class VBA_Module(object):
1346 1342 """
1347 1343 #: reference to the VBA project for later use (VBA_Project)
1348 1344 self.project = project
1349   - #: VBA project name (unicode str)
  1345 + #: VBA module name (unicode str)
1350 1346 self.name = None
1351   - #: VBA project name, unicode copy (unicode str)
  1347 + #: VBA module name as a native str (utf8 bytes on py2, str on py3)
  1348 + self.name_str = None
  1349 + #: VBA module name, unicode copy (unicode str)
1352 1350 self._name_unicode = None
1353   - #: Stream name containing the VBA project (unicode str)
  1351 + #: Stream name containing the VBA module (unicode str)
1354 1352 self.streamname = None
  1353 + #: Stream name containing the VBA module as a native str (utf8 bytes on py2, str on py3)
  1354 + self.streamname_str = None
1355 1355 self._streamname_unicode = None
1356 1356 self.docstring = None
1357 1357 self._docstring_unicode = None
... ... @@ -1376,6 +1376,10 @@ class VBA_Module(object):
1376 1376 modulename_bytes = dir_stream.read(size)
1377 1377 # Module name always stored as Unicode:
1378 1378 self.name = project.decode_bytes(modulename_bytes)
  1379 + if PYTHON2:
  1380 + self.name_str = self.name.encode('utf8', errors='replace')
  1381 + else:
  1382 + self.name_str = self.name
1379 1383 # account for optional sections
1380 1384 # TODO: shouldn't this be a loop? (check MS-OVBA)
1381 1385 section_id = struct.unpack("<H", dir_stream.read(2))[0]
... ... @@ -1394,6 +1398,10 @@ class VBA_Module(object):
1394 1398 streamname_bytes = dir_stream.read(size)
1395 1399 # Store it as Unicode:
1396 1400 self.streamname = project.decode_bytes(streamname_bytes)
  1401 + if PYTHON2:
  1402 + self.streamname_str = self.streamname.encode('utf8', errors='replace')
  1403 + else:
  1404 + self.streamname_str = self.streamname
1397 1405 reserved = struct.unpack("<H", dir_stream.read(2))[0]
1398 1406 project.check_value('MODULESTREAMNAME_Reserved', 0x0032, reserved)
1399 1407 size = struct.unpack("<L", dir_stream.read(4))[0]
... ... @@ -1469,10 +1477,10 @@ class VBA_Module(object):
1469 1477 if section_id != None:
1470 1478 log.warning('unknown or invalid module section id {0:04X}'.format(section_id))
1471 1479  
1472   - log.debug("Module Name = {0}".format(self.name))
1473   - log.debug("Module Name Unicode = {0}".format(self._name_unicode))
1474   - log.debug("Stream Name = {0}".format(self.streamname))
1475   - log.debug("Stream Name Unicode = {0}".format(self._streamname_unicode))
  1480 + log.debug("Module Name = {0}".format(self.name_str))
  1481 + # log.debug("Module Name Unicode = {0}".format(self._name_unicode))
  1482 + log.debug("Stream Name = {0}".format(self.streamname_str))
  1483 + # log.debug("Stream Name Unicode = {0}".format(self._streamname_unicode))
1476 1484 log.debug("TextOffset = {0}".format(self.textoffset))
1477 1485  
1478 1486 code_data = None
... ... @@ -1519,10 +1527,10 @@ class VBA_Module(object):
1519 1527 self.code_str = self.code
1520 1528 # case-insensitive search in the code_modules dict to find the file extension:
1521 1529 filext = self.project.module_ext.get(self.name.lower(), 'vba')
1522   - self.filename = '{0}.{1}'.format(self.name, filext)
1523   - log.debug('extracted file {0}'.format(self.filename))
  1530 + self.filename = u'{0}.{1}'.format(self.name, filext)
  1531 + log.debug('extracted file {0}'.format(repr(self.filename)))
1524 1532 else:
1525   - log.warning("module stream {0} has code data length 0".format(self.streamname))
  1533 + log.warning("module stream {0} has code data length 0".format(self.streamname_str))
1526 1534 except (UnexpectedDataError, SubstreamOpenError):
1527 1535 raise
1528 1536 except Exception as exc:
... ...
requirements.txt
1 1 pyparsing>=2.2.0
2 2 olefile>=0.45
  3 +colorclass
... ...