Commit 4a63e59b2c7ce5487129b5f4f4d5f1654ec15c4d

Authored by decalage2
1 parent 07333a3a

olevba: added code_raw and code_str to VBA_Module

Showing 1 changed file with 22 additions and 6 deletions
oletools/olevba.py
... ... @@ -325,6 +325,7 @@ email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t
325 325  
326 326 if sys.version_info[0] <= 2:
327 327 # Python 2.x
  328 + PYTHON2 = True
328 329 # to use ord on bytes/bytearray items the same way in Python 2+3
329 330 # on Python 2, just use the normal ord() because items are bytes
330 331 byte_ord = ord
... ... @@ -332,6 +333,7 @@ if sys.version_info[0] <= 2:
332 333 DEFAULT_API_ENCODING = 'utf8' # on Python 2: UTF-8 (bytes)
333 334 else:
334 335 # Python 3.x+
  336 + PYTHON2 = False
335 337 # to use ord on bytes/bytearray items the same way in Python 2+3
336 338 # on Python 3, items are int, so just return the item
337 339 byte_ord = lambda x: x
... ... @@ -1342,13 +1344,13 @@ class VBA_Module(object):
1342 1344 :param olefile.OleStream dir_stream: olefile.OleStream, file object containing the module record
1343 1345 :param int module_index: int, index of the module in the VBA project list
1344 1346 """
1345   - #: reference to the VBA project for later use
  1347 + #: reference to the VBA project for later use (VBA_Project)
1346 1348 self.project = project
1347   - #: VBA project name
  1349 + #: VBA project name (unicode str)
1348 1350 self.name = None
1349   - #: VBA project name (Unicode)
  1351 + #: VBA project name, unicode copy (unicode str)
1350 1352 self.name_unicode = None
1351   - #: Stream name containing the VBA project
  1353 + #: Stream name containing the VBA project (unicode str)
1352 1354 self.streamname = None
1353 1355 self.streamname_unicode = None
1354 1356 self.docstring = None
... ... @@ -1357,8 +1359,12 @@ class VBA_Module(object):
1357 1359 self.type = None
1358 1360 self.readonly = False
1359 1361 self.private = False
1360   - self.code_bytes = None
  1362 + #: VBA source code in bytes format, using the original code page from the VBA project
  1363 + self.code_raw = None
  1364 + #: VBA source code in unicode format (unicode for Python2, str for Python 3)
1361 1365 self.code = None
  1366 + #: VBA source code in native str format (str encoded with UTF-8 for Python 2, str for Python 3)
  1367 + self.code_str = None
1362 1368 self.filename = None
1363 1369 self.code_path = None
1364 1370 try:
... ... @@ -1500,8 +1506,17 @@ class VBA_Module(object):
1500 1506 code_data = code_data[self.textoffset:]
1501 1507 if len(code_data) > 0:
1502 1508 code_data = decompress_stream(bytearray(code_data))
1503   - self.code_bytes = code_data
  1509 + # store the raw code encoded as bytes with the project's code page:
  1510 + self.code_raw = code_data
  1511 + # decode it to unicode:
1504 1512 self.code = project.decode_bytes(code_data)
  1513 + # also store a native str version:
  1514 + if PYTHON2:
  1515 + # UTF-8 encoded bytes for Python 2:
  1516 + self.code_str = self.code.encode('utf8', errors='replace')
  1517 + else:
  1518 + # plain unicode for Python 3:
  1519 + self.code_str = self.code
1505 1520 # case-insensitive search in the code_modules dict to find the file extension:
1506 1521 # filext = code_modules.get(modulename_modulename.lower(), 'bin')
1507 1522 filext = 'vba'
... ... @@ -1545,6 +1560,7 @@ class VBA_Project(object):
1545 1560 self. project_path = project_path
1546 1561 self.dir_path = dir_path
1547 1562 self.relaxed = relaxed
  1563 + #: VBA modules contained in the project (list of VBA_Module objects)
1548 1564 self.modules = []
1549 1565 log.debug('Parsing the dir stream from %r' % dir_path)
1550 1566 # read data from dir stream (compressed)
... ...