Commit 4a63e59b2c7ce5487129b5f4f4d5f1654ec15c4d

Authored by decalage2
1 parent 07333a3a

olevba: added code_raw and code_str to VBA_Module

Showing 1 changed file with 22 additions and 6 deletions
oletools/olevba.py
@@ -325,6 +325,7 @@ email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t @@ -325,6 +325,7 @@ email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t
325 325
326 if sys.version_info[0] <= 2: 326 if sys.version_info[0] <= 2:
327 # Python 2.x 327 # Python 2.x
  328 + PYTHON2 = True
328 # to use ord on bytes/bytearray items the same way in Python 2+3 329 # to use ord on bytes/bytearray items the same way in Python 2+3
329 # on Python 2, just use the normal ord() because items are bytes 330 # on Python 2, just use the normal ord() because items are bytes
330 byte_ord = ord 331 byte_ord = ord
@@ -332,6 +333,7 @@ if sys.version_info[0] <= 2: @@ -332,6 +333,7 @@ if sys.version_info[0] <= 2:
332 DEFAULT_API_ENCODING = 'utf8' # on Python 2: UTF-8 (bytes) 333 DEFAULT_API_ENCODING = 'utf8' # on Python 2: UTF-8 (bytes)
333 else: 334 else:
334 # Python 3.x+ 335 # Python 3.x+
  336 + PYTHON2 = False
335 # to use ord on bytes/bytearray items the same way in Python 2+3 337 # to use ord on bytes/bytearray items the same way in Python 2+3
336 # on Python 3, items are int, so just return the item 338 # on Python 3, items are int, so just return the item
337 byte_ord = lambda x: x 339 byte_ord = lambda x: x
@@ -1342,13 +1344,13 @@ class VBA_Module(object): @@ -1342,13 +1344,13 @@ class VBA_Module(object):
1342 :param olefile.OleStream dir_stream: olefile.OleStream, file object containing the module record 1344 :param olefile.OleStream dir_stream: olefile.OleStream, file object containing the module record
1343 :param int module_index: int, index of the module in the VBA project list 1345 :param int module_index: int, index of the module in the VBA project list
1344 """ 1346 """
1345 - #: reference to the VBA project for later use 1347 + #: reference to the VBA project for later use (VBA_Project)
1346 self.project = project 1348 self.project = project
1347 - #: VBA project name 1349 + #: VBA project name (unicode str)
1348 self.name = None 1350 self.name = None
1349 - #: VBA project name (Unicode) 1351 + #: VBA project name, unicode copy (unicode str)
1350 self.name_unicode = None 1352 self.name_unicode = None
1351 - #: Stream name containing the VBA project 1353 + #: Stream name containing the VBA project (unicode str)
1352 self.streamname = None 1354 self.streamname = None
1353 self.streamname_unicode = None 1355 self.streamname_unicode = None
1354 self.docstring = None 1356 self.docstring = None
@@ -1357,8 +1359,12 @@ class VBA_Module(object): @@ -1357,8 +1359,12 @@ class VBA_Module(object):
1357 self.type = None 1359 self.type = None
1358 self.readonly = False 1360 self.readonly = False
1359 self.private = False 1361 self.private = False
1360 - self.code_bytes = None 1362 + #: VBA source code in bytes format, using the original code page from the VBA project
  1363 + self.code_raw = None
  1364 + #: VBA source code in unicode format (unicode for Python2, str for Python 3)
1361 self.code = None 1365 self.code = None
  1366 + #: VBA source code in native str format (str encoded with UTF-8 for Python 2, str for Python 3)
  1367 + self.code_str = None
1362 self.filename = None 1368 self.filename = None
1363 self.code_path = None 1369 self.code_path = None
1364 try: 1370 try:
@@ -1500,8 +1506,17 @@ class VBA_Module(object): @@ -1500,8 +1506,17 @@ class VBA_Module(object):
1500 code_data = code_data[self.textoffset:] 1506 code_data = code_data[self.textoffset:]
1501 if len(code_data) > 0: 1507 if len(code_data) > 0:
1502 code_data = decompress_stream(bytearray(code_data)) 1508 code_data = decompress_stream(bytearray(code_data))
1503 - self.code_bytes = code_data 1509 + # store the raw code encoded as bytes with the project's code page:
  1510 + self.code_raw = code_data
  1511 + # decode it to unicode:
1504 self.code = project.decode_bytes(code_data) 1512 self.code = project.decode_bytes(code_data)
  1513 + # also store a native str version:
  1514 + if PYTHON2:
  1515 + # UTF-8 encoded bytes for Python 2:
  1516 + self.code_str = self.code.encode('utf8', errors='replace')
  1517 + else:
  1518 + # plain unicode for Python 3:
  1519 + self.code_str = self.code
1505 # case-insensitive search in the code_modules dict to find the file extension: 1520 # case-insensitive search in the code_modules dict to find the file extension:
1506 # filext = code_modules.get(modulename_modulename.lower(), 'bin') 1521 # filext = code_modules.get(modulename_modulename.lower(), 'bin')
1507 filext = 'vba' 1522 filext = 'vba'
@@ -1545,6 +1560,7 @@ class VBA_Project(object): @@ -1545,6 +1560,7 @@ class VBA_Project(object):
1545 self. project_path = project_path 1560 self. project_path = project_path
1546 self.dir_path = dir_path 1561 self.dir_path = dir_path
1547 self.relaxed = relaxed 1562 self.relaxed = relaxed
  1563 + #: VBA modules contained in the project (list of VBA_Module objects)
1548 self.modules = [] 1564 self.modules = []
1549 log.debug('Parsing the dir stream from %r' % dir_path) 1565 log.debug('Parsing the dir stream from %r' % dir_path)
1550 # read data from dir stream (compressed) 1566 # read data from dir stream (compressed)