Commit 4a63e59b2c7ce5487129b5f4f4d5f1654ec15c4d
1 parent: 07333a3a
olevba: added code_raw and code_str to VBA_Module
Showing 1 changed file with 22 additions and 6 deletions
oletools/olevba.py
| ... | ... | @@ -325,6 +325,7 @@ email.feedparser.headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:?|[\t |
| 325 | 325 | |
| 326 | 326 | if sys.version_info[0] <= 2: |
| 327 | 327 | # Python 2.x |
| 328 | + PYTHON2 = True | |
| 328 | 329 | # to use ord on bytes/bytearray items the same way in Python 2+3 |
| 329 | 330 | # on Python 2, just use the normal ord() because items are bytes |
| 330 | 331 | byte_ord = ord |
| ... | ... | @@ -332,6 +333,7 @@ if sys.version_info[0] <= 2: |
| 332 | 333 | DEFAULT_API_ENCODING = 'utf8' # on Python 2: UTF-8 (bytes) |
| 333 | 334 | else: |
| 334 | 335 | # Python 3.x+ |
| 336 | + PYTHON2 = False | |
| 335 | 337 | # to use ord on bytes/bytearray items the same way in Python 2+3 |
| 336 | 338 | # on Python 3, items are int, so just return the item |
| 337 | 339 | byte_ord = lambda x: x |
| ... | ... | @@ -1342,13 +1344,13 @@ class VBA_Module(object): |
| 1342 | 1344 | :param olefile.OleStream dir_stream: olefile.OleStream, file object containing the module record |
| 1343 | 1345 | :param int module_index: int, index of the module in the VBA project list |
| 1344 | 1346 | """ |
| 1345 | - #: reference to the VBA project for later use | |
| 1347 | + #: reference to the VBA project for later use (VBA_Project) | |
| 1346 | 1348 | self.project = project |
| 1347 | - #: VBA project name | |
| 1349 | + #: VBA project name (unicode str) | |
| 1348 | 1350 | self.name = None |
| 1349 | - #: VBA project name (Unicode) | |
| 1351 | + #: VBA project name, unicode copy (unicode str) | |
| 1350 | 1352 | self.name_unicode = None |
| 1351 | - #: Stream name containing the VBA project | |
| 1353 | + #: Stream name containing the VBA project (unicode str) | |
| 1352 | 1354 | self.streamname = None |
| 1353 | 1355 | self.streamname_unicode = None |
| 1354 | 1356 | self.docstring = None |
| ... | ... | @@ -1357,8 +1359,12 @@ class VBA_Module(object): |
| 1357 | 1359 | self.type = None |
| 1358 | 1360 | self.readonly = False |
| 1359 | 1361 | self.private = False |
| 1360 | - self.code_bytes = None | |
| 1362 | + #: VBA source code in bytes format, using the original code page from the VBA project | |
| 1363 | + self.code_raw = None | |
| 1364 | + #: VBA source code in unicode format (unicode for Python2, str for Python 3) | |
| 1361 | 1365 | self.code = None |
| 1366 | + #: VBA source code in native str format (str encoded with UTF-8 for Python 2, str for Python 3) | |
| 1367 | + self.code_str = None | |
| 1362 | 1368 | self.filename = None |
| 1363 | 1369 | self.code_path = None |
| 1364 | 1370 | try: |
| ... | ... | @@ -1500,8 +1506,17 @@ class VBA_Module(object): |
| 1500 | 1506 | code_data = code_data[self.textoffset:] |
| 1501 | 1507 | if len(code_data) > 0: |
| 1502 | 1508 | code_data = decompress_stream(bytearray(code_data)) |
| 1503 | - self.code_bytes = code_data | |
| 1509 | + # store the raw code encoded as bytes with the project's code page: | |
| 1510 | + self.code_raw = code_data | |
| 1511 | + # decode it to unicode: | |
| 1504 | 1512 | self.code = project.decode_bytes(code_data) |
| 1513 | + # also store a native str version: | |
| 1514 | + if PYTHON2: | |
| 1515 | + # UTF-8 encoded bytes for Python 2: | |
| 1516 | + self.code_str = self.code.encode('utf8', errors='replace') | |
| 1517 | + else: | |
| 1518 | + # plain unicode for Python 3: | |
| 1519 | + self.code_str = self.code | |
| 1505 | 1520 | # case-insensitive search in the code_modules dict to find the file extension: |
| 1506 | 1521 | # filext = code_modules.get(modulename_modulename.lower(), 'bin') |
| 1507 | 1522 | filext = 'vba' |
| ... | ... | @@ -1545,6 +1560,7 @@ class VBA_Project(object): |
| 1545 | 1560 | self. project_path = project_path |
| 1546 | 1561 | self.dir_path = dir_path |
| 1547 | 1562 | self.relaxed = relaxed |
| 1563 | + #: VBA modules contained in the project (list of VBA_Module objects) | |
| 1548 | 1564 | self.modules = [] |
| 1549 | 1565 | log.debug('Parsing the dir stream from %r' % dir_path) |
| 1550 | 1566 | # read data from dir stream (compressed) | ... | ... |