Commit f5da931553db194eb3e8388451cbb9fa3dc72b4f
1 parent
da9749ed
try 3 different unicode versions of VBA module/stream name
use new uni_out instead of repr() to make unicode output safe for shells
Showing
1 changed file
with
30 additions
and
10 deletions
oletools/olevba.py
| ... | ... | @@ -1416,6 +1416,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path): |
| 1416 | 1416 | projectmodules_projectcookierecord_cookie = struct.unpack("<H", dir_stream.read(2))[0] |
| 1417 | 1417 | unused = projectmodules_projectcookierecord_cookie |
| 1418 | 1418 | |
| 1419 | + # short function to simplify unicode text output | |
| 1420 | + uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace') | |
| 1421 | + | |
| 1419 | 1422 | log.debug("parsing {0} modules".format(projectmodules_count)) |
| 1420 | 1423 | for projectmodule_index in xrange(0, projectmodules_count): |
| 1421 | 1424 | try: |
| ... | ... | @@ -1428,9 +1431,10 @@ def _extract_vba(ole, vba_root, project_path, dir_path): |
| 1428 | 1431 | if section_id == 0x0047: |
| 1429 | 1432 | modulename_unicode_id = section_id |
| 1430 | 1433 | modulename_unicode_sizeof_modulename_unicode = struct.unpack("<L", dir_stream.read(4))[0] |
| 1431 | - modulename_unicode_modulename_unicode = dir_stream.read(modulename_unicode_sizeof_modulename_unicode) | |
| 1434 | + modulename_unicode_modulename_unicode = dir_stream.read( | |
| 1435 | + modulename_unicode_sizeof_modulename_unicode).decode('UTF-16LE', 'replace') | |
| 1436 | + # just guessing that this is the same encoding as used in OleFileIO | |
| 1432 | 1437 | unused = modulename_unicode_id |
| 1433 | - unused = modulename_unicode_modulename_unicode | |
| 1434 | 1438 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1435 | 1439 | if section_id == 0x001A: |
| 1436 | 1440 | modulestreamname_id = section_id |
| ... | ... | @@ -1439,7 +1443,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path): |
| 1439 | 1443 | modulestreamname_reserved = struct.unpack("<H", dir_stream.read(2))[0] |
| 1440 | 1444 | check_value('MODULESTREAMNAME_Reserved', 0x0032, modulestreamname_reserved) |
| 1441 | 1445 | modulestreamname_sizeof_streamname_unicode = struct.unpack("<L", dir_stream.read(4))[0] |
| 1442 | - modulestreamname_streamname_unicode = dir_stream.read(modulestreamname_sizeof_streamname_unicode) | |
| 1446 | + modulestreamname_streamname_unicode = dir_stream.read( | |
| 1447 | + modulestreamname_sizeof_streamname_unicode).decode('UTF-16LE', 'replace') | |
| 1448 | + # just guessing that this is the same encoding as used in OleFileIO | |
| 1443 | 1449 | unused = modulestreamname_id |
| 1444 | 1450 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1445 | 1451 | if section_id == 0x001C: |
| ... | ... | @@ -1505,16 +1511,30 @@ def _extract_vba(ole, vba_root, project_path, dir_path): |
| 1505 | 1511 | log.debug('Project CodePage = %d' % projectcodepage_codepage) |
| 1506 | 1512 | vba_codec = 'cp%d' % projectcodepage_codepage |
| 1507 | 1513 | log.debug("ModuleName = {0}".format(modulename_modulename)) |
| 1508 | - log.debug("StreamName = {0}".format(repr(modulestreamname_streamname))) | |
| 1514 | + log.debug("ModuleNameUnicode = {0}".format(uni_out(modulename_unicode_modulename_unicode))) | |
| 1515 | + log.debug("StreamName = {0}".format(uni_out(modulestreamname_streamname))) | |
| 1509 | 1516 | streamname_unicode = modulestreamname_streamname.decode(vba_codec) |
| 1510 | - log.debug("StreamName.decode('%s') = %s" % (vba_codec, repr(streamname_unicode))) | |
| 1511 | - log.debug("StreamNameUnicode = {0}".format(repr(modulestreamname_streamname_unicode))) | |
| 1517 | + log.debug("StreamName.decode('%s') = %s" % (vba_codec, uni_out(streamname_unicode))) | |
| 1518 | + log.debug("StreamNameUnicode = {0}".format(uni_out(modulestreamname_streamname_unicode))) | |
| 1512 | 1519 | log.debug("TextOffset = {0}".format(moduleoffset_textoffset)) |
| 1513 | 1520 | |
| 1514 | - code_path = vba_root + u'VBA/' + streamname_unicode | |
| 1515 | - #TODO: test if stream exists | |
| 1516 | - log.debug('opening VBA code stream %s' % repr(code_path)) | |
| 1517 | - code_data = ole.openstream(code_path).read() | |
| 1521 | + code_data = None | |
| 1522 | + try_names = streamname_unicode, \ | |
| 1523 | + modulename_unicode_modulename_unicode, \ | |
| 1524 | + modulestreamname_streamname_unicode | |
| 1525 | + for stream_name in try_names: | |
| 1526 | + try: | |
| 1527 | + code_path = vba_root + u'VBA/' + stream_name | |
| 1528 | + log.debug('opening VBA code stream %s' % uni_out(code_path)) | |
| 1529 | + code_data = ole.openstream(code_path).read() | |
| 1530 | + break | |
| 1531 | + except IOError as ioe: | |
| 1532 | + log.debug('failed to open stream {} ({}), try other name' | |
| 1533 | + .format(uni_out(stream_name), ioe)) | |
| 1534 | + | |
| 1535 | + if code_data is None: | |
| 1536 | + continue | |
| 1537 | + | |
| 1518 | 1538 | log.debug("length of code_data = {0}".format(len(code_data))) |
| 1519 | 1539 | log.debug("offset of code_data = {0}".format(moduleoffset_textoffset)) |
| 1520 | 1540 | code_data = code_data[moduleoffset_textoffset:] | ... | ... |