Commit f5da931553db194eb3e8388451cbb9fa3dc72b4f

Authored by Christian Herdtweck
1 parent da9749ed

try 3 different unicode versions of VBA module/stream name

use new uni_out instead of repr() to make unicode output safe for shells
Showing 1 changed file with 30 additions and 10 deletions
oletools/olevba.py
@@ -1416,6 +1416,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path): @@ -1416,6 +1416,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path):
1416 projectmodules_projectcookierecord_cookie = struct.unpack("<H", dir_stream.read(2))[0] 1416 projectmodules_projectcookierecord_cookie = struct.unpack("<H", dir_stream.read(2))[0]
1417 unused = projectmodules_projectcookierecord_cookie 1417 unused = projectmodules_projectcookierecord_cookie
1418 1418
  1419 + # short function to simplify unicode text output
  1420 + uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace')
  1421 +
1419 log.debug("parsing {0} modules".format(projectmodules_count)) 1422 log.debug("parsing {0} modules".format(projectmodules_count))
1420 for projectmodule_index in xrange(0, projectmodules_count): 1423 for projectmodule_index in xrange(0, projectmodules_count):
1421 try: 1424 try:
@@ -1428,9 +1431,10 @@ def _extract_vba(ole, vba_root, project_path, dir_path): @@ -1428,9 +1431,10 @@ def _extract_vba(ole, vba_root, project_path, dir_path):
1428 if section_id == 0x0047: 1431 if section_id == 0x0047:
1429 modulename_unicode_id = section_id 1432 modulename_unicode_id = section_id
1430 modulename_unicode_sizeof_modulename_unicode = struct.unpack("<L", dir_stream.read(4))[0] 1433 modulename_unicode_sizeof_modulename_unicode = struct.unpack("<L", dir_stream.read(4))[0]
1431 - modulename_unicode_modulename_unicode = dir_stream.read(modulename_unicode_sizeof_modulename_unicode) 1434 + modulename_unicode_modulename_unicode = dir_stream.read(
  1435 + modulename_unicode_sizeof_modulename_unicode).decode('UTF-16LE', 'replace')
  1436 + # just guessing that this is the same encoding as used in OleFileIO
1432 unused = modulename_unicode_id 1437 unused = modulename_unicode_id
1433 - unused = modulename_unicode_modulename_unicode  
1434 section_id = struct.unpack("<H", dir_stream.read(2))[0] 1438 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1435 if section_id == 0x001A: 1439 if section_id == 0x001A:
1436 modulestreamname_id = section_id 1440 modulestreamname_id = section_id
@@ -1439,7 +1443,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path): @@ -1439,7 +1443,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path):
1439 modulestreamname_reserved = struct.unpack("<H", dir_stream.read(2))[0] 1443 modulestreamname_reserved = struct.unpack("<H", dir_stream.read(2))[0]
1440 check_value('MODULESTREAMNAME_Reserved', 0x0032, modulestreamname_reserved) 1444 check_value('MODULESTREAMNAME_Reserved', 0x0032, modulestreamname_reserved)
1441 modulestreamname_sizeof_streamname_unicode = struct.unpack("<L", dir_stream.read(4))[0] 1445 modulestreamname_sizeof_streamname_unicode = struct.unpack("<L", dir_stream.read(4))[0]
1442 - modulestreamname_streamname_unicode = dir_stream.read(modulestreamname_sizeof_streamname_unicode) 1446 + modulestreamname_streamname_unicode = dir_stream.read(
  1447 + modulestreamname_sizeof_streamname_unicode).decode('UTF-16LE', 'replace')
  1448 + # just guessing that this is the same encoding as used in OleFileIO
1443 unused = modulestreamname_id 1449 unused = modulestreamname_id
1444 section_id = struct.unpack("<H", dir_stream.read(2))[0] 1450 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1445 if section_id == 0x001C: 1451 if section_id == 0x001C:
@@ -1505,16 +1511,30 @@ def _extract_vba(ole, vba_root, project_path, dir_path): @@ -1505,16 +1511,30 @@ def _extract_vba(ole, vba_root, project_path, dir_path):
1505 log.debug('Project CodePage = %d' % projectcodepage_codepage) 1511 log.debug('Project CodePage = %d' % projectcodepage_codepage)
1506 vba_codec = 'cp%d' % projectcodepage_codepage 1512 vba_codec = 'cp%d' % projectcodepage_codepage
1507 log.debug("ModuleName = {0}".format(modulename_modulename)) 1513 log.debug("ModuleName = {0}".format(modulename_modulename))
1508 - log.debug("StreamName = {0}".format(repr(modulestreamname_streamname))) 1514 + log.debug("ModuleNameUnicode = {0}".format(uni_out(modulename_unicode_modulename_unicode)))
  1515 + log.debug("StreamName = {0}".format(uni_out(modulestreamname_streamname)))
1509 streamname_unicode = modulestreamname_streamname.decode(vba_codec) 1516 streamname_unicode = modulestreamname_streamname.decode(vba_codec)
1510 - log.debug("StreamName.decode('%s') = %s" % (vba_codec, repr(streamname_unicode)))  
1511 - log.debug("StreamNameUnicode = {0}".format(repr(modulestreamname_streamname_unicode))) 1517 + log.debug("StreamName.decode('%s') = %s" % (vba_codec, uni_out(streamname_unicode)))
  1518 + log.debug("StreamNameUnicode = {0}".format(uni_out(modulestreamname_streamname_unicode)))
1512 log.debug("TextOffset = {0}".format(moduleoffset_textoffset)) 1519 log.debug("TextOffset = {0}".format(moduleoffset_textoffset))
1513 1520
1514 - code_path = vba_root + u'VBA/' + streamname_unicode  
1515 - #TODO: test if stream exists  
1516 - log.debug('opening VBA code stream %s' % repr(code_path))  
1517 - code_data = ole.openstream(code_path).read() 1521 + code_data = None
  1522 + try_names = streamname_unicode, \
  1523 + modulename_unicode_modulename_unicode, \
  1524 + modulestreamname_streamname_unicode
  1525 + for stream_name in try_names:
  1526 + try:
  1527 + code_path = vba_root + u'VBA/' + stream_name
  1528 + log.debug('opening VBA code stream %s' % uni_out(code_path))
  1529 + code_data = ole.openstream(code_path).read()
  1530 + break
  1531 + except IOError as ioe:
  1532 + log.debug('failed to open stream {} ({}), try other name'
  1533 + .format(uni_out(stream_name), ioe))
  1534 +
  1535 + if code_data is None:
  1536 + continue
  1537 +
1518 log.debug("length of code_data = {0}".format(len(code_data))) 1538 log.debug("length of code_data = {0}".format(len(code_data)))
1519 log.debug("offset of code_data = {0}".format(moduleoffset_textoffset)) 1539 log.debug("offset of code_data = {0}".format(moduleoffset_textoffset))
1520 code_data = code_data[moduleoffset_textoffset:] 1540 code_data = code_data[moduleoffset_textoffset:]