Commit f5da931553db194eb3e8388451cbb9fa3dc72b4f

Authored by Christian Herdtweck
1 parent da9749ed

try 3 different unicode versions of VBA module/stream name

use new uni_out instead of repr() to make unicode output safe for shells
Showing 1 changed file with 30 additions and 10 deletions
oletools/olevba.py
... ... @@ -1416,6 +1416,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path):
1416 1416 projectmodules_projectcookierecord_cookie = struct.unpack("<H", dir_stream.read(2))[0]
1417 1417 unused = projectmodules_projectcookierecord_cookie
1418 1418  
  1419 + # short function to simplify unicode text output
  1420 + uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace')
  1421 +
1419 1422 log.debug("parsing {0} modules".format(projectmodules_count))
1420 1423 for projectmodule_index in xrange(0, projectmodules_count):
1421 1424 try:
... ... @@ -1428,9 +1431,10 @@ def _extract_vba(ole, vba_root, project_path, dir_path):
1428 1431 if section_id == 0x0047:
1429 1432 modulename_unicode_id = section_id
1430 1433 modulename_unicode_sizeof_modulename_unicode = struct.unpack("<L", dir_stream.read(4))[0]
1431   - modulename_unicode_modulename_unicode = dir_stream.read(modulename_unicode_sizeof_modulename_unicode)
  1434 + modulename_unicode_modulename_unicode = dir_stream.read(
  1435 + modulename_unicode_sizeof_modulename_unicode).decode('UTF-16LE', 'replace')
  1436 + # just guessing that this is the same encoding as used in OleFileIO
1432 1437 unused = modulename_unicode_id
1433   - unused = modulename_unicode_modulename_unicode
1434 1438 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1435 1439 if section_id == 0x001A:
1436 1440 modulestreamname_id = section_id
... ... @@ -1439,7 +1443,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path):
1439 1443 modulestreamname_reserved = struct.unpack("<H", dir_stream.read(2))[0]
1440 1444 check_value('MODULESTREAMNAME_Reserved', 0x0032, modulestreamname_reserved)
1441 1445 modulestreamname_sizeof_streamname_unicode = struct.unpack("<L", dir_stream.read(4))[0]
1442   - modulestreamname_streamname_unicode = dir_stream.read(modulestreamname_sizeof_streamname_unicode)
  1446 + modulestreamname_streamname_unicode = dir_stream.read(
  1447 + modulestreamname_sizeof_streamname_unicode).decode('UTF-16LE', 'replace')
  1448 + # just guessing that this is the same encoding as used in OleFileIO
1443 1449 unused = modulestreamname_id
1444 1450 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1445 1451 if section_id == 0x001C:
... ... @@ -1505,16 +1511,30 @@ def _extract_vba(ole, vba_root, project_path, dir_path):
1505 1511 log.debug('Project CodePage = %d' % projectcodepage_codepage)
1506 1512 vba_codec = 'cp%d' % projectcodepage_codepage
1507 1513 log.debug("ModuleName = {0}".format(modulename_modulename))
1508   - log.debug("StreamName = {0}".format(repr(modulestreamname_streamname)))
  1514 + log.debug("ModuleNameUnicode = {0}".format(uni_out(modulename_unicode_modulename_unicode)))
  1515 + log.debug("StreamName = {0}".format(uni_out(modulestreamname_streamname)))
1509 1516 streamname_unicode = modulestreamname_streamname.decode(vba_codec)
1510   - log.debug("StreamName.decode('%s') = %s" % (vba_codec, repr(streamname_unicode)))
1511   - log.debug("StreamNameUnicode = {0}".format(repr(modulestreamname_streamname_unicode)))
  1517 + log.debug("StreamName.decode('%s') = %s" % (vba_codec, uni_out(streamname_unicode)))
  1518 + log.debug("StreamNameUnicode = {0}".format(uni_out(modulestreamname_streamname_unicode)))
1512 1519 log.debug("TextOffset = {0}".format(moduleoffset_textoffset))
1513 1520  
1514   - code_path = vba_root + u'VBA/' + streamname_unicode
1515   - #TODO: test if stream exists
1516   - log.debug('opening VBA code stream %s' % repr(code_path))
1517   - code_data = ole.openstream(code_path).read()
  1521 + code_data = None
  1522 + try_names = streamname_unicode, \
  1523 + modulename_unicode_modulename_unicode, \
  1524 + modulestreamname_streamname_unicode
  1525 + for stream_name in try_names:
  1526 + try:
  1527 + code_path = vba_root + u'VBA/' + stream_name
  1528 + log.debug('opening VBA code stream %s' % uni_out(code_path))
  1529 + code_data = ole.openstream(code_path).read()
  1530 + break
  1531 + except IOError as ioe:
  1532 + log.debug('failed to open stream {} ({}), try other name'
  1533 + .format(uni_out(stream_name), ioe))
  1534 +
  1535 + if code_data is None:
  1536 + continue
  1537 +
1518 1538 log.debug("length of code_data = {0}".format(len(code_data)))
1519 1539 log.debug("offset of code_data = {0}".format(moduleoffset_textoffset))
1520 1540 code_data = code_data[moduleoffset_textoffset:]
... ...