Commit da9749ed11a34a0f8a5008a32e9a3132ddac7693
Merge pull request #46 from christian-intra2net/robustify-extact-vba
Robustify extract vba
Showing 1 changed file with 123 additions and 118 deletions
oletools/olevba.py
| @@ -1417,122 +1417,127 @@ def _extract_vba(ole, vba_root, project_path, dir_path): | @@ -1417,122 +1417,127 @@ def _extract_vba(ole, vba_root, project_path, dir_path): | ||
| 1417 | unused = projectmodules_projectcookierecord_cookie | 1417 | unused = projectmodules_projectcookierecord_cookie |
| 1418 | 1418 | ||
| 1419 | log.debug("parsing {0} modules".format(projectmodules_count)) | 1419 | log.debug("parsing {0} modules".format(projectmodules_count)) |
| 1420 | - for _ in xrange(0, projectmodules_count): | ||
| 1421 | - modulename_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1422 | - check_value('MODULENAME_Id', 0x0019, modulename_id) | ||
| 1423 | - modulename_sizeof_modulename = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1424 | - modulename_modulename = dir_stream.read(modulename_sizeof_modulename) | ||
| 1425 | - # account for optional sections | ||
| 1426 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1427 | - if section_id == 0x0047: | ||
| 1428 | - modulename_unicode_id = section_id | ||
| 1429 | - modulename_unicode_sizeof_modulename_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1430 | - modulename_unicode_modulename_unicode = dir_stream.read(modulename_unicode_sizeof_modulename_unicode) | ||
| 1431 | - unused = modulename_unicode_id | ||
| 1432 | - unused = modulename_unicode_modulename_unicode | ||
| 1433 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1434 | - if section_id == 0x001A: | ||
| 1435 | - modulestreamname_id = section_id | ||
| 1436 | - modulestreamname_sizeof_streamname = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1437 | - modulestreamname_streamname = dir_stream.read(modulestreamname_sizeof_streamname) | ||
| 1438 | - modulestreamname_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1439 | - check_value('MODULESTREAMNAME_Reserved', 0x0032, modulestreamname_reserved) | ||
| 1440 | - modulestreamname_sizeof_streamname_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1441 | - modulestreamname_streamname_unicode = dir_stream.read(modulestreamname_sizeof_streamname_unicode) | ||
| 1442 | - unused = modulestreamname_id | ||
| 1443 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1444 | - if section_id == 0x001C: | ||
| 1445 | - moduledocstring_id = section_id | ||
| 1446 | - check_value('MODULEDOCSTRING_Id', 0x001C, moduledocstring_id) | ||
| 1447 | - moduledocstring_sizeof_docstring = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1448 | - moduledocstring_docstring = dir_stream.read(moduledocstring_sizeof_docstring) | ||
| 1449 | - moduledocstring_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1450 | - check_value('MODULEDOCSTRING_Reserved', 0x0048, moduledocstring_reserved) | ||
| 1451 | - moduledocstring_sizeof_docstring_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1452 | - moduledocstring_docstring_unicode = dir_stream.read(moduledocstring_sizeof_docstring_unicode) | ||
| 1453 | - unused = moduledocstring_docstring | ||
| 1454 | - unused = moduledocstring_docstring_unicode | ||
| 1455 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1456 | - if section_id == 0x0031: | ||
| 1457 | - moduleoffset_id = section_id | ||
| 1458 | - check_value('MODULEOFFSET_Id', 0x0031, moduleoffset_id) | ||
| 1459 | - moduleoffset_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1460 | - check_value('MODULEOFFSET_Size', 0x0004, moduleoffset_size) | ||
| 1461 | - moduleoffset_textoffset = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1462 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1463 | - if section_id == 0x001E: | ||
| 1464 | - modulehelpcontext_id = section_id | ||
| 1465 | - check_value('MODULEHELPCONTEXT_Id', 0x001E, modulehelpcontext_id) | ||
| 1466 | - modulehelpcontext_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1467 | - check_value('MODULEHELPCONTEXT_Size', 0x0004, modulehelpcontext_size) | ||
| 1468 | - modulehelpcontext_helpcontext = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1469 | - unused = modulehelpcontext_helpcontext | ||
| 1470 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1471 | - if section_id == 0x002C: | ||
| 1472 | - modulecookie_id = section_id | ||
| 1473 | - check_value('MODULECOOKIE_Id', 0x002C, modulecookie_id) | ||
| 1474 | - modulecookie_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1475 | - check_value('MODULECOOKIE_Size', 0x0002, modulecookie_size) | ||
| 1476 | - modulecookie_cookie = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1477 | - unused = modulecookie_cookie | ||
| 1478 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1479 | - if section_id == 0x0021 or section_id == 0x0022: | ||
| 1480 | - moduletype_id = section_id | ||
| 1481 | - moduletype_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1482 | - unused = moduletype_id | ||
| 1483 | - unused = moduletype_reserved | ||
| 1484 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1485 | - if section_id == 0x0025: | ||
| 1486 | - modulereadonly_id = section_id | ||
| 1487 | - check_value('MODULEREADONLY_Id', 0x0025, modulereadonly_id) | ||
| 1488 | - modulereadonly_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1489 | - check_value('MODULEREADONLY_Reserved', 0x0000, modulereadonly_reserved) | ||
| 1490 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1491 | - if section_id == 0x0028: | ||
| 1492 | - moduleprivate_id = section_id | ||
| 1493 | - check_value('MODULEPRIVATE_Id', 0x0028, moduleprivate_id) | ||
| 1494 | - moduleprivate_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1495 | - check_value('MODULEPRIVATE_Reserved', 0x0000, moduleprivate_reserved) | 1420 | + for projectmodule_index in xrange(0, projectmodules_count): |
| 1421 | + try: | ||
| 1422 | + modulename_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1423 | + check_value('MODULENAME_Id', 0x0019, modulename_id) | ||
| 1424 | + modulename_sizeof_modulename = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1425 | + modulename_modulename = dir_stream.read(modulename_sizeof_modulename) | ||
| 1426 | + # account for optional sections | ||
| 1496 | section_id = struct.unpack("<H", dir_stream.read(2))[0] | 1427 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1497 | - if section_id == 0x002B: # TERMINATOR | ||
| 1498 | - module_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1499 | - check_value('MODULE_Reserved', 0x0000, module_reserved) | ||
| 1500 | - section_id = None | ||
| 1501 | - if section_id != None: | ||
| 1502 | - log.warning('unknown or invalid module section id {0:04X}'.format(section_id)) | ||
| 1503 | - | ||
| 1504 | - log.debug('Project CodePage = %d' % projectcodepage_codepage) | ||
| 1505 | - vba_codec = 'cp%d' % projectcodepage_codepage | ||
| 1506 | - log.debug("ModuleName = {0}".format(modulename_modulename)) | ||
| 1507 | - log.debug("StreamName = {0}".format(repr(modulestreamname_streamname))) | ||
| 1508 | - streamname_unicode = modulestreamname_streamname.decode(vba_codec) | ||
| 1509 | - log.debug("StreamName.decode('%s') = %s" % (vba_codec, repr(streamname_unicode))) | ||
| 1510 | - log.debug("StreamNameUnicode = {0}".format(repr(modulestreamname_streamname_unicode))) | ||
| 1511 | - log.debug("TextOffset = {0}".format(moduleoffset_textoffset)) | ||
| 1512 | - | ||
| 1513 | - code_path = vba_root + u'VBA/' + streamname_unicode | ||
| 1514 | - #TODO: test if stream exists | ||
| 1515 | - log.debug('opening VBA code stream %s' % repr(code_path)) | ||
| 1516 | - code_data = ole.openstream(code_path).read() | ||
| 1517 | - log.debug("length of code_data = {0}".format(len(code_data))) | ||
| 1518 | - log.debug("offset of code_data = {0}".format(moduleoffset_textoffset)) | ||
| 1519 | - code_data = code_data[moduleoffset_textoffset:] | ||
| 1520 | - if len(code_data) > 0: | ||
| 1521 | - code_data = decompress_stream(code_data) | ||
| 1522 | - # case-insensitive search in the code_modules dict to find the file extension: | ||
| 1523 | - filext = code_modules.get(modulename_modulename.lower(), 'bin') | ||
| 1524 | - filename = '{0}.{1}'.format(modulename_modulename, filext) | ||
| 1525 | - #TODO: also yield the codepage so that callers can decode it properly | ||
| 1526 | - yield (code_path, filename, code_data) | ||
| 1527 | - # print '-'*79 | ||
| 1528 | - # print filename | ||
| 1529 | - # print '' | ||
| 1530 | - # print code_data | ||
| 1531 | - # print '' | ||
| 1532 | - log.debug('extracted file {0}'.format(filename)) | ||
| 1533 | - else: | ||
| 1534 | - log.warning("module stream {0} has code data length 0".format(modulestreamname_streamname)) | ||
| 1535 | - _ = unused | 1428 | + if section_id == 0x0047: |
| 1429 | + modulename_unicode_id = section_id | ||
| 1430 | + modulename_unicode_sizeof_modulename_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1431 | + modulename_unicode_modulename_unicode = dir_stream.read(modulename_unicode_sizeof_modulename_unicode) | ||
| 1432 | + unused = modulename_unicode_id | ||
| 1433 | + unused = modulename_unicode_modulename_unicode | ||
| 1434 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1435 | + if section_id == 0x001A: | ||
| 1436 | + modulestreamname_id = section_id | ||
| 1437 | + modulestreamname_sizeof_streamname = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1438 | + modulestreamname_streamname = dir_stream.read(modulestreamname_sizeof_streamname) | ||
| 1439 | + modulestreamname_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1440 | + check_value('MODULESTREAMNAME_Reserved', 0x0032, modulestreamname_reserved) | ||
| 1441 | + modulestreamname_sizeof_streamname_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1442 | + modulestreamname_streamname_unicode = dir_stream.read(modulestreamname_sizeof_streamname_unicode) | ||
| 1443 | + unused = modulestreamname_id | ||
| 1444 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1445 | + if section_id == 0x001C: | ||
| 1446 | + moduledocstring_id = section_id | ||
| 1447 | + check_value('MODULEDOCSTRING_Id', 0x001C, moduledocstring_id) | ||
| 1448 | + moduledocstring_sizeof_docstring = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1449 | + moduledocstring_docstring = dir_stream.read(moduledocstring_sizeof_docstring) | ||
| 1450 | + moduledocstring_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1451 | + check_value('MODULEDOCSTRING_Reserved', 0x0048, moduledocstring_reserved) | ||
| 1452 | + moduledocstring_sizeof_docstring_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1453 | + moduledocstring_docstring_unicode = dir_stream.read(moduledocstring_sizeof_docstring_unicode) | ||
| 1454 | + unused = moduledocstring_docstring | ||
| 1455 | + unused = moduledocstring_docstring_unicode | ||
| 1456 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1457 | + if section_id == 0x0031: | ||
| 1458 | + moduleoffset_id = section_id | ||
| 1459 | + check_value('MODULEOFFSET_Id', 0x0031, moduleoffset_id) | ||
| 1460 | + moduleoffset_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1461 | + check_value('MODULEOFFSET_Size', 0x0004, moduleoffset_size) | ||
| 1462 | + moduleoffset_textoffset = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1463 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1464 | + if section_id == 0x001E: | ||
| 1465 | + modulehelpcontext_id = section_id | ||
| 1466 | + check_value('MODULEHELPCONTEXT_Id', 0x001E, modulehelpcontext_id) | ||
| 1467 | + modulehelpcontext_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1468 | + check_value('MODULEHELPCONTEXT_Size', 0x0004, modulehelpcontext_size) | ||
| 1469 | + modulehelpcontext_helpcontext = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1470 | + unused = modulehelpcontext_helpcontext | ||
| 1471 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1472 | + if section_id == 0x002C: | ||
| 1473 | + modulecookie_id = section_id | ||
| 1474 | + check_value('MODULECOOKIE_Id', 0x002C, modulecookie_id) | ||
| 1475 | + modulecookie_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1476 | + check_value('MODULECOOKIE_Size', 0x0002, modulecookie_size) | ||
| 1477 | + modulecookie_cookie = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1478 | + unused = modulecookie_cookie | ||
| 1479 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1480 | + if section_id == 0x0021 or section_id == 0x0022: | ||
| 1481 | + moduletype_id = section_id | ||
| 1482 | + moduletype_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1483 | + unused = moduletype_id | ||
| 1484 | + unused = moduletype_reserved | ||
| 1485 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1486 | + if section_id == 0x0025: | ||
| 1487 | + modulereadonly_id = section_id | ||
| 1488 | + check_value('MODULEREADONLY_Id', 0x0025, modulereadonly_id) | ||
| 1489 | + modulereadonly_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1490 | + check_value('MODULEREADONLY_Reserved', 0x0000, modulereadonly_reserved) | ||
| 1491 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1492 | + if section_id == 0x0028: | ||
| 1493 | + moduleprivate_id = section_id | ||
| 1494 | + check_value('MODULEPRIVATE_Id', 0x0028, moduleprivate_id) | ||
| 1495 | + moduleprivate_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1496 | + check_value('MODULEPRIVATE_Reserved', 0x0000, moduleprivate_reserved) | ||
| 1497 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1498 | + if section_id == 0x002B: # TERMINATOR | ||
| 1499 | + module_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1500 | + check_value('MODULE_Reserved', 0x0000, module_reserved) | ||
| 1501 | + section_id = None | ||
| 1502 | + if section_id != None: | ||
| 1503 | + log.warning('unknown or invalid module section id {0:04X}'.format(section_id)) | ||
| 1504 | + | ||
| 1505 | + log.debug('Project CodePage = %d' % projectcodepage_codepage) | ||
| 1506 | + vba_codec = 'cp%d' % projectcodepage_codepage | ||
| 1507 | + log.debug("ModuleName = {0}".format(modulename_modulename)) | ||
| 1508 | + log.debug("StreamName = {0}".format(repr(modulestreamname_streamname))) | ||
| 1509 | + streamname_unicode = modulestreamname_streamname.decode(vba_codec) | ||
| 1510 | + log.debug("StreamName.decode('%s') = %s" % (vba_codec, repr(streamname_unicode))) | ||
| 1511 | + log.debug("StreamNameUnicode = {0}".format(repr(modulestreamname_streamname_unicode))) | ||
| 1512 | + log.debug("TextOffset = {0}".format(moduleoffset_textoffset)) | ||
| 1513 | + | ||
| 1514 | + code_path = vba_root + u'VBA/' + streamname_unicode | ||
| 1515 | + #TODO: test if stream exists | ||
| 1516 | + log.debug('opening VBA code stream %s' % repr(code_path)) | ||
| 1517 | + code_data = ole.openstream(code_path).read() | ||
| 1518 | + log.debug("length of code_data = {0}".format(len(code_data))) | ||
| 1519 | + log.debug("offset of code_data = {0}".format(moduleoffset_textoffset)) | ||
| 1520 | + code_data = code_data[moduleoffset_textoffset:] | ||
| 1521 | + if len(code_data) > 0: | ||
| 1522 | + code_data = decompress_stream(code_data) | ||
| 1523 | + # case-insensitive search in the code_modules dict to find the file extension: | ||
| 1524 | + filext = code_modules.get(modulename_modulename.lower(), 'bin') | ||
| 1525 | + filename = '{0}.{1}'.format(modulename_modulename, filext) | ||
| 1526 | + #TODO: also yield the codepage so that callers can decode it properly | ||
| 1527 | + yield (code_path, filename, code_data) | ||
| 1528 | + # print '-'*79 | ||
| 1529 | + # print filename | ||
| 1530 | + # print '' | ||
| 1531 | + # print code_data | ||
| 1532 | + # print '' | ||
| 1533 | + log.debug('extracted file {0}'.format(filename)) | ||
| 1534 | + else: | ||
| 1535 | + log.warning("module stream {0} has code data length 0".format(modulestreamname_streamname)) | ||
| 1536 | + except Exception as exc: | ||
| 1537 | + log.info('Error parsing module {} of {} in _extract_vba:' | ||
| 1538 | + .format(projectmodule_index, projectmodules_count), | ||
| 1539 | + exc_info=True) | ||
| 1540 | + _ = unused # make pylint happy: now variable "unused" is being used ;-) | ||
| 1536 | return | 1541 | return |
| 1537 | 1542 | ||
| 1538 | 1543 | ||
| @@ -1770,13 +1775,13 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): | @@ -1770,13 +1775,13 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): | ||
| 1770 | # de-code and re-encode | 1775 | # de-code and re-encode |
| 1771 | dencoded = json_obj.decode(encoding, errors).encode(encoding, errors) | 1776 | dencoded = json_obj.decode(encoding, errors).encode(encoding, errors) |
| 1772 | if dencoded != json_obj: | 1777 | if dencoded != json_obj: |
| 1773 | - log.info('json2ascii: replaced: {0} (len {1})' | 1778 | + log.debug('json2ascii: replaced: {0} (len {1})' |
| 1774 | .format(json_obj, len(json_obj))) | 1779 | .format(json_obj, len(json_obj))) |
| 1775 | - log.info('json2ascii: with: {0} (len {1})' | 1780 | + log.debug('json2ascii: with: {0} (len {1})' |
| 1776 | .format(dencoded, len(dencoded))) | 1781 | .format(dencoded, len(dencoded))) |
| 1777 | return dencoded | 1782 | return dencoded |
| 1778 | elif isinstance(json_obj, unicode): | 1783 | elif isinstance(json_obj, unicode): |
| 1779 | - log.info('json2ascii: replaced: {0}' | 1784 | + log.debug('json2ascii: encode unicode: {0}' |
| 1780 | .format(json_obj.encode(encoding, errors))) | 1785 | .format(json_obj.encode(encoding, errors))) |
| 1781 | # cannot put original into logger | 1786 | # cannot put original into logger |
| 1782 | # print 'original: ' json_obj | 1787 | # print 'original: ' json_obj |