Commit 7374be1e6f51334f235d58c21e072069750c054b
1 parent
55483626
olevba: added class VBA_Project
Showing
1 changed file
with
531 additions
and
454 deletions
oletools/olevba.py
| @@ -262,6 +262,7 @@ import zlib | @@ -262,6 +262,7 @@ import zlib | ||
| 262 | import email # for MHTML parsing | 262 | import email # for MHTML parsing |
| 263 | import string # for printable | 263 | import string # for printable |
| 264 | import json # for json output mode (argument --json) | 264 | import json # for json output mode (argument --json) |
| 265 | +import codecs | ||
| 265 | 266 | ||
| 266 | # import lxml or ElementTree for XML parsing: | 267 | # import lxml or ElementTree for XML parsing: |
| 267 | try: | 268 | try: |
| @@ -1337,6 +1338,525 @@ def decompress_stream(compressed_container): | @@ -1337,6 +1338,525 @@ def decompress_stream(compressed_container): | ||
| 1337 | return bytes(decompressed_container) | 1338 | return bytes(decompressed_container) |
| 1338 | 1339 | ||
| 1339 | 1340 | ||
| 1341 | +class VBA_Project(object): | ||
| 1342 | + """ | ||
| 1343 | + Class to parse a VBA project from an OLE file, and to store all the corresponding | ||
| 1344 | + metadata and VBA modules. | ||
| 1345 | + """ | ||
| 1346 | + | ||
| 1347 | + def __init__(self, ole, vba_root, project_path, dir_path, relaxed=False): | ||
| 1348 | + """ | ||
| 1349 | + Extract VBA macros from an OleFileIO object. | ||
| 1350 | + | ||
| 1351 | + :param vba_root: path to the VBA root storage, containing the VBA storage and the PROJECT stream | ||
| 1352 | + :param vba_project: path to the PROJECT stream | ||
| 1353 | + :param relaxed: If True, only create info/debug log entry if data is not as expected | ||
| 1354 | + (e.g. opening substream fails); if False, raise an error in this case | ||
| 1355 | + """ | ||
| 1356 | + self.ole = ole | ||
| 1357 | + self.vba_root = vba_root | ||
| 1358 | + self. project_path = project_path | ||
| 1359 | + self.dir_path = dir_path | ||
| 1360 | + self.relaxed = relaxed | ||
| 1361 | + log.debug('Parsing the dir stream from %r' % dir_path) | ||
| 1362 | + # read data from dir stream (compressed) | ||
| 1363 | + dir_compressed = ole.openstream(dir_path).read() | ||
| 1364 | + # decompress it: | ||
| 1365 | + dir_stream = BytesIO(decompress_stream(bytearray(dir_compressed))) | ||
| 1366 | + # store reference for later use: | ||
| 1367 | + self.dir_stream = dir_stream | ||
| 1368 | + | ||
| 1369 | + # reference: MS-VBAL 2.3.4.2 dir Stream: Version Independent Project Information | ||
| 1370 | + | ||
| 1371 | + # PROJECTSYSKIND Record | ||
| 1372 | + # Specifies the platform for which the VBA project is created. | ||
| 1373 | + projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1374 | + self.check_value('PROJECTSYSKIND_Id', 0x0001, projectsyskind_id) | ||
| 1375 | + projectsyskind_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1376 | + self.check_value('PROJECTSYSKIND_Size', 0x0004, projectsyskind_size) | ||
| 1377 | + self.syskind = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1378 | + SYSKIND_NAME = { | ||
| 1379 | + 0x00: "16-bit Windows", | ||
| 1380 | + 0x01: "32-bit Windows", | ||
| 1381 | + 0x02: "Macintosh", | ||
| 1382 | + 0x03: "64-bit Windows" | ||
| 1383 | + } | ||
| 1384 | + self.syskind_name = SYSKIND_NAME.get(self.syskind, 'Unknown') | ||
| 1385 | + log.debug("PROJECTSYSKIND_SysKind: %d - %s" % (self.syskind, self.syskind_name)) | ||
| 1386 | + if self.syskind not in SYSKIND_NAME: | ||
| 1387 | + log.error("invalid PROJECTSYSKIND_SysKind {0:04X}".format(self.syskind)) | ||
| 1388 | + | ||
| 1389 | + # PROJECTLCID Record | ||
| 1390 | + # Specifies the VBA project's LCID. | ||
| 1391 | + projectlcid_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1392 | + self.check_value('PROJECTLCID_Id', 0x0002, projectlcid_id) | ||
| 1393 | + projectlcid_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1394 | + self.check_value('PROJECTLCID_Size', 0x0004, projectlcid_size) | ||
| 1395 | + # Lcid (4 bytes): An unsigned integer that specifies the LCID value for the VBA project. MUST be 0x00000409. | ||
| 1396 | + self.lcid = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1397 | + self.check_value('PROJECTLCID_Lcid', 0x409, self.lcid) | ||
| 1398 | + | ||
| 1399 | + # PROJECTLCIDINVOKE Record | ||
| 1400 | + # Specifies an LCID value used for Invoke calls on an Automation server as specified in [MS-OAUT] section 3.1.4.4. | ||
| 1401 | + projectlcidinvoke_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1402 | + self.check_value('PROJECTLCIDINVOKE_Id', 0x0014, projectlcidinvoke_id) | ||
| 1403 | + projectlcidinvoke_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1404 | + self.check_value('PROJECTLCIDINVOKE_Size', 0x0004, projectlcidinvoke_size) | ||
| 1405 | + # LcidInvoke (4 bytes): An unsigned integer that specifies the LCID value used for Invoke calls. MUST be 0x00000409. | ||
| 1406 | + self.lcidinvoke = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1407 | + self.check_value('PROJECTLCIDINVOKE_LcidInvoke', 0x409, self.lcidinvoke) | ||
| 1408 | + | ||
| 1409 | + # PROJECTCODEPAGE Record | ||
| 1410 | + # Specifies the VBA project's code page. | ||
| 1411 | + projectcodepage_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1412 | + self.check_value('PROJECTCODEPAGE_Id', 0x0003, projectcodepage_id) | ||
| 1413 | + projectcodepage_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1414 | + self.check_value('PROJECTCODEPAGE_Size', 0x0002, projectcodepage_size) | ||
| 1415 | + self.codepage = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1416 | + log.debug('Project Code Page: %r' % self.codepage) | ||
| 1417 | + if self.codepage in MAC_CODEPAGES: | ||
| 1418 | + self.codec = MAC_CODEPAGES[self.codepage] | ||
| 1419 | + else: | ||
| 1420 | + self.codec = 'cp%d' % self.codepage | ||
| 1421 | + # TODO: check if valid code page or raise a clear exception, and use UTF-8 as default? | ||
| 1422 | + try: | ||
| 1423 | + codecs.lookup(self.codec) | ||
| 1424 | + except LookupError: | ||
| 1425 | + log.error('Codec not found for code page %d, using UTF-8 as fallback.' % self.codepage) | ||
| 1426 | + self.codec = 'utf8' | ||
| 1427 | + log.debug('Python codec corresponding to code page %d: %s' % (self.codepage, self.codec)) | ||
| 1428 | + | ||
| 1429 | + | ||
| 1430 | + # PROJECTNAME Record | ||
| 1431 | + # Specifies a unique VBA identifier as the name of the VBA project. | ||
| 1432 | + projectname_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1433 | + self.check_value('PROJECTNAME_Id', 0x0004, projectname_id) | ||
| 1434 | + sizeof_projectname = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1435 | + log.debug('Project name size: %d bytes' % sizeof_projectname) | ||
| 1436 | + if sizeof_projectname < 1 or sizeof_projectname > 128: | ||
| 1437 | + # TODO: raise an actual error? What is MS Office's behaviour? | ||
| 1438 | + log.error("PROJECTNAME_SizeOfProjectName value not in range [1-128]: {0}".format(sizeof_projectname)) | ||
| 1439 | + projectname_bytes = dir_stream.read(sizeof_projectname) | ||
| 1440 | + self.projectname = self.decode_bytes(projectname_bytes) | ||
| 1441 | + | ||
| 1442 | + | ||
| 1443 | + # PROJECTDOCSTRING Record | ||
| 1444 | + # Specifies the description for the VBA project. | ||
| 1445 | + projectdocstring_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1446 | + self.check_value('PROJECTDOCSTRING_Id', 0x0005, projectdocstring_id) | ||
| 1447 | + projectdocstring_sizeof_docstring = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1448 | + if projectdocstring_sizeof_docstring > 2000: | ||
| 1449 | + log.error( | ||
| 1450 | + "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(projectdocstring_sizeof_docstring)) | ||
| 1451 | + # DocString (variable): An array of SizeOfDocString bytes that specifies the description for the VBA project. | ||
| 1452 | + # MUST contain MBCS characters encoded using the code page specified in PROJECTCODEPAGE (section 2.3.4.2.1.4). | ||
| 1453 | + # MUST NOT contain null characters. | ||
| 1454 | + docstring_bytes = dir_stream.read(projectdocstring_sizeof_docstring) | ||
| 1455 | + self.docstring = self.decode_bytes(docstring_bytes) | ||
| 1456 | + projectdocstring_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1457 | + self.check_value('PROJECTDOCSTRING_Reserved', 0x0040, projectdocstring_reserved) | ||
| 1458 | + projectdocstring_sizeof_docstring_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1459 | + if projectdocstring_sizeof_docstring_unicode % 2 != 0: | ||
| 1460 | + log.error("PROJECTDOCSTRING_SizeOfDocStringUnicode is not even") | ||
| 1461 | + # DocStringUnicode (variable): An array of SizeOfDocStringUnicode bytes that specifies the description for the | ||
| 1462 | + # VBA project. MUST contain UTF-16 characters. MUST NOT contain null characters. | ||
| 1463 | + # MUST contain the UTF-16 encoding of DocString. | ||
| 1464 | + docstring_unicode_bytes = dir_stream.read(projectdocstring_sizeof_docstring_unicode) | ||
| 1465 | + self.docstring_unicode = docstring_unicode_bytes.decode('utf16', errors='replace') | ||
| 1466 | + | ||
| 1467 | + # PROJECTHELPFILEPATH Record - MS-OVBA 2.3.4.2.1.7 | ||
| 1468 | + projecthelpfilepath_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1469 | + self.check_value('PROJECTHELPFILEPATH_Id', 0x0006, projecthelpfilepath_id) | ||
| 1470 | + projecthelpfilepath_sizeof_helpfile1 = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1471 | + if projecthelpfilepath_sizeof_helpfile1 > 260: | ||
| 1472 | + log.error( | ||
| 1473 | + "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(projecthelpfilepath_sizeof_helpfile1)) | ||
| 1474 | + projecthelpfilepath_helpfile1 = dir_stream.read(projecthelpfilepath_sizeof_helpfile1) | ||
| 1475 | + projecthelpfilepath_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1476 | + self.check_value('PROJECTHELPFILEPATH_Reserved', 0x003D, projecthelpfilepath_reserved) | ||
| 1477 | + projecthelpfilepath_sizeof_helpfile2 = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1478 | + if projecthelpfilepath_sizeof_helpfile2 != projecthelpfilepath_sizeof_helpfile1: | ||
| 1479 | + log.error("PROJECTHELPFILEPATH_SizeOfHelpFile1 does not equal PROJECTHELPFILEPATH_SizeOfHelpFile2") | ||
| 1480 | + projecthelpfilepath_helpfile2 = dir_stream.read(projecthelpfilepath_sizeof_helpfile2) | ||
| 1481 | + if projecthelpfilepath_helpfile2 != projecthelpfilepath_helpfile1: | ||
| 1482 | + log.error("PROJECTHELPFILEPATH_HelpFile1 does not equal PROJECTHELPFILEPATH_HelpFile2") | ||
| 1483 | + | ||
| 1484 | + # PROJECTHELPCONTEXT Record | ||
| 1485 | + projecthelpcontext_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1486 | + self.check_value('PROJECTHELPCONTEXT_Id', 0x0007, projecthelpcontext_id) | ||
| 1487 | + projecthelpcontext_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1488 | + self.check_value('PROJECTHELPCONTEXT_Size', 0x0004, projecthelpcontext_size) | ||
| 1489 | + projecthelpcontext_helpcontext = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1490 | + unused = projecthelpcontext_helpcontext | ||
| 1491 | + | ||
| 1492 | + # PROJECTLIBFLAGS Record | ||
| 1493 | + projectlibflags_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1494 | + self.check_value('PROJECTLIBFLAGS_Id', 0x0008, projectlibflags_id) | ||
| 1495 | + projectlibflags_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1496 | + self.check_value('PROJECTLIBFLAGS_Size', 0x0004, projectlibflags_size) | ||
| 1497 | + projectlibflags_projectlibflags = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1498 | + self.check_value('PROJECTLIBFLAGS_ProjectLibFlags', 0x0000, projectlibflags_projectlibflags) | ||
| 1499 | + | ||
| 1500 | + # PROJECTVERSION Record | ||
| 1501 | + projectversion_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1502 | + self.check_value('PROJECTVERSION_Id', 0x0009, projectversion_id) | ||
| 1503 | + projectversion_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1504 | + self.check_value('PROJECTVERSION_Reserved', 0x0004, projectversion_reserved) | ||
| 1505 | + projectversion_versionmajor = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1506 | + projectversion_versionminor = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1507 | + unused = projectversion_versionmajor | ||
| 1508 | + unused = projectversion_versionminor | ||
| 1509 | + | ||
| 1510 | + # PROJECTCONSTANTS Record | ||
| 1511 | + projectconstants_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1512 | + self.check_value('PROJECTCONSTANTS_Id', 0x000C, projectconstants_id) | ||
| 1513 | + projectconstants_sizeof_constants = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1514 | + if projectconstants_sizeof_constants > 1015: | ||
| 1515 | + log.error( | ||
| 1516 | + "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(projectconstants_sizeof_constants)) | ||
| 1517 | + projectconstants_constants = dir_stream.read(projectconstants_sizeof_constants) | ||
| 1518 | + projectconstants_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1519 | + self.check_value('PROJECTCONSTANTS_Reserved', 0x003C, projectconstants_reserved) | ||
| 1520 | + projectconstants_sizeof_constants_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1521 | + if projectconstants_sizeof_constants_unicode % 2 != 0: | ||
| 1522 | + log.error("PROJECTCONSTANTS_SizeOfConstantsUnicode is not even") | ||
| 1523 | + projectconstants_constants_unicode = dir_stream.read(projectconstants_sizeof_constants_unicode) | ||
| 1524 | + unused = projectconstants_constants | ||
| 1525 | + unused = projectconstants_constants_unicode | ||
| 1526 | + | ||
| 1527 | + # array of REFERENCE records | ||
| 1528 | + check = None | ||
| 1529 | + while True: | ||
| 1530 | + check = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1531 | + log.debug("reference type = {0:04X}".format(check)) | ||
| 1532 | + if check == 0x000F: | ||
| 1533 | + break | ||
| 1534 | + | ||
| 1535 | + if check == 0x0016: | ||
| 1536 | + # REFERENCENAME | ||
| 1537 | + reference_id = check | ||
| 1538 | + reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1539 | + reference_name = dir_stream.read(reference_sizeof_name) | ||
| 1540 | + reference_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1541 | + # According to [MS-OVBA] 2.3.4.2.2.2 REFERENCENAME Record: | ||
| 1542 | + # "Reserved (2 bytes): MUST be 0x003E. MUST be ignored." | ||
| 1543 | + # So let's ignore it, otherwise it crashes on some files (issue #132) | ||
| 1544 | + # PR #135 by @c1fe: | ||
| 1545 | + # contrary to the specification I think that the unicode name | ||
| 1546 | + # is optional. if reference_reserved is not 0x003E I think it | ||
| 1547 | + # is actually the start of another REFERENCE record | ||
| 1548 | + # at least when projectsyskind_syskind == 0x02 (Macintosh) | ||
| 1549 | + if reference_reserved == 0x003E: | ||
| 1550 | + #if reference_reserved not in (0x003E, 0x000D): | ||
| 1551 | + # raise UnexpectedDataError(dir_path, 'REFERENCE_Reserved', | ||
| 1552 | + # 0x0003E, reference_reserved) | ||
| 1553 | + reference_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1554 | + reference_name_unicode = dir_stream.read(reference_sizeof_name_unicode) | ||
| 1555 | + unused = reference_id | ||
| 1556 | + unused = reference_name | ||
| 1557 | + unused = reference_name_unicode | ||
| 1558 | + continue | ||
| 1559 | + else: | ||
| 1560 | + check = reference_reserved | ||
| 1561 | + log.debug("reference type = {0:04X}".format(check)) | ||
| 1562 | + | ||
| 1563 | + if check == 0x0033: | ||
| 1564 | + # REFERENCEORIGINAL (followed by REFERENCECONTROL) | ||
| 1565 | + referenceoriginal_id = check | ||
| 1566 | + referenceoriginal_sizeof_libidoriginal = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1567 | + referenceoriginal_libidoriginal = dir_stream.read(referenceoriginal_sizeof_libidoriginal) | ||
| 1568 | + unused = referenceoriginal_id | ||
| 1569 | + unused = referenceoriginal_libidoriginal | ||
| 1570 | + continue | ||
| 1571 | + | ||
| 1572 | + if check == 0x002F: | ||
| 1573 | + # REFERENCECONTROL | ||
| 1574 | + referencecontrol_id = check | ||
| 1575 | + referencecontrol_sizetwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore | ||
| 1576 | + referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1577 | + referencecontrol_libidtwiddled = dir_stream.read(referencecontrol_sizeof_libidtwiddled) | ||
| 1578 | + referencecontrol_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore | ||
| 1579 | + self.check_value('REFERENCECONTROL_Reserved1', 0x0000, referencecontrol_reserved1) | ||
| 1580 | + referencecontrol_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore | ||
| 1581 | + self.check_value('REFERENCECONTROL_Reserved2', 0x0000, referencecontrol_reserved2) | ||
| 1582 | + unused = referencecontrol_id | ||
| 1583 | + unused = referencecontrol_sizetwiddled | ||
| 1584 | + unused = referencecontrol_libidtwiddled | ||
| 1585 | + # optional field | ||
| 1586 | + check2 = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1587 | + if check2 == 0x0016: | ||
| 1588 | + referencecontrol_namerecordextended_id = check | ||
| 1589 | + referencecontrol_namerecordextended_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1590 | + referencecontrol_namerecordextended_name = dir_stream.read( | ||
| 1591 | + referencecontrol_namerecordextended_sizeof_name) | ||
| 1592 | + referencecontrol_namerecordextended_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1593 | + if referencecontrol_namerecordextended_reserved == 0x003E: | ||
| 1594 | + referencecontrol_namerecordextended_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1595 | + referencecontrol_namerecordextended_name_unicode = dir_stream.read( | ||
| 1596 | + referencecontrol_namerecordextended_sizeof_name_unicode) | ||
| 1597 | + referencecontrol_reserved3 = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1598 | + unused = referencecontrol_namerecordextended_id | ||
| 1599 | + unused = referencecontrol_namerecordextended_name | ||
| 1600 | + unused = referencecontrol_namerecordextended_name_unicode | ||
| 1601 | + else: | ||
| 1602 | + referencecontrol_reserved3 = referencecontrol_namerecordextended_reserved | ||
| 1603 | + else: | ||
| 1604 | + referencecontrol_reserved3 = check2 | ||
| 1605 | + | ||
| 1606 | + self.check_value('REFERENCECONTROL_Reserved3', 0x0030, referencecontrol_reserved3) | ||
| 1607 | + referencecontrol_sizeextended = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1608 | + referencecontrol_sizeof_libidextended = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1609 | + referencecontrol_libidextended = dir_stream.read(referencecontrol_sizeof_libidextended) | ||
| 1610 | + referencecontrol_reserved4 = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1611 | + referencecontrol_reserved5 = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1612 | + referencecontrol_originaltypelib = dir_stream.read(16) | ||
| 1613 | + referencecontrol_cookie = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1614 | + unused = referencecontrol_sizeextended | ||
| 1615 | + unused = referencecontrol_libidextended | ||
| 1616 | + unused = referencecontrol_reserved4 | ||
| 1617 | + unused = referencecontrol_reserved5 | ||
| 1618 | + unused = referencecontrol_originaltypelib | ||
| 1619 | + unused = referencecontrol_cookie | ||
| 1620 | + continue | ||
| 1621 | + | ||
| 1622 | + if check == 0x000D: | ||
| 1623 | + # REFERENCEREGISTERED | ||
| 1624 | + referenceregistered_id = check | ||
| 1625 | + referenceregistered_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1626 | + referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1627 | + referenceregistered_libid = dir_stream.read(referenceregistered_sizeof_libid) | ||
| 1628 | + referenceregistered_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1629 | + self.check_value('REFERENCEREGISTERED_Reserved1', 0x0000, referenceregistered_reserved1) | ||
| 1630 | + referenceregistered_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1631 | + self.check_value('REFERENCEREGISTERED_Reserved2', 0x0000, referenceregistered_reserved2) | ||
| 1632 | + unused = referenceregistered_id | ||
| 1633 | + unused = referenceregistered_size | ||
| 1634 | + unused = referenceregistered_libid | ||
| 1635 | + continue | ||
| 1636 | + | ||
| 1637 | + if check == 0x000E: | ||
| 1638 | + # REFERENCEPROJECT | ||
| 1639 | + referenceproject_id = check | ||
| 1640 | + referenceproject_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1641 | + referenceproject_sizeof_libidabsolute = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1642 | + referenceproject_libidabsolute = dir_stream.read(referenceproject_sizeof_libidabsolute) | ||
| 1643 | + referenceproject_sizeof_libidrelative = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1644 | + referenceproject_libidrelative = dir_stream.read(referenceproject_sizeof_libidrelative) | ||
| 1645 | + referenceproject_majorversion = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1646 | + referenceproject_minorversion = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1647 | + unused = referenceproject_id | ||
| 1648 | + unused = referenceproject_size | ||
| 1649 | + unused = referenceproject_libidabsolute | ||
| 1650 | + unused = referenceproject_libidrelative | ||
| 1651 | + unused = referenceproject_majorversion | ||
| 1652 | + unused = referenceproject_minorversion | ||
| 1653 | + continue | ||
| 1654 | + | ||
| 1655 | + log.error('invalid or unknown check Id {0:04X}'.format(check)) | ||
| 1656 | + # raise an exception instead of stopping abruptly (issue #180) | ||
| 1657 | + raise UnexpectedDataError(dir_path, 'reference type', (0x0F, 0x16, 0x33, 0x2F, 0x0D, 0x0E), check) | ||
| 1658 | + #sys.exit(0) | ||
| 1659 | + | ||
| 1660 | + def check_value(self, name, expected, value): | ||
| 1661 | + if expected != value: | ||
| 1662 | + if self.relaxed: | ||
| 1663 | + log.error("invalid value for {0} expected {1:04X} got {2:04X}" | ||
| 1664 | + .format(name, expected, value)) | ||
| 1665 | + else: | ||
| 1666 | + raise UnexpectedDataError(self.dir_path, name, expected, value) | ||
| 1667 | + | ||
| 1668 | + | ||
| 1669 | + def parse_modules(self): | ||
| 1670 | + dir_stream = self.dir_stream | ||
| 1671 | + # projectmodules_id has already been read by the previous loop = 0x000F | ||
| 1672 | + # projectmodules_id = check #struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1673 | + # self.check_value('PROJECTMODULES_Id', 0x000F, projectmodules_id) | ||
| 1674 | + projectmodules_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1675 | + self.check_value('PROJECTMODULES_Size', 0x0002, projectmodules_size) | ||
| 1676 | + projectmodules_count = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1677 | + projectmodules_projectcookierecord_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1678 | + self.check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, projectmodules_projectcookierecord_id) | ||
| 1679 | + projectmodules_projectcookierecord_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1680 | + self.check_value('PROJECTMODULES_ProjectCookieRecord_Size', 0x0002, projectmodules_projectcookierecord_size) | ||
| 1681 | + projectmodules_projectcookierecord_cookie = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1682 | + unused = projectmodules_projectcookierecord_cookie | ||
| 1683 | + | ||
| 1684 | + # short function to simplify unicode text output | ||
| 1685 | + uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace') | ||
| 1686 | + | ||
| 1687 | + log.debug("parsing {0} modules".format(projectmodules_count)) | ||
| 1688 | + for projectmodule_index in xrange(0, projectmodules_count): | ||
| 1689 | + try: | ||
| 1690 | + modulename_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1691 | + self.check_value('MODULENAME_Id', 0x0019, modulename_id) | ||
| 1692 | + modulename_sizeof_modulename = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1693 | + modulename_modulename = dir_stream.read(modulename_sizeof_modulename) | ||
| 1694 | + # TODO: preset variables to avoid "referenced before assignment" errors | ||
| 1695 | + modulename_unicode_modulename_unicode = '' | ||
| 1696 | + # account for optional sections | ||
| 1697 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1698 | + if section_id == 0x0047: | ||
| 1699 | + modulename_unicode_id = section_id | ||
| 1700 | + modulename_unicode_sizeof_modulename_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1701 | + modulename_unicode_modulename_unicode = dir_stream.read( | ||
| 1702 | + modulename_unicode_sizeof_modulename_unicode).decode('UTF-16LE', 'replace') | ||
| 1703 | + # just guessing that this is the same encoding as used in OleFileIO | ||
| 1704 | + unused = modulename_unicode_id | ||
| 1705 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1706 | + if section_id == 0x001A: | ||
| 1707 | + modulestreamname_id = section_id | ||
| 1708 | + modulestreamname_sizeof_streamname = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1709 | + modulestreamname_streamname = dir_stream.read(modulestreamname_sizeof_streamname) | ||
| 1710 | + modulestreamname_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1711 | + self.check_value('MODULESTREAMNAME_Reserved', 0x0032, modulestreamname_reserved) | ||
| 1712 | + modulestreamname_sizeof_streamname_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1713 | + modulestreamname_streamname_unicode = dir_stream.read( | ||
| 1714 | + modulestreamname_sizeof_streamname_unicode).decode('UTF-16LE', 'replace') | ||
| 1715 | + # just guessing that this is the same encoding as used in OleFileIO | ||
| 1716 | + unused = modulestreamname_id | ||
| 1717 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1718 | + if section_id == 0x001C: | ||
| 1719 | + moduledocstring_id = section_id | ||
| 1720 | + self.check_value('MODULEDOCSTRING_Id', 0x001C, moduledocstring_id) | ||
| 1721 | + moduledocstring_sizeof_docstring = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1722 | + moduledocstring_docstring = dir_stream.read(moduledocstring_sizeof_docstring) | ||
| 1723 | + moduledocstring_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1724 | + self.check_value('MODULEDOCSTRING_Reserved', 0x0048, moduledocstring_reserved) | ||
| 1725 | + moduledocstring_sizeof_docstring_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1726 | + moduledocstring_docstring_unicode = dir_stream.read(moduledocstring_sizeof_docstring_unicode) | ||
| 1727 | + unused = moduledocstring_docstring | ||
| 1728 | + unused = moduledocstring_docstring_unicode | ||
| 1729 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1730 | + if section_id == 0x0031: | ||
| 1731 | + moduleoffset_id = section_id | ||
| 1732 | + self.check_value('MODULEOFFSET_Id', 0x0031, moduleoffset_id) | ||
| 1733 | + moduleoffset_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1734 | + self.check_value('MODULEOFFSET_Size', 0x0004, moduleoffset_size) | ||
| 1735 | + moduleoffset_textoffset = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1736 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1737 | + if section_id == 0x001E: | ||
| 1738 | + modulehelpcontext_id = section_id | ||
| 1739 | + self.check_value('MODULEHELPCONTEXT_Id', 0x001E, modulehelpcontext_id) | ||
| 1740 | + modulehelpcontext_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1741 | + self.check_value('MODULEHELPCONTEXT_Size', 0x0004, modulehelpcontext_size) | ||
| 1742 | + modulehelpcontext_helpcontext = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1743 | + unused = modulehelpcontext_helpcontext | ||
| 1744 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1745 | + if section_id == 0x002C: | ||
| 1746 | + modulecookie_id = section_id | ||
| 1747 | + self.check_value('MODULECOOKIE_Id', 0x002C, modulecookie_id) | ||
| 1748 | + modulecookie_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1749 | + self.check_value('MODULECOOKIE_Size', 0x0002, modulecookie_size) | ||
| 1750 | + modulecookie_cookie = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1751 | + unused = modulecookie_cookie | ||
| 1752 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1753 | + if section_id == 0x0021 or section_id == 0x0022: | ||
| 1754 | + moduletype_id = section_id | ||
| 1755 | + moduletype_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1756 | + unused = moduletype_id | ||
| 1757 | + unused = moduletype_reserved | ||
| 1758 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1759 | + if section_id == 0x0025: | ||
| 1760 | + modulereadonly_id = section_id | ||
| 1761 | + self.check_value('MODULEREADONLY_Id', 0x0025, modulereadonly_id) | ||
| 1762 | + modulereadonly_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1763 | + self.check_value('MODULEREADONLY_Reserved', 0x0000, modulereadonly_reserved) | ||
| 1764 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1765 | + if section_id == 0x0028: | ||
| 1766 | + moduleprivate_id = section_id | ||
| 1767 | + self.check_value('MODULEPRIVATE_Id', 0x0028, moduleprivate_id) | ||
| 1768 | + moduleprivate_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1769 | + self.check_value('MODULEPRIVATE_Reserved', 0x0000, moduleprivate_reserved) | ||
| 1770 | + section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1771 | + if section_id == 0x002B: # TERMINATOR | ||
| 1772 | + module_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1773 | + self.check_value('MODULE_Reserved', 0x0000, module_reserved) | ||
| 1774 | + section_id = None | ||
| 1775 | + if section_id != None: | ||
| 1776 | + log.warning('unknown or invalid module section id {0:04X}'.format(section_id)) | ||
| 1777 | + | ||
| 1778 | + # TODO: handle case when modulestreamname_streamname is not provided | ||
| 1779 | + log.debug("ModuleName = {0}".format(modulename_modulename)) | ||
| 1780 | + log.debug("ModuleNameUnicode = {0}".format(uni_out(modulename_unicode_modulename_unicode))) | ||
| 1781 | + log.debug("StreamName = {0}".format(modulestreamname_streamname)) | ||
| 1782 | + try: | ||
| 1783 | + streamname_unicode = self.decode_bytes(modulestreamname_streamname) | ||
| 1784 | + except UnicodeError as ue: | ||
| 1785 | + log.debug('failed to decode stream name {0!r} with codec {1}' | ||
| 1786 | + .format(uni_out(streamname_unicode), self.codec)) | ||
| 1787 | + streamname_unicode = modulestreamname_streamname.decode(self.codec, errors='replace') | ||
| 1788 | + log.debug("StreamName.decode('%s') = %s" % (self.codec, uni_out(streamname_unicode))) | ||
| 1789 | + log.debug("StreamNameUnicode = {0}".format(uni_out(modulestreamname_streamname_unicode))) | ||
| 1790 | + log.debug("TextOffset = {0}".format(moduleoffset_textoffset)) | ||
| 1791 | + | ||
| 1792 | + code_data = None | ||
| 1793 | + try_names = streamname_unicode, \ | ||
| 1794 | + modulename_unicode_modulename_unicode, \ | ||
| 1795 | + modulestreamname_streamname_unicode | ||
| 1796 | + for stream_name in try_names: | ||
| 1797 | + # TODO: if olefile._find were less private, could replace this | ||
| 1798 | + # try-except with calls to it | ||
| 1799 | + try: | ||
| 1800 | + code_path = self.vba_root + u'VBA/' + stream_name | ||
| 1801 | + log.debug('opening VBA code stream %s' % uni_out(code_path)) | ||
| 1802 | + code_data = self.ole.openstream(code_path).read() | ||
| 1803 | + break | ||
| 1804 | + except IOError as ioe: | ||
| 1805 | + log.debug('failed to open stream VBA/%r (%r), try other name' | ||
| 1806 | + % (uni_out(stream_name), ioe)) | ||
| 1807 | + | ||
| 1808 | + if code_data is None: | ||
| 1809 | + log.info("Could not open stream %d of %d ('VBA/' + one of %r)!" | ||
| 1810 | + % (projectmodule_index, projectmodules_count, | ||
| 1811 | + '/'.join("'" + uni_out(stream_name) + "'" | ||
| 1812 | + for stream_name in try_names))) | ||
| 1813 | + if self.relaxed: | ||
| 1814 | + continue # ... with next submodule | ||
| 1815 | + else: | ||
| 1816 | + raise SubstreamOpenError('[BASE]', 'VBA/' + | ||
| 1817 | + uni_out(modulename_unicode_modulename_unicode)) | ||
| 1818 | + | ||
| 1819 | + log.debug("length of code_data = {0}".format(len(code_data))) | ||
| 1820 | + log.debug("offset of code_data = {0}".format(moduleoffset_textoffset)) | ||
| 1821 | + code_data = code_data[moduleoffset_textoffset:] | ||
| 1822 | + if len(code_data) > 0: | ||
| 1823 | + code_data = decompress_stream(bytearray(code_data)) | ||
| 1824 | + # case-insensitive search in the code_modules dict to find the file extension: | ||
| 1825 | + # filext = code_modules.get(modulename_modulename.lower(), 'bin') | ||
| 1826 | + filext = 'vba' | ||
| 1827 | + filename = '{0}.{1}'.format(modulename_modulename, filext) | ||
| 1828 | + #TODO: also yield the codepage so that callers can decode it properly | ||
| 1829 | + yield (code_path, filename, code_data) | ||
| 1830 | + # print '-'*79 | ||
| 1831 | + # print filename | ||
| 1832 | + # print '' | ||
| 1833 | + # print code_data | ||
| 1834 | + # print '' | ||
| 1835 | + log.debug('extracted file {0}'.format(filename)) | ||
| 1836 | + else: | ||
| 1837 | + log.warning("module stream {0} has code data length 0".format(modulestreamname_streamname)) | ||
| 1838 | + except (UnexpectedDataError, SubstreamOpenError): | ||
| 1839 | + raise | ||
| 1840 | + except Exception as exc: | ||
| 1841 | + log.info('Error parsing module {0} of {1} in _extract_vba:' | ||
| 1842 | + .format(projectmodule_index, projectmodules_count), | ||
| 1843 | + exc_info=True) | ||
| 1844 | + if not self.relaxed: | ||
| 1845 | + raise | ||
| 1846 | + _ = unused # make pylint happy: now variable "unused" is being used ;-) | ||
| 1847 | + return | ||
| 1848 | + | ||
def decode_bytes(self, bytes_string, errors='replace'):
    """
    Turn a bytes string into a unicode string with the codec held in
    self.codec (presumably the codec matching the project code page).

    :param bytes_string: bytes, raw data to decode
    :param errors: str, error handling scheme passed on to the decoder
        (default: 'replace')
    :return: str/unicode, the decoded text
    """
    # fetch the bound decode method first, then the codec name, so the
    # attribute lookups happen in the same order as in a single call expression
    decode = bytes_string.decode
    return decode(self.codec, errors=errors)
| 1857 | + | ||
| 1858 | + | ||
| 1859 | + | ||
| 1340 | def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | 1860 | def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1341 | """ | 1861 | """ |
| 1342 | Extract VBA macros from an OleFileIO object. | 1862 | Extract VBA macros from an OleFileIO object. |
| @@ -1348,10 +1868,15 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | @@ -1348,10 +1868,15 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | ||
| 1348 | (e.g. opening substream fails); if False, raise an error in this case | 1868 | (e.g. opening substream fails); if False, raise an error in this case |
| 1349 | This is a generator, yielding (stream path, VBA filename, VBA source code) for each VBA code stream | 1869 | This is a generator, yielding (stream path, VBA filename, VBA source code) for each VBA code stream |
| 1350 | """ | 1870 | """ |
| 1351 | - # Open the PROJECT stream: | ||
| 1352 | - project = ole.openstream(project_path) | ||
| 1353 | log.debug('relaxed is %s' % relaxed) | 1871 | log.debug('relaxed is %s' % relaxed) |
| 1354 | 1872 | ||
| 1873 | + project = VBA_Project(ole, vba_root, project_path, dir_path, relaxed=False) | ||
| 1874 | + | ||
| 1875 | + # Open the PROJECT stream: | ||
| 1876 | + # reference: [MS-OVBA] 2.3.1 PROJECT Stream | ||
| 1877 | + # TODO: in fact the PROJECT stream is encoded using the code page specified in the dir stream, should be read afterwards | ||
| 1878 | + project_stream = ole.openstream(project_path) | ||
| 1879 | + | ||
| 1355 | # sample content of the PROJECT stream: | 1880 | # sample content of the PROJECT stream: |
| 1356 | 1881 | ||
| 1357 | ## ID="{5312AC8A-349D-4950-BDD0-49BE3C4DD0F0}" | 1882 | ## ID="{5312AC8A-349D-4950-BDD0-49BE3C4DD0F0}" |
| @@ -1374,7 +1899,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | @@ -1374,7 +1899,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | ||
| 1374 | 1899 | ||
| 1375 | code_modules = {} | 1900 | code_modules = {} |
| 1376 | 1901 | ||
| 1377 | - for line in project: | 1902 | + for line in project_stream: |
| 1903 | + line = project.decode_bytes(line) | ||
| 1378 | log.debug('PROJECT: %r' % line) | 1904 | log.debug('PROJECT: %r' % line) |
| 1379 | line = line.strip() | 1905 | line = line.strip() |
| 1380 | if '=' in line: | 1906 | if '=' in line: |
| @@ -1396,457 +1922,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | @@ -1396,457 +1922,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | ||
| 1396 | elif name == 'BaseClass': | 1922 | elif name == 'BaseClass': |
| 1397 | code_modules[value] = FORM_EXTENSION | 1923 | code_modules[value] = FORM_EXTENSION |
| 1398 | 1924 | ||
| 1399 | - # read data from dir stream (compressed) | ||
| 1400 | - dir_compressed = ole.openstream(dir_path).read() | ||
| 1401 | - | ||
| 1402 | - def check_value(name, expected, value): | ||
| 1403 | - if expected != value: | ||
| 1404 | - if relaxed: | ||
| 1405 | - log.error("invalid value for {0} expected {1:04X} got {2:04X}" | ||
| 1406 | - .format(name, expected, value)) | ||
| 1407 | - else: | ||
| 1408 | - raise UnexpectedDataError(dir_path, name, expected, value) | ||
| 1409 | - | ||
| 1410 | - dir_stream = BytesIO(decompress_stream(bytearray(dir_compressed))) | ||
| 1411 | - | ||
| 1412 | - # PROJECTSYSKIND Record | ||
| 1413 | - projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1414 | - check_value('PROJECTSYSKIND_Id', 0x0001, projectsyskind_id) | ||
| 1415 | - projectsyskind_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1416 | - check_value('PROJECTSYSKIND_Size', 0x0004, projectsyskind_size) | ||
| 1417 | - projectsyskind_syskind = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1418 | - if projectsyskind_syskind == 0x00: | ||
| 1419 | - log.debug("16-bit Windows") | ||
| 1420 | - elif projectsyskind_syskind == 0x01: | ||
| 1421 | - log.debug("32-bit Windows") | ||
| 1422 | - elif projectsyskind_syskind == 0x02: | ||
| 1423 | - log.debug("Macintosh") | ||
| 1424 | - elif projectsyskind_syskind == 0x03: | ||
| 1425 | - log.debug("64-bit Windows") | ||
| 1426 | - else: | ||
| 1427 | - log.error("invalid PROJECTSYSKIND_SysKind {0:04X}".format(projectsyskind_syskind)) | ||
| 1428 | - | ||
| 1429 | - # PROJECTLCID Record | ||
| 1430 | - projectlcid_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1431 | - check_value('PROJECTLCID_Id', 0x0002, projectlcid_id) | ||
| 1432 | - projectlcid_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1433 | - check_value('PROJECTLCID_Size', 0x0004, projectlcid_size) | ||
| 1434 | - projectlcid_lcid = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1435 | - check_value('PROJECTLCID_Lcid', 0x409, projectlcid_lcid) | ||
| 1436 | - | ||
| 1437 | - # PROJECTLCIDINVOKE Record | ||
| 1438 | - projectlcidinvoke_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1439 | - check_value('PROJECTLCIDINVOKE_Id', 0x0014, projectlcidinvoke_id) | ||
| 1440 | - projectlcidinvoke_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1441 | - check_value('PROJECTLCIDINVOKE_Size', 0x0004, projectlcidinvoke_size) | ||
| 1442 | - projectlcidinvoke_lcidinvoke = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1443 | - check_value('PROJECTLCIDINVOKE_LcidInvoke', 0x409, projectlcidinvoke_lcidinvoke) | ||
| 1444 | - | ||
| 1445 | - # PROJECTCODEPAGE Record | ||
| 1446 | - projectcodepage_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1447 | - check_value('PROJECTCODEPAGE_Id', 0x0003, projectcodepage_id) | ||
| 1448 | - projectcodepage_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1449 | - check_value('PROJECTCODEPAGE_Size', 0x0002, projectcodepage_size) | ||
| 1450 | - projectcodepage_codepage = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1451 | - | ||
| 1452 | - # PROJECTNAME Record | ||
| 1453 | - projectname_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1454 | - check_value('PROJECTNAME_Id', 0x0004, projectname_id) | ||
| 1455 | - projectname_sizeof_projectname = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1456 | - if projectname_sizeof_projectname < 1 or projectname_sizeof_projectname > 128: | ||
| 1457 | - log.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(projectname_sizeof_projectname)) | ||
| 1458 | - projectname_projectname = dir_stream.read(projectname_sizeof_projectname) | ||
| 1459 | - unused = projectname_projectname | ||
| 1460 | - | ||
| 1461 | - # PROJECTDOCSTRING Record | ||
| 1462 | - projectdocstring_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1463 | - check_value('PROJECTDOCSTRING_Id', 0x0005, projectdocstring_id) | ||
| 1464 | - projectdocstring_sizeof_docstring = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1465 | - if projectdocstring_sizeof_docstring > 2000: | ||
| 1466 | - log.error( | ||
| 1467 | - "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(projectdocstring_sizeof_docstring)) | ||
| 1468 | - projectdocstring_docstring = dir_stream.read(projectdocstring_sizeof_docstring) | ||
| 1469 | - projectdocstring_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1470 | - check_value('PROJECTDOCSTRING_Reserved', 0x0040, projectdocstring_reserved) | ||
| 1471 | - projectdocstring_sizeof_docstring_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1472 | - if projectdocstring_sizeof_docstring_unicode % 2 != 0: | ||
| 1473 | - log.error("PROJECTDOCSTRING_SizeOfDocStringUnicode is not even") | ||
| 1474 | - projectdocstring_docstring_unicode = dir_stream.read(projectdocstring_sizeof_docstring_unicode) | ||
| 1475 | - unused = projectdocstring_docstring | ||
| 1476 | - unused = projectdocstring_docstring_unicode | ||
| 1477 | - | ||
| 1478 | - # PROJECTHELPFILEPATH Record - MS-OVBA 2.3.4.2.1.7 | ||
| 1479 | - projecthelpfilepath_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1480 | - check_value('PROJECTHELPFILEPATH_Id', 0x0006, projecthelpfilepath_id) | ||
| 1481 | - projecthelpfilepath_sizeof_helpfile1 = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1482 | - if projecthelpfilepath_sizeof_helpfile1 > 260: | ||
| 1483 | - log.error( | ||
| 1484 | - "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(projecthelpfilepath_sizeof_helpfile1)) | ||
| 1485 | - projecthelpfilepath_helpfile1 = dir_stream.read(projecthelpfilepath_sizeof_helpfile1) | ||
| 1486 | - projecthelpfilepath_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1487 | - check_value('PROJECTHELPFILEPATH_Reserved', 0x003D, projecthelpfilepath_reserved) | ||
| 1488 | - projecthelpfilepath_sizeof_helpfile2 = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1489 | - if projecthelpfilepath_sizeof_helpfile2 != projecthelpfilepath_sizeof_helpfile1: | ||
| 1490 | - log.error("PROJECTHELPFILEPATH_SizeOfHelpFile1 does not equal PROJECTHELPFILEPATH_SizeOfHelpFile2") | ||
| 1491 | - projecthelpfilepath_helpfile2 = dir_stream.read(projecthelpfilepath_sizeof_helpfile2) | ||
| 1492 | - if projecthelpfilepath_helpfile2 != projecthelpfilepath_helpfile1: | ||
| 1493 | - log.error("PROJECTHELPFILEPATH_HelpFile1 does not equal PROJECTHELPFILEPATH_HelpFile2") | ||
| 1494 | - | ||
| 1495 | - # PROJECTHELPCONTEXT Record | ||
| 1496 | - projecthelpcontext_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1497 | - check_value('PROJECTHELPCONTEXT_Id', 0x0007, projecthelpcontext_id) | ||
| 1498 | - projecthelpcontext_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1499 | - check_value('PROJECTHELPCONTEXT_Size', 0x0004, projecthelpcontext_size) | ||
| 1500 | - projecthelpcontext_helpcontext = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1501 | - unused = projecthelpcontext_helpcontext | ||
| 1502 | - | ||
| 1503 | - # PROJECTLIBFLAGS Record | ||
| 1504 | - projectlibflags_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1505 | - check_value('PROJECTLIBFLAGS_Id', 0x0008, projectlibflags_id) | ||
| 1506 | - projectlibflags_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1507 | - check_value('PROJECTLIBFLAGS_Size', 0x0004, projectlibflags_size) | ||
| 1508 | - projectlibflags_projectlibflags = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1509 | - check_value('PROJECTLIBFLAGS_ProjectLibFlags', 0x0000, projectlibflags_projectlibflags) | ||
| 1510 | - | ||
| 1511 | - # PROJECTVERSION Record | ||
| 1512 | - projectversion_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1513 | - check_value('PROJECTVERSION_Id', 0x0009, projectversion_id) | ||
| 1514 | - projectversion_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1515 | - check_value('PROJECTVERSION_Reserved', 0x0004, projectversion_reserved) | ||
| 1516 | - projectversion_versionmajor = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1517 | - projectversion_versionminor = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1518 | - unused = projectversion_versionmajor | ||
| 1519 | - unused = projectversion_versionminor | ||
| 1520 | - | ||
| 1521 | - # PROJECTCONSTANTS Record | ||
| 1522 | - projectconstants_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1523 | - check_value('PROJECTCONSTANTS_Id', 0x000C, projectconstants_id) | ||
| 1524 | - projectconstants_sizeof_constants = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1525 | - if projectconstants_sizeof_constants > 1015: | ||
| 1526 | - log.error( | ||
| 1527 | - "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(projectconstants_sizeof_constants)) | ||
| 1528 | - projectconstants_constants = dir_stream.read(projectconstants_sizeof_constants) | ||
| 1529 | - projectconstants_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1530 | - check_value('PROJECTCONSTANTS_Reserved', 0x003C, projectconstants_reserved) | ||
| 1531 | - projectconstants_sizeof_constants_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1532 | - if projectconstants_sizeof_constants_unicode % 2 != 0: | ||
| 1533 | - log.error("PROJECTCONSTANTS_SizeOfConstantsUnicode is not even") | ||
| 1534 | - projectconstants_constants_unicode = dir_stream.read(projectconstants_sizeof_constants_unicode) | ||
| 1535 | - unused = projectconstants_constants | ||
| 1536 | - unused = projectconstants_constants_unicode | ||
| 1537 | - | ||
| 1538 | - # array of REFERENCE records | ||
| 1539 | - check = None | ||
| 1540 | - while True: | ||
| 1541 | - check = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1542 | - log.debug("reference type = {0:04X}".format(check)) | ||
| 1543 | - if check == 0x000F: | ||
| 1544 | - break | ||
| 1545 | - | ||
| 1546 | - if check == 0x0016: | ||
| 1547 | - # REFERENCENAME | ||
| 1548 | - reference_id = check | ||
| 1549 | - reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1550 | - reference_name = dir_stream.read(reference_sizeof_name) | ||
| 1551 | - reference_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1552 | - # According to [MS-OVBA] 2.3.4.2.2.2 REFERENCENAME Record: | ||
| 1553 | - # "Reserved (2 bytes): MUST be 0x003E. MUST be ignored." | ||
| 1554 | - # So let's ignore it, otherwise it crashes on some files (issue #132) | ||
| 1555 | - # PR #135 by @c1fe: | ||
| 1556 | - # contrary to the specification I think that the unicode name | ||
| 1557 | - # is optional. if reference_reserved is not 0x003E I think it | ||
| 1558 | - # is actually the start of another REFERENCE record | ||
| 1559 | - # at least when projectsyskind_syskind == 0x02 (Macintosh) | ||
| 1560 | - if reference_reserved == 0x003E: | ||
| 1561 | - #if reference_reserved not in (0x003E, 0x000D): | ||
| 1562 | - # raise UnexpectedDataError(dir_path, 'REFERENCE_Reserved', | ||
| 1563 | - # 0x0003E, reference_reserved) | ||
| 1564 | - reference_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1565 | - reference_name_unicode = dir_stream.read(reference_sizeof_name_unicode) | ||
| 1566 | - unused = reference_id | ||
| 1567 | - unused = reference_name | ||
| 1568 | - unused = reference_name_unicode | ||
| 1569 | - continue | ||
| 1570 | - else: | ||
| 1571 | - check = reference_reserved | ||
| 1572 | - log.debug("reference type = {0:04X}".format(check)) | ||
| 1573 | - | ||
| 1574 | - if check == 0x0033: | ||
| 1575 | - # REFERENCEORIGINAL (followed by REFERENCECONTROL) | ||
| 1576 | - referenceoriginal_id = check | ||
| 1577 | - referenceoriginal_sizeof_libidoriginal = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1578 | - referenceoriginal_libidoriginal = dir_stream.read(referenceoriginal_sizeof_libidoriginal) | ||
| 1579 | - unused = referenceoriginal_id | ||
| 1580 | - unused = referenceoriginal_libidoriginal | ||
| 1581 | - continue | ||
| 1582 | - | ||
| 1583 | - if check == 0x002F: | ||
| 1584 | - # REFERENCECONTROL | ||
| 1585 | - referencecontrol_id = check | ||
| 1586 | - referencecontrol_sizetwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore | ||
| 1587 | - referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1588 | - referencecontrol_libidtwiddled = dir_stream.read(referencecontrol_sizeof_libidtwiddled) | ||
| 1589 | - referencecontrol_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore | ||
| 1590 | - check_value('REFERENCECONTROL_Reserved1', 0x0000, referencecontrol_reserved1) | ||
| 1591 | - referencecontrol_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore | ||
| 1592 | - check_value('REFERENCECONTROL_Reserved2', 0x0000, referencecontrol_reserved2) | ||
| 1593 | - unused = referencecontrol_id | ||
| 1594 | - unused = referencecontrol_sizetwiddled | ||
| 1595 | - unused = referencecontrol_libidtwiddled | ||
| 1596 | - # optional field | ||
| 1597 | - check2 = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1598 | - if check2 == 0x0016: | ||
| 1599 | - referencecontrol_namerecordextended_id = check | ||
| 1600 | - referencecontrol_namerecordextended_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1601 | - referencecontrol_namerecordextended_name = dir_stream.read( | ||
| 1602 | - referencecontrol_namerecordextended_sizeof_name) | ||
| 1603 | - referencecontrol_namerecordextended_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1604 | - if referencecontrol_namerecordextended_reserved == 0x003E: | ||
| 1605 | - referencecontrol_namerecordextended_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1606 | - referencecontrol_namerecordextended_name_unicode = dir_stream.read( | ||
| 1607 | - referencecontrol_namerecordextended_sizeof_name_unicode) | ||
| 1608 | - referencecontrol_reserved3 = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1609 | - unused = referencecontrol_namerecordextended_id | ||
| 1610 | - unused = referencecontrol_namerecordextended_name | ||
| 1611 | - unused = referencecontrol_namerecordextended_name_unicode | ||
| 1612 | - else: | ||
| 1613 | - referencecontrol_reserved3 = referencecontrol_namerecordextended_reserved | ||
| 1614 | - else: | ||
| 1615 | - referencecontrol_reserved3 = check2 | ||
| 1616 | - | ||
| 1617 | - check_value('REFERENCECONTROL_Reserved3', 0x0030, referencecontrol_reserved3) | ||
| 1618 | - referencecontrol_sizeextended = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1619 | - referencecontrol_sizeof_libidextended = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1620 | - referencecontrol_libidextended = dir_stream.read(referencecontrol_sizeof_libidextended) | ||
| 1621 | - referencecontrol_reserved4 = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1622 | - referencecontrol_reserved5 = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1623 | - referencecontrol_originaltypelib = dir_stream.read(16) | ||
| 1624 | - referencecontrol_cookie = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1625 | - unused = referencecontrol_sizeextended | ||
| 1626 | - unused = referencecontrol_libidextended | ||
| 1627 | - unused = referencecontrol_reserved4 | ||
| 1628 | - unused = referencecontrol_reserved5 | ||
| 1629 | - unused = referencecontrol_originaltypelib | ||
| 1630 | - unused = referencecontrol_cookie | ||
| 1631 | - continue | ||
| 1632 | - | ||
| 1633 | - if check == 0x000D: | ||
| 1634 | - # REFERENCEREGISTERED | ||
| 1635 | - referenceregistered_id = check | ||
| 1636 | - referenceregistered_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1637 | - referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1638 | - referenceregistered_libid = dir_stream.read(referenceregistered_sizeof_libid) | ||
| 1639 | - referenceregistered_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1640 | - check_value('REFERENCEREGISTERED_Reserved1', 0x0000, referenceregistered_reserved1) | ||
| 1641 | - referenceregistered_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1642 | - check_value('REFERENCEREGISTERED_Reserved2', 0x0000, referenceregistered_reserved2) | ||
| 1643 | - unused = referenceregistered_id | ||
| 1644 | - unused = referenceregistered_size | ||
| 1645 | - unused = referenceregistered_libid | ||
| 1646 | - continue | ||
| 1647 | - | ||
| 1648 | - if check == 0x000E: | ||
| 1649 | - # REFERENCEPROJECT | ||
| 1650 | - referenceproject_id = check | ||
| 1651 | - referenceproject_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1652 | - referenceproject_sizeof_libidabsolute = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1653 | - referenceproject_libidabsolute = dir_stream.read(referenceproject_sizeof_libidabsolute) | ||
| 1654 | - referenceproject_sizeof_libidrelative = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1655 | - referenceproject_libidrelative = dir_stream.read(referenceproject_sizeof_libidrelative) | ||
| 1656 | - referenceproject_majorversion = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1657 | - referenceproject_minorversion = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1658 | - unused = referenceproject_id | ||
| 1659 | - unused = referenceproject_size | ||
| 1660 | - unused = referenceproject_libidabsolute | ||
| 1661 | - unused = referenceproject_libidrelative | ||
| 1662 | - unused = referenceproject_majorversion | ||
| 1663 | - unused = referenceproject_minorversion | ||
| 1664 | - continue | ||
| 1665 | - | ||
| 1666 | - log.error('invalid or unknown check Id {0:04X}'.format(check)) | ||
| 1667 | - # raise an exception instead of stopping abruptly (issue #180) | ||
| 1668 | - raise UnexpectedDataError(dir_path, 'reference type', (0x0F, 0x16, 0x33, 0x2F, 0x0D, 0x0E), check) | ||
| 1669 | - #sys.exit(0) | ||
| 1670 | - | ||
| 1671 | - projectmodules_id = check #struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1672 | - check_value('PROJECTMODULES_Id', 0x000F, projectmodules_id) | ||
| 1673 | - projectmodules_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1674 | - check_value('PROJECTMODULES_Size', 0x0002, projectmodules_size) | ||
| 1675 | - projectmodules_count = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1676 | - projectmodules_projectcookierecord_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1677 | - check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, projectmodules_projectcookierecord_id) | ||
| 1678 | - projectmodules_projectcookierecord_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1679 | - check_value('PROJECTMODULES_ProjectCookieRecord_Size', 0x0002, projectmodules_projectcookierecord_size) | ||
| 1680 | - projectmodules_projectcookierecord_cookie = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1681 | - unused = projectmodules_projectcookierecord_cookie | ||
| 1682 | - | ||
| 1683 | - # short function to simplify unicode text output | ||
| 1684 | - uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace') | ||
| 1685 | - | ||
| 1686 | - log.debug("parsing {0} modules".format(projectmodules_count)) | ||
| 1687 | - for projectmodule_index in xrange(0, projectmodules_count): | ||
| 1688 | - try: | ||
| 1689 | - modulename_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1690 | - check_value('MODULENAME_Id', 0x0019, modulename_id) | ||
| 1691 | - modulename_sizeof_modulename = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1692 | - modulename_modulename = dir_stream.read(modulename_sizeof_modulename) | ||
| 1693 | - # TODO: preset variables to avoid "referenced before assignment" errors | ||
| 1694 | - modulename_unicode_modulename_unicode = '' | ||
| 1695 | - # account for optional sections | ||
| 1696 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1697 | - if section_id == 0x0047: | ||
| 1698 | - modulename_unicode_id = section_id | ||
| 1699 | - modulename_unicode_sizeof_modulename_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1700 | - modulename_unicode_modulename_unicode = dir_stream.read( | ||
| 1701 | - modulename_unicode_sizeof_modulename_unicode).decode('UTF-16LE', 'replace') | ||
| 1702 | - # just guessing that this is the same encoding as used in OleFileIO | ||
| 1703 | - unused = modulename_unicode_id | ||
| 1704 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1705 | - if section_id == 0x001A: | ||
| 1706 | - modulestreamname_id = section_id | ||
| 1707 | - modulestreamname_sizeof_streamname = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1708 | - modulestreamname_streamname = dir_stream.read(modulestreamname_sizeof_streamname) | ||
| 1709 | - modulestreamname_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1710 | - check_value('MODULESTREAMNAME_Reserved', 0x0032, modulestreamname_reserved) | ||
| 1711 | - modulestreamname_sizeof_streamname_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1712 | - modulestreamname_streamname_unicode = dir_stream.read( | ||
| 1713 | - modulestreamname_sizeof_streamname_unicode).decode('UTF-16LE', 'replace') | ||
| 1714 | - # just guessing that this is the same encoding as used in OleFileIO | ||
| 1715 | - unused = modulestreamname_id | ||
| 1716 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1717 | - if section_id == 0x001C: | ||
| 1718 | - moduledocstring_id = section_id | ||
| 1719 | - check_value('MODULEDOCSTRING_Id', 0x001C, moduledocstring_id) | ||
| 1720 | - moduledocstring_sizeof_docstring = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1721 | - moduledocstring_docstring = dir_stream.read(moduledocstring_sizeof_docstring) | ||
| 1722 | - moduledocstring_reserved = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1723 | - check_value('MODULEDOCSTRING_Reserved', 0x0048, moduledocstring_reserved) | ||
| 1724 | - moduledocstring_sizeof_docstring_unicode = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1725 | - moduledocstring_docstring_unicode = dir_stream.read(moduledocstring_sizeof_docstring_unicode) | ||
| 1726 | - unused = moduledocstring_docstring | ||
| 1727 | - unused = moduledocstring_docstring_unicode | ||
| 1728 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1729 | - if section_id == 0x0031: | ||
| 1730 | - moduleoffset_id = section_id | ||
| 1731 | - check_value('MODULEOFFSET_Id', 0x0031, moduleoffset_id) | ||
| 1732 | - moduleoffset_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1733 | - check_value('MODULEOFFSET_Size', 0x0004, moduleoffset_size) | ||
| 1734 | - moduleoffset_textoffset = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1735 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1736 | - if section_id == 0x001E: | ||
| 1737 | - modulehelpcontext_id = section_id | ||
| 1738 | - check_value('MODULEHELPCONTEXT_Id', 0x001E, modulehelpcontext_id) | ||
| 1739 | - modulehelpcontext_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1740 | - check_value('MODULEHELPCONTEXT_Size', 0x0004, modulehelpcontext_size) | ||
| 1741 | - modulehelpcontext_helpcontext = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1742 | - unused = modulehelpcontext_helpcontext | ||
| 1743 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1744 | - if section_id == 0x002C: | ||
| 1745 | - modulecookie_id = section_id | ||
| 1746 | - check_value('MODULECOOKIE_Id', 0x002C, modulecookie_id) | ||
| 1747 | - modulecookie_size = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1748 | - check_value('MODULECOOKIE_Size', 0x0002, modulecookie_size) | ||
| 1749 | - modulecookie_cookie = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1750 | - unused = modulecookie_cookie | ||
| 1751 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1752 | - if section_id == 0x0021 or section_id == 0x0022: | ||
| 1753 | - moduletype_id = section_id | ||
| 1754 | - moduletype_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1755 | - unused = moduletype_id | ||
| 1756 | - unused = moduletype_reserved | ||
| 1757 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1758 | - if section_id == 0x0025: | ||
| 1759 | - modulereadonly_id = section_id | ||
| 1760 | - check_value('MODULEREADONLY_Id', 0x0025, modulereadonly_id) | ||
| 1761 | - modulereadonly_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1762 | - check_value('MODULEREADONLY_Reserved', 0x0000, modulereadonly_reserved) | ||
| 1763 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1764 | - if section_id == 0x0028: | ||
| 1765 | - moduleprivate_id = section_id | ||
| 1766 | - check_value('MODULEPRIVATE_Id', 0x0028, moduleprivate_id) | ||
| 1767 | - moduleprivate_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1768 | - check_value('MODULEPRIVATE_Reserved', 0x0000, moduleprivate_reserved) | ||
| 1769 | - section_id = struct.unpack("<H", dir_stream.read(2))[0] | ||
| 1770 | - if section_id == 0x002B: # TERMINATOR | ||
| 1771 | - module_reserved = struct.unpack("<L", dir_stream.read(4))[0] | ||
| 1772 | - check_value('MODULE_Reserved', 0x0000, module_reserved) | ||
| 1773 | - section_id = None | ||
| 1774 | - if section_id != None: | ||
| 1775 | - log.warning('unknown or invalid module section id {0:04X}'.format(section_id)) | ||
| 1776 | - | ||
| 1777 | - log.debug('Project CodePage = %d' % projectcodepage_codepage) | ||
| 1778 | - if projectcodepage_codepage in MAC_CODEPAGES: | ||
| 1779 | - vba_codec = MAC_CODEPAGES[projectcodepage_codepage] | ||
| 1780 | - else: | ||
| 1781 | - vba_codec = 'cp%d' % projectcodepage_codepage | ||
| 1782 | - log.debug("ModuleName = {0}".format(modulename_modulename)) | ||
| 1783 | - log.debug("ModuleNameUnicode = {0}".format(uni_out(modulename_unicode_modulename_unicode))) | ||
| 1784 | - log.debug("StreamName = {0}".format(modulestreamname_streamname)) | ||
| 1785 | - try: | ||
| 1786 | - streamname_unicode = modulestreamname_streamname.decode(vba_codec) | ||
| 1787 | - except UnicodeError as ue: | ||
| 1788 | - log.debug('failed to decode stream name {0!r} with codec {1}' | ||
| 1789 | - .format(uni_out(streamname_unicode), vba_codec)) | ||
| 1790 | - streamname_unicode = modulestreamname_streamname.decode(vba_codec, errors='replace') | ||
| 1791 | - log.debug("StreamName.decode('%s') = %s" % (vba_codec, uni_out(streamname_unicode))) | ||
| 1792 | - log.debug("StreamNameUnicode = {0}".format(uni_out(modulestreamname_streamname_unicode))) | ||
| 1793 | - log.debug("TextOffset = {0}".format(moduleoffset_textoffset)) | ||
| 1794 | - | ||
| 1795 | - code_data = None | ||
| 1796 | - try_names = streamname_unicode, \ | ||
| 1797 | - modulename_unicode_modulename_unicode, \ | ||
| 1798 | - modulestreamname_streamname_unicode | ||
| 1799 | - for stream_name in try_names: | ||
| 1800 | - # TODO: if olefile._find were less private, could replace this | ||
| 1801 | - # try-except with calls to it | ||
| 1802 | - try: | ||
| 1803 | - code_path = vba_root + u'VBA/' + stream_name | ||
| 1804 | - log.debug('opening VBA code stream %s' % uni_out(code_path)) | ||
| 1805 | - code_data = ole.openstream(code_path).read() | ||
| 1806 | - break | ||
| 1807 | - except IOError as ioe: | ||
| 1808 | - log.debug('failed to open stream VBA/%r (%r), try other name' | ||
| 1809 | - % (uni_out(stream_name), ioe)) | ||
| 1810 | - | ||
| 1811 | - if code_data is None: | ||
| 1812 | - log.info("Could not open stream %d of %d ('VBA/' + one of %r)!" | ||
| 1813 | - % (projectmodule_index, projectmodules_count, | ||
| 1814 | - '/'.join("'" + uni_out(stream_name) + "'" | ||
| 1815 | - for stream_name in try_names))) | ||
| 1816 | - if relaxed: | ||
| 1817 | - continue # ... with next submodule | ||
| 1818 | - else: | ||
| 1819 | - raise SubstreamOpenError('[BASE]', 'VBA/' + | ||
| 1820 | - uni_out(modulename_unicode_modulename_unicode)) | ||
| 1821 | - | ||
| 1822 | - log.debug("length of code_data = {0}".format(len(code_data))) | ||
| 1823 | - log.debug("offset of code_data = {0}".format(moduleoffset_textoffset)) | ||
| 1824 | - code_data = code_data[moduleoffset_textoffset:] | ||
| 1825 | - if len(code_data) > 0: | ||
| 1826 | - code_data = decompress_stream(bytearray(code_data)) | ||
| 1827 | - # case-insensitive search in the code_modules dict to find the file extension: | ||
| 1828 | - filext = code_modules.get(modulename_modulename.lower(), 'bin') | ||
| 1829 | - filename = '{0}.{1}'.format(modulename_modulename, filext) | ||
| 1830 | - #TODO: also yield the codepage so that callers can decode it properly | ||
| 1831 | - yield (code_path, filename, code_data) | ||
| 1832 | - # print '-'*79 | ||
| 1833 | - # print filename | ||
| 1834 | - # print '' | ||
| 1835 | - # print code_data | ||
| 1836 | - # print '' | ||
| 1837 | - log.debug('extracted file {0}'.format(filename)) | ||
| 1838 | - else: | ||
| 1839 | - log.warning("module stream {0} has code data length 0".format(modulestreamname_streamname)) | ||
| 1840 | - except (UnexpectedDataError, SubstreamOpenError): | ||
| 1841 | - raise | ||
| 1842 | - except Exception as exc: | ||
| 1843 | - log.info('Error parsing module {0} of {1} in _extract_vba:' | ||
| 1844 | - .format(projectmodule_index, projectmodules_count), | ||
| 1845 | - exc_info=True) | ||
| 1846 | - if not relaxed: | ||
| 1847 | - raise | ||
| 1848 | - _ = unused # make pylint happy: now variable "unused" is being used ;-) | ||
| 1849 | - return | 1925 | + for code_path, filename, code_data in project.parse_modules(): |
| 1926 | + yield (code_path, filename, code_data) | ||
| 1850 | 1927 | ||
| 1851 | 1928 | ||
| 1852 | def vba_collapse_long_lines(vba_code): | 1929 | def vba_collapse_long_lines(vba_code): |