Commit fbbad0a2eb0ab3000045217187c68245030f465f (1 parent: 0982acb0)
olevba: use new codepages module
Showing 1 changed file with 16 additions and 16 deletions
oletools/olevba.py
| ... | ... | @@ -313,6 +313,7 @@ from oletools import oleform |
| 313 | 313 | from oletools import rtfobj |
| 314 | 314 | from oletools import oleid |
| 315 | 315 | from oletools.common.errors import FileIsEncryptedError |
| 316 | +from oletools.common import codepages | |
| 316 | 317 | |
| 317 | 318 | # monkeypatch email to fix issue #32: |
| 318 | 319 | # allow header lines without ":" |
| ... | ... | @@ -1413,17 +1414,9 @@ class VBA_Project(object): |
| 1413 | 1414 | projectcodepage_size = struct.unpack("<L", dir_stream.read(4))[0] |
| 1414 | 1415 | self.check_value('PROJECTCODEPAGE_Size', 0x0002, projectcodepage_size) |
| 1415 | 1416 | self.codepage = struct.unpack("<H", dir_stream.read(2))[0] |
| 1416 | - log.debug('Project Code Page: %r' % self.codepage) | |
| 1417 | - if self.codepage in MAC_CODEPAGES: | |
| 1418 | - self.codec = MAC_CODEPAGES[self.codepage] | |
| 1419 | - else: | |
| 1420 | - self.codec = 'cp%d' % self.codepage | |
| 1421 | - # TODO: check if valid code page or raise a clear exception, and use UTF-8 as default? | |
| 1422 | - try: | |
| 1423 | - codecs.lookup(self.codec) | |
| 1424 | - except LookupError: | |
| 1425 | - log.error('Codec not found for code page %d, using UTF-8 as fallback.' % self.codepage) | |
| 1426 | - self.codec = 'utf8' | |
| 1417 | + self.codepage_name = codepages.get_codepage_name(self.codepage) | |
| 1418 | + log.debug('Project Code Page: %r - %s' % (self.codepage, self.codepage_name)) | |
| 1419 | + self.codec = codepages.codepage2codec(self.codepage) | |
| 1427 | 1420 | log.debug('Python codec corresponding to code page %d: %s' % (self.codepage, self.codec)) |
| 1428 | 1421 | |
| 1429 | 1422 | |
| ... | ... | @@ -1525,6 +1518,7 @@ class VBA_Project(object): |
| 1525 | 1518 | unused = projectconstants_constants_unicode |
| 1526 | 1519 | |
| 1527 | 1520 | # array of REFERENCE records |
| 1521 | + # Specifies a reference to an Automation type library or VBA project. | |
| 1528 | 1522 | check = None |
| 1529 | 1523 | while True: |
| 1530 | 1524 | check = struct.unpack("<H", dir_stream.read(2))[0] |
| ... | ... | @@ -1534,6 +1528,7 @@ class VBA_Project(object): |
| 1534 | 1528 | |
| 1535 | 1529 | if check == 0x0016: |
| 1536 | 1530 | # REFERENCENAME |
| 1531 | + # Specifies the name of a referenced VBA project or Automation type library. | |
| 1537 | 1532 | reference_id = check |
| 1538 | 1533 | reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] |
| 1539 | 1534 | reference_name = dir_stream.read(reference_sizeof_name) |
| ... | ... | @@ -1562,6 +1557,8 @@ class VBA_Project(object): |
| 1562 | 1557 | |
| 1563 | 1558 | if check == 0x0033: |
| 1564 | 1559 | # REFERENCEORIGINAL (followed by REFERENCECONTROL) |
| 1560 | + # Specifies the identifier of the Automation type library the containing REFERENCECONTROL’s | |
| 1561 | + # (section 2.3.4.2.2.3) twiddled type library was generated from. | |
| 1565 | 1562 | referenceoriginal_id = check |
| 1566 | 1563 | referenceoriginal_sizeof_libidoriginal = struct.unpack("<L", dir_stream.read(4))[0] |
| 1567 | 1564 | referenceoriginal_libidoriginal = dir_stream.read(referenceoriginal_sizeof_libidoriginal) |
| ... | ... | @@ -1571,6 +1568,7 @@ class VBA_Project(object): |
| 1571 | 1568 | |
| 1572 | 1569 | if check == 0x002F: |
| 1573 | 1570 | # REFERENCECONTROL |
| 1571 | + # Specifies a reference to a twiddled type library and its extended type library. | |
| 1574 | 1572 | referencecontrol_id = check |
| 1575 | 1573 | referencecontrol_sizetwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore |
| 1576 | 1574 | referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0] |
| ... | ... | @@ -1621,6 +1619,7 @@ class VBA_Project(object): |
| 1621 | 1619 | |
| 1622 | 1620 | if check == 0x000D: |
| 1623 | 1621 | # REFERENCEREGISTERED |
| 1622 | + # Specifies a reference to an Automation type library. | |
| 1624 | 1623 | referenceregistered_id = check |
| 1625 | 1624 | referenceregistered_size = struct.unpack("<L", dir_stream.read(4))[0] |
| 1626 | 1625 | referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0] |
| ... | ... | @@ -1636,6 +1635,7 @@ class VBA_Project(object): |
| 1636 | 1635 | |
| 1637 | 1636 | if check == 0x000E: |
| 1638 | 1637 | # REFERENCEPROJECT |
| 1638 | + # Specifies a reference to an external VBA project. | |
| 1639 | 1639 | referenceproject_id = check |
| 1640 | 1640 | referenceproject_size = struct.unpack("<L", dir_stream.read(4))[0] |
| 1641 | 1641 | referenceproject_sizeof_libidabsolute = struct.unpack("<L", dir_stream.read(4))[0] |
| ... | ... | @@ -1673,7 +1673,7 @@ class VBA_Project(object): |
| 1673 | 1673 | # self.check_value('PROJECTMODULES_Id', 0x000F, projectmodules_id) |
| 1674 | 1674 | projectmodules_size = struct.unpack("<L", dir_stream.read(4))[0] |
| 1675 | 1675 | self.check_value('PROJECTMODULES_Size', 0x0002, projectmodules_size) |
| 1676 | - projectmodules_count = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1676 | + self.modules_count = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1677 | 1677 | projectmodules_projectcookierecord_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1678 | 1678 | self.check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, projectmodules_projectcookierecord_id) |
| 1679 | 1679 | projectmodules_projectcookierecord_size = struct.unpack("<L", dir_stream.read(4))[0] |
| ... | ... | @@ -1684,8 +1684,8 @@ class VBA_Project(object): |
| 1684 | 1684 | # short function to simplify unicode text output |
| 1685 | 1685 | uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace') |
| 1686 | 1686 | |
| 1687 | - log.debug("parsing {0} modules".format(projectmodules_count)) | |
| 1688 | - for projectmodule_index in xrange(0, projectmodules_count): | |
| 1687 | + log.debug("parsing {0} modules".format(self.modules_count)) | |
| 1688 | + for projectmodule_index in xrange(0, self.modules_count): | |
| 1689 | 1689 | try: |
| 1690 | 1690 | modulename_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1691 | 1691 | self.check_value('MODULENAME_Id', 0x0019, modulename_id) |
| ... | ... | @@ -1807,7 +1807,7 @@ class VBA_Project(object): |
| 1807 | 1807 | |
| 1808 | 1808 | if code_data is None: |
| 1809 | 1809 | log.info("Could not open stream %d of %d ('VBA/' + one of %r)!" |
| 1810 | - % (projectmodule_index, projectmodules_count, | |
| 1810 | + % (projectmodule_index, self.modules_count, | |
| 1811 | 1811 | '/'.join("'" + uni_out(stream_name) + "'" |
| 1812 | 1812 | for stream_name in try_names))) |
| 1813 | 1813 | if self.relaxed: |
| ... | ... | @@ -1839,7 +1839,7 @@ class VBA_Project(object): |
| 1839 | 1839 | raise |
| 1840 | 1840 | except Exception as exc: |
| 1841 | 1841 | log.info('Error parsing module {0} of {1} in _extract_vba:' |
| 1842 | - .format(projectmodule_index, projectmodules_count), | |
| 1842 | + .format(projectmodule_index, self.modules_count), | |
| 1843 | 1843 | exc_info=True) |
| 1844 | 1844 | if not self.relaxed: |
| 1845 | 1845 | raise | ... | ... |