Commit fbbad0a2eb0ab3000045217187c68245030f465f
1 parent: 0982acb0
olevba: use new codepages module
Showing 1 changed file with 16 additions and 16 deletions
oletools/olevba.py
| @@ -313,6 +313,7 @@ from oletools import oleform | @@ -313,6 +313,7 @@ from oletools import oleform | ||
| 313 | from oletools import rtfobj | 313 | from oletools import rtfobj |
| 314 | from oletools import oleid | 314 | from oletools import oleid |
| 315 | from oletools.common.errors import FileIsEncryptedError | 315 | from oletools.common.errors import FileIsEncryptedError |
| 316 | +from oletools.common import codepages | ||
| 316 | 317 | ||
| 317 | # monkeypatch email to fix issue #32: | 318 | # monkeypatch email to fix issue #32: |
| 318 | # allow header lines without ":" | 319 | # allow header lines without ":" |
| @@ -1413,17 +1414,9 @@ class VBA_Project(object): | @@ -1413,17 +1414,9 @@ class VBA_Project(object): | ||
| 1413 | projectcodepage_size = struct.unpack("<L", dir_stream.read(4))[0] | 1414 | projectcodepage_size = struct.unpack("<L", dir_stream.read(4))[0] |
| 1414 | self.check_value('PROJECTCODEPAGE_Size', 0x0002, projectcodepage_size) | 1415 | self.check_value('PROJECTCODEPAGE_Size', 0x0002, projectcodepage_size) |
| 1415 | self.codepage = struct.unpack("<H", dir_stream.read(2))[0] | 1416 | self.codepage = struct.unpack("<H", dir_stream.read(2))[0] |
| 1416 | - log.debug('Project Code Page: %r' % self.codepage) | ||
| 1417 | - if self.codepage in MAC_CODEPAGES: | ||
| 1418 | - self.codec = MAC_CODEPAGES[self.codepage] | ||
| 1419 | - else: | ||
| 1420 | - self.codec = 'cp%d' % self.codepage | ||
| 1421 | - # TODO: check if valid code page or raise a clear exception, and use UTF-8 as default? | ||
| 1422 | - try: | ||
| 1423 | - codecs.lookup(self.codec) | ||
| 1424 | - except LookupError: | ||
| 1425 | - log.error('Codec not found for code page %d, using UTF-8 as fallback.' % self.codepage) | ||
| 1426 | - self.codec = 'utf8' | 1417 | + self.codepage_name = codepages.get_codepage_name(self.codepage) |
| 1418 | + log.debug('Project Code Page: %r - %s' % (self.codepage, self.codepage_name)) | ||
| 1419 | + self.codec = codepages.codepage2codec(self.codepage) | ||
| 1427 | log.debug('Python codec corresponding to code page %d: %s' % (self.codepage, self.codec)) | 1420 | log.debug('Python codec corresponding to code page %d: %s' % (self.codepage, self.codec)) |
| 1428 | 1421 | ||
| 1429 | 1422 | ||
| @@ -1525,6 +1518,7 @@ class VBA_Project(object): | @@ -1525,6 +1518,7 @@ class VBA_Project(object): | ||
| 1525 | unused = projectconstants_constants_unicode | 1518 | unused = projectconstants_constants_unicode |
| 1526 | 1519 | ||
| 1527 | # array of REFERENCE records | 1520 | # array of REFERENCE records |
| 1521 | + # Specifies a reference to an Automation type library or VBA project. | ||
| 1528 | check = None | 1522 | check = None |
| 1529 | while True: | 1523 | while True: |
| 1530 | check = struct.unpack("<H", dir_stream.read(2))[0] | 1524 | check = struct.unpack("<H", dir_stream.read(2))[0] |
| @@ -1534,6 +1528,7 @@ class VBA_Project(object): | @@ -1534,6 +1528,7 @@ class VBA_Project(object): | ||
| 1534 | 1528 | ||
| 1535 | if check == 0x0016: | 1529 | if check == 0x0016: |
| 1536 | # REFERENCENAME | 1530 | # REFERENCENAME |
| 1531 | + # Specifies the name of a referenced VBA project or Automation type library. | ||
| 1537 | reference_id = check | 1532 | reference_id = check |
| 1538 | reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] | 1533 | reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] |
| 1539 | reference_name = dir_stream.read(reference_sizeof_name) | 1534 | reference_name = dir_stream.read(reference_sizeof_name) |
| @@ -1562,6 +1557,8 @@ class VBA_Project(object): | @@ -1562,6 +1557,8 @@ class VBA_Project(object): | ||
| 1562 | 1557 | ||
| 1563 | if check == 0x0033: | 1558 | if check == 0x0033: |
| 1564 | # REFERENCEORIGINAL (followed by REFERENCECONTROL) | 1559 | # REFERENCEORIGINAL (followed by REFERENCECONTROL) |
| 1560 | + # Specifies the identifier of the Automation type library the containing REFERENCECONTROL’s | ||
| 1561 | + # (section 2.3.4.2.2.3) twiddled type library was generated from. | ||
| 1565 | referenceoriginal_id = check | 1562 | referenceoriginal_id = check |
| 1566 | referenceoriginal_sizeof_libidoriginal = struct.unpack("<L", dir_stream.read(4))[0] | 1563 | referenceoriginal_sizeof_libidoriginal = struct.unpack("<L", dir_stream.read(4))[0] |
| 1567 | referenceoriginal_libidoriginal = dir_stream.read(referenceoriginal_sizeof_libidoriginal) | 1564 | referenceoriginal_libidoriginal = dir_stream.read(referenceoriginal_sizeof_libidoriginal) |
| @@ -1571,6 +1568,7 @@ class VBA_Project(object): | @@ -1571,6 +1568,7 @@ class VBA_Project(object): | ||
| 1571 | 1568 | ||
| 1572 | if check == 0x002F: | 1569 | if check == 0x002F: |
| 1573 | # REFERENCECONTROL | 1570 | # REFERENCECONTROL |
| 1571 | + # Specifies a reference to a twiddled type library and its extended type library. | ||
| 1574 | referencecontrol_id = check | 1572 | referencecontrol_id = check |
| 1575 | referencecontrol_sizetwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore | 1573 | referencecontrol_sizetwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore |
| 1576 | referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0] | 1574 | referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0] |
| @@ -1621,6 +1619,7 @@ class VBA_Project(object): | @@ -1621,6 +1619,7 @@ class VBA_Project(object): | ||
| 1621 | 1619 | ||
| 1622 | if check == 0x000D: | 1620 | if check == 0x000D: |
| 1623 | # REFERENCEREGISTERED | 1621 | # REFERENCEREGISTERED |
| 1622 | + # Specifies a reference to an Automation type library. | ||
| 1624 | referenceregistered_id = check | 1623 | referenceregistered_id = check |
| 1625 | referenceregistered_size = struct.unpack("<L", dir_stream.read(4))[0] | 1624 | referenceregistered_size = struct.unpack("<L", dir_stream.read(4))[0] |
| 1626 | referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0] | 1625 | referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0] |
| @@ -1636,6 +1635,7 @@ class VBA_Project(object): | @@ -1636,6 +1635,7 @@ class VBA_Project(object): | ||
| 1636 | 1635 | ||
| 1637 | if check == 0x000E: | 1636 | if check == 0x000E: |
| 1638 | # REFERENCEPROJECT | 1637 | # REFERENCEPROJECT |
| 1638 | + # Specifies a reference to an external VBA project. | ||
| 1639 | referenceproject_id = check | 1639 | referenceproject_id = check |
| 1640 | referenceproject_size = struct.unpack("<L", dir_stream.read(4))[0] | 1640 | referenceproject_size = struct.unpack("<L", dir_stream.read(4))[0] |
| 1641 | referenceproject_sizeof_libidabsolute = struct.unpack("<L", dir_stream.read(4))[0] | 1641 | referenceproject_sizeof_libidabsolute = struct.unpack("<L", dir_stream.read(4))[0] |
| @@ -1673,7 +1673,7 @@ class VBA_Project(object): | @@ -1673,7 +1673,7 @@ class VBA_Project(object): | ||
| 1673 | # self.check_value('PROJECTMODULES_Id', 0x000F, projectmodules_id) | 1673 | # self.check_value('PROJECTMODULES_Id', 0x000F, projectmodules_id) |
| 1674 | projectmodules_size = struct.unpack("<L", dir_stream.read(4))[0] | 1674 | projectmodules_size = struct.unpack("<L", dir_stream.read(4))[0] |
| 1675 | self.check_value('PROJECTMODULES_Size', 0x0002, projectmodules_size) | 1675 | self.check_value('PROJECTMODULES_Size', 0x0002, projectmodules_size) |
| 1676 | - projectmodules_count = struct.unpack("<H", dir_stream.read(2))[0] | 1676 | + self.modules_count = struct.unpack("<H", dir_stream.read(2))[0] |
| 1677 | projectmodules_projectcookierecord_id = struct.unpack("<H", dir_stream.read(2))[0] | 1677 | projectmodules_projectcookierecord_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1678 | self.check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, projectmodules_projectcookierecord_id) | 1678 | self.check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, projectmodules_projectcookierecord_id) |
| 1679 | projectmodules_projectcookierecord_size = struct.unpack("<L", dir_stream.read(4))[0] | 1679 | projectmodules_projectcookierecord_size = struct.unpack("<L", dir_stream.read(4))[0] |
| @@ -1684,8 +1684,8 @@ class VBA_Project(object): | @@ -1684,8 +1684,8 @@ class VBA_Project(object): | ||
| 1684 | # short function to simplify unicode text output | 1684 | # short function to simplify unicode text output |
| 1685 | uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace') | 1685 | uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace') |
| 1686 | 1686 | ||
| 1687 | - log.debug("parsing {0} modules".format(projectmodules_count)) | ||
| 1688 | - for projectmodule_index in xrange(0, projectmodules_count): | 1687 | + log.debug("parsing {0} modules".format(self.modules_count)) |
| 1688 | + for projectmodule_index in xrange(0, self.modules_count): | ||
| 1689 | try: | 1689 | try: |
| 1690 | modulename_id = struct.unpack("<H", dir_stream.read(2))[0] | 1690 | modulename_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1691 | self.check_value('MODULENAME_Id', 0x0019, modulename_id) | 1691 | self.check_value('MODULENAME_Id', 0x0019, modulename_id) |
| @@ -1807,7 +1807,7 @@ class VBA_Project(object): | @@ -1807,7 +1807,7 @@ class VBA_Project(object): | ||
| 1807 | 1807 | ||
| 1808 | if code_data is None: | 1808 | if code_data is None: |
| 1809 | log.info("Could not open stream %d of %d ('VBA/' + one of %r)!" | 1809 | log.info("Could not open stream %d of %d ('VBA/' + one of %r)!" |
| 1810 | - % (projectmodule_index, projectmodules_count, | 1810 | + % (projectmodule_index, self.modules_count, |
| 1811 | '/'.join("'" + uni_out(stream_name) + "'" | 1811 | '/'.join("'" + uni_out(stream_name) + "'" |
| 1812 | for stream_name in try_names))) | 1812 | for stream_name in try_names))) |
| 1813 | if self.relaxed: | 1813 | if self.relaxed: |
| @@ -1839,7 +1839,7 @@ class VBA_Project(object): | @@ -1839,7 +1839,7 @@ class VBA_Project(object): | ||
| 1839 | raise | 1839 | raise |
| 1840 | except Exception as exc: | 1840 | except Exception as exc: |
| 1841 | log.info('Error parsing module {0} of {1} in _extract_vba:' | 1841 | log.info('Error parsing module {0} of {1} in _extract_vba:' |
| 1842 | - .format(projectmodule_index, projectmodules_count), | 1842 | + .format(projectmodule_index, self.modules_count), |
| 1843 | exc_info=True) | 1843 | exc_info=True) |
| 1844 | if not self.relaxed: | 1844 | if not self.relaxed: |
| 1845 | raise | 1845 | raise |