Commit fbbad0a2eb0ab3000045217187c68245030f465f

Authored by decalage2
1 parent 0982acb0

olevba: use new codepages module

Showing 1 changed file with 16 additions and 16 deletions
oletools/olevba.py
... ... @@ -313,6 +313,7 @@ from oletools import oleform
313 313 from oletools import rtfobj
314 314 from oletools import oleid
315 315 from oletools.common.errors import FileIsEncryptedError
  316 +from oletools.common import codepages
316 317  
317 318 # monkeypatch email to fix issue #32:
318 319 # allow header lines without ":"
... ... @@ -1413,17 +1414,9 @@ class VBA_Project(object):
1413 1414 projectcodepage_size = struct.unpack("<L", dir_stream.read(4))[0]
1414 1415 self.check_value('PROJECTCODEPAGE_Size', 0x0002, projectcodepage_size)
1415 1416 self.codepage = struct.unpack("<H", dir_stream.read(2))[0]
1416   - log.debug('Project Code Page: %r' % self.codepage)
1417   - if self.codepage in MAC_CODEPAGES:
1418   - self.codec = MAC_CODEPAGES[self.codepage]
1419   - else:
1420   - self.codec = 'cp%d' % self.codepage
1421   - # TODO: check if valid code page or raise a clear exception, and use UTF-8 as default?
1422   - try:
1423   - codecs.lookup(self.codec)
1424   - except LookupError:
1425   - log.error('Codec not found for code page %d, using UTF-8 as fallback.' % self.codepage)
1426   - self.codec = 'utf8'
  1417 + self.codepage_name = codepages.get_codepage_name(self.codepage)
  1418 + log.debug('Project Code Page: %r - %s' % (self.codepage, self.codepage_name))
  1419 + self.codec = codepages.codepage2codec(self.codepage)
1427 1420 log.debug('Python codec corresponding to code page %d: %s' % (self.codepage, self.codec))
1428 1421  
1429 1422  
... ... @@ -1525,6 +1518,7 @@ class VBA_Project(object):
1525 1518 unused = projectconstants_constants_unicode
1526 1519  
1527 1520 # array of REFERENCE records
  1521 + # Specifies a reference to an Automation type library or VBA project.
1528 1522 check = None
1529 1523 while True:
1530 1524 check = struct.unpack("<H", dir_stream.read(2))[0]
... ... @@ -1534,6 +1528,7 @@ class VBA_Project(object):
1534 1528  
1535 1529 if check == 0x0016:
1536 1530 # REFERENCENAME
  1531 + # Specifies the name of a referenced VBA project or Automation type library.
1537 1532 reference_id = check
1538 1533 reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0]
1539 1534 reference_name = dir_stream.read(reference_sizeof_name)
... ... @@ -1562,6 +1557,8 @@ class VBA_Project(object):
1562 1557  
1563 1558 if check == 0x0033:
1564 1559 # REFERENCEORIGINAL (followed by REFERENCECONTROL)
  1560 + # Specifies the identifier of the Automation type library the containing REFERENCECONTROL’s
  1561 + # (section 2.3.4.2.2.3) twiddled type library was generated from.
1565 1562 referenceoriginal_id = check
1566 1563 referenceoriginal_sizeof_libidoriginal = struct.unpack("<L", dir_stream.read(4))[0]
1567 1564 referenceoriginal_libidoriginal = dir_stream.read(referenceoriginal_sizeof_libidoriginal)
... ... @@ -1571,6 +1568,7 @@ class VBA_Project(object):
1571 1568  
1572 1569 if check == 0x002F:
1573 1570 # REFERENCECONTROL
  1571 + # Specifies a reference to a twiddled type library and its extended type library.
1574 1572 referencecontrol_id = check
1575 1573 referencecontrol_sizetwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore
1576 1574 referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0]
... ... @@ -1621,6 +1619,7 @@ class VBA_Project(object):
1621 1619  
1622 1620 if check == 0x000D:
1623 1621 # REFERENCEREGISTERED
  1622 + # Specifies a reference to an Automation type library.
1624 1623 referenceregistered_id = check
1625 1624 referenceregistered_size = struct.unpack("<L", dir_stream.read(4))[0]
1626 1625 referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0]
... ... @@ -1636,6 +1635,7 @@ class VBA_Project(object):
1636 1635  
1637 1636 if check == 0x000E:
1638 1637 # REFERENCEPROJECT
  1638 + # Specifies a reference to an external VBA project.
1639 1639 referenceproject_id = check
1640 1640 referenceproject_size = struct.unpack("<L", dir_stream.read(4))[0]
1641 1641 referenceproject_sizeof_libidabsolute = struct.unpack("<L", dir_stream.read(4))[0]
... ... @@ -1673,7 +1673,7 @@ class VBA_Project(object):
1673 1673 # self.check_value('PROJECTMODULES_Id', 0x000F, projectmodules_id)
1674 1674 projectmodules_size = struct.unpack("<L", dir_stream.read(4))[0]
1675 1675 self.check_value('PROJECTMODULES_Size', 0x0002, projectmodules_size)
1676   - projectmodules_count = struct.unpack("<H", dir_stream.read(2))[0]
  1676 + self.modules_count = struct.unpack("<H", dir_stream.read(2))[0]
1677 1677 projectmodules_projectcookierecord_id = struct.unpack("<H", dir_stream.read(2))[0]
1678 1678 self.check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, projectmodules_projectcookierecord_id)
1679 1679 projectmodules_projectcookierecord_size = struct.unpack("<L", dir_stream.read(4))[0]
... ... @@ -1684,8 +1684,8 @@ class VBA_Project(object):
1684 1684 # short function to simplify unicode text output
1685 1685 uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace')
1686 1686  
1687   - log.debug("parsing {0} modules".format(projectmodules_count))
1688   - for projectmodule_index in xrange(0, projectmodules_count):
  1687 + log.debug("parsing {0} modules".format(self.modules_count))
  1688 + for projectmodule_index in xrange(0, self.modules_count):
1689 1689 try:
1690 1690 modulename_id = struct.unpack("<H", dir_stream.read(2))[0]
1691 1691 self.check_value('MODULENAME_Id', 0x0019, modulename_id)
... ... @@ -1807,7 +1807,7 @@ class VBA_Project(object):
1807 1807  
1808 1808 if code_data is None:
1809 1809 log.info("Could not open stream %d of %d ('VBA/' + one of %r)!"
1810   - % (projectmodule_index, projectmodules_count,
  1810 + % (projectmodule_index, self.modules_count,
1811 1811 '/'.join("'" + uni_out(stream_name) + "'"
1812 1812 for stream_name in try_names)))
1813 1813 if self.relaxed:
... ... @@ -1839,7 +1839,7 @@ class VBA_Project(object):
1839 1839 raise
1840 1840 except Exception as exc:
1841 1841 log.info('Error parsing module {0} of {1} in _extract_vba:'
1842   - .format(projectmodule_index, projectmodules_count),
  1842 + .format(projectmodule_index, self.modules_count),
1843 1843 exc_info=True)
1844 1844 if not self.relaxed:
1845 1845 raise
... ...