Commit fbbad0a2eb0ab3000045217187c68245030f465f

Authored by decalage2
1 parent 0982acb0

olevba: use new codepages module

Showing 1 changed file with 16 additions and 16 deletions
oletools/olevba.py
@@ -313,6 +313,7 @@ from oletools import oleform @@ -313,6 +313,7 @@ from oletools import oleform
313 from oletools import rtfobj 313 from oletools import rtfobj
314 from oletools import oleid 314 from oletools import oleid
315 from oletools.common.errors import FileIsEncryptedError 315 from oletools.common.errors import FileIsEncryptedError
  316 +from oletools.common import codepages
316 317
317 # monkeypatch email to fix issue #32: 318 # monkeypatch email to fix issue #32:
318 # allow header lines without ":" 319 # allow header lines without ":"
@@ -1413,17 +1414,9 @@ class VBA_Project(object): @@ -1413,17 +1414,9 @@ class VBA_Project(object):
1413 projectcodepage_size = struct.unpack("<L", dir_stream.read(4))[0] 1414 projectcodepage_size = struct.unpack("<L", dir_stream.read(4))[0]
1414 self.check_value('PROJECTCODEPAGE_Size', 0x0002, projectcodepage_size) 1415 self.check_value('PROJECTCODEPAGE_Size', 0x0002, projectcodepage_size)
1415 self.codepage = struct.unpack("<H", dir_stream.read(2))[0] 1416 self.codepage = struct.unpack("<H", dir_stream.read(2))[0]
1416 - log.debug('Project Code Page: %r' % self.codepage)  
1417 - if self.codepage in MAC_CODEPAGES:  
1418 - self.codec = MAC_CODEPAGES[self.codepage]  
1419 - else:  
1420 - self.codec = 'cp%d' % self.codepage  
1421 - # TODO: check if valid code page or raise a clear exception, and use UTF-8 as default?  
1422 - try:  
1423 - codecs.lookup(self.codec)  
1424 - except LookupError:  
1425 - log.error('Codec not found for code page %d, using UTF-8 as fallback.' % self.codepage)  
1426 - self.codec = 'utf8' 1417 + self.codepage_name = codepages.get_codepage_name(self.codepage)
  1418 + log.debug('Project Code Page: %r - %s' % (self.codepage, self.codepage_name))
  1419 + self.codec = codepages.codepage2codec(self.codepage)
1427 log.debug('Python codec corresponding to code page %d: %s' % (self.codepage, self.codec)) 1420 log.debug('Python codec corresponding to code page %d: %s' % (self.codepage, self.codec))
1428 1421
1429 1422
@@ -1525,6 +1518,7 @@ class VBA_Project(object): @@ -1525,6 +1518,7 @@ class VBA_Project(object):
1525 unused = projectconstants_constants_unicode 1518 unused = projectconstants_constants_unicode
1526 1519
1527 # array of REFERENCE records 1520 # array of REFERENCE records
  1521 + # Specifies a reference to an Automation type library or VBA project.
1528 check = None 1522 check = None
1529 while True: 1523 while True:
1530 check = struct.unpack("<H", dir_stream.read(2))[0] 1524 check = struct.unpack("<H", dir_stream.read(2))[0]
@@ -1534,6 +1528,7 @@ class VBA_Project(object): @@ -1534,6 +1528,7 @@ class VBA_Project(object):
1534 1528
1535 if check == 0x0016: 1529 if check == 0x0016:
1536 # REFERENCENAME 1530 # REFERENCENAME
  1531 + # Specifies the name of a referenced VBA project or Automation type library.
1537 reference_id = check 1532 reference_id = check
1538 reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] 1533 reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0]
1539 reference_name = dir_stream.read(reference_sizeof_name) 1534 reference_name = dir_stream.read(reference_sizeof_name)
@@ -1562,6 +1557,8 @@ class VBA_Project(object): @@ -1562,6 +1557,8 @@ class VBA_Project(object):
1562 1557
1563 if check == 0x0033: 1558 if check == 0x0033:
1564 # REFERENCEORIGINAL (followed by REFERENCECONTROL) 1559 # REFERENCEORIGINAL (followed by REFERENCECONTROL)
  1560 + # Specifies the identifier of the Automation type library from which the twiddled type
  1561 + # library of the containing REFERENCECONTROL (section 2.3.4.2.2.3) was generated.
1565 referenceoriginal_id = check 1562 referenceoriginal_id = check
1566 referenceoriginal_sizeof_libidoriginal = struct.unpack("<L", dir_stream.read(4))[0] 1563 referenceoriginal_sizeof_libidoriginal = struct.unpack("<L", dir_stream.read(4))[0]
1567 referenceoriginal_libidoriginal = dir_stream.read(referenceoriginal_sizeof_libidoriginal) 1564 referenceoriginal_libidoriginal = dir_stream.read(referenceoriginal_sizeof_libidoriginal)
@@ -1571,6 +1568,7 @@ class VBA_Project(object): @@ -1571,6 +1568,7 @@ class VBA_Project(object):
1571 1568
1572 if check == 0x002F: 1569 if check == 0x002F:
1573 # REFERENCECONTROL 1570 # REFERENCECONTROL
  1571 + # Specifies a reference to a twiddled type library and its extended type library.
1574 referencecontrol_id = check 1572 referencecontrol_id = check
1575 referencecontrol_sizetwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore 1573 referencecontrol_sizetwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore
1576 referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0] 1574 referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0]
@@ -1621,6 +1619,7 @@ class VBA_Project(object): @@ -1621,6 +1619,7 @@ class VBA_Project(object):
1621 1619
1622 if check == 0x000D: 1620 if check == 0x000D:
1623 # REFERENCEREGISTERED 1621 # REFERENCEREGISTERED
  1622 + # Specifies a reference to an Automation type library.
1624 referenceregistered_id = check 1623 referenceregistered_id = check
1625 referenceregistered_size = struct.unpack("<L", dir_stream.read(4))[0] 1624 referenceregistered_size = struct.unpack("<L", dir_stream.read(4))[0]
1626 referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0] 1625 referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0]
@@ -1636,6 +1635,7 @@ class VBA_Project(object): @@ -1636,6 +1635,7 @@ class VBA_Project(object):
1636 1635
1637 if check == 0x000E: 1636 if check == 0x000E:
1638 # REFERENCEPROJECT 1637 # REFERENCEPROJECT
  1638 + # Specifies a reference to an external VBA project.
1639 referenceproject_id = check 1639 referenceproject_id = check
1640 referenceproject_size = struct.unpack("<L", dir_stream.read(4))[0] 1640 referenceproject_size = struct.unpack("<L", dir_stream.read(4))[0]
1641 referenceproject_sizeof_libidabsolute = struct.unpack("<L", dir_stream.read(4))[0] 1641 referenceproject_sizeof_libidabsolute = struct.unpack("<L", dir_stream.read(4))[0]
@@ -1673,7 +1673,7 @@ class VBA_Project(object): @@ -1673,7 +1673,7 @@ class VBA_Project(object):
1673 # self.check_value('PROJECTMODULES_Id', 0x000F, projectmodules_id) 1673 # self.check_value('PROJECTMODULES_Id', 0x000F, projectmodules_id)
1674 projectmodules_size = struct.unpack("<L", dir_stream.read(4))[0] 1674 projectmodules_size = struct.unpack("<L", dir_stream.read(4))[0]
1675 self.check_value('PROJECTMODULES_Size', 0x0002, projectmodules_size) 1675 self.check_value('PROJECTMODULES_Size', 0x0002, projectmodules_size)
1676 - projectmodules_count = struct.unpack("<H", dir_stream.read(2))[0] 1676 + self.modules_count = struct.unpack("<H", dir_stream.read(2))[0]
1677 projectmodules_projectcookierecord_id = struct.unpack("<H", dir_stream.read(2))[0] 1677 projectmodules_projectcookierecord_id = struct.unpack("<H", dir_stream.read(2))[0]
1678 self.check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, projectmodules_projectcookierecord_id) 1678 self.check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, projectmodules_projectcookierecord_id)
1679 projectmodules_projectcookierecord_size = struct.unpack("<L", dir_stream.read(4))[0] 1679 projectmodules_projectcookierecord_size = struct.unpack("<L", dir_stream.read(4))[0]
@@ -1684,8 +1684,8 @@ class VBA_Project(object): @@ -1684,8 +1684,8 @@ class VBA_Project(object):
1684 # short function to simplify unicode text output 1684 # short function to simplify unicode text output
1685 uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace') 1685 uni_out = lambda unicode_text: unicode_text.encode('utf-8', 'replace')
1686 1686
1687 - log.debug("parsing {0} modules".format(projectmodules_count))  
1688 - for projectmodule_index in xrange(0, projectmodules_count): 1687 + log.debug("parsing {0} modules".format(self.modules_count))
  1688 + for projectmodule_index in xrange(0, self.modules_count):
1689 try: 1689 try:
1690 modulename_id = struct.unpack("<H", dir_stream.read(2))[0] 1690 modulename_id = struct.unpack("<H", dir_stream.read(2))[0]
1691 self.check_value('MODULENAME_Id', 0x0019, modulename_id) 1691 self.check_value('MODULENAME_Id', 0x0019, modulename_id)
@@ -1807,7 +1807,7 @@ class VBA_Project(object): @@ -1807,7 +1807,7 @@ class VBA_Project(object):
1807 1807
1808 if code_data is None: 1808 if code_data is None:
1809 log.info("Could not open stream %d of %d ('VBA/' + one of %r)!" 1809 log.info("Could not open stream %d of %d ('VBA/' + one of %r)!"
1810 - % (projectmodule_index, projectmodules_count, 1810 + % (projectmodule_index, self.modules_count,
1811 '/'.join("'" + uni_out(stream_name) + "'" 1811 '/'.join("'" + uni_out(stream_name) + "'"
1812 for stream_name in try_names))) 1812 for stream_name in try_names)))
1813 if self.relaxed: 1813 if self.relaxed:
@@ -1839,7 +1839,7 @@ class VBA_Project(object): @@ -1839,7 +1839,7 @@ class VBA_Project(object):
1839 raise 1839 raise
1840 except Exception as exc: 1840 except Exception as exc:
1841 log.info('Error parsing module {0} of {1} in _extract_vba:' 1841 log.info('Error parsing module {0} of {1} in _extract_vba:'
1842 - .format(projectmodule_index, projectmodules_count), 1842 + .format(projectmodule_index, self.modules_count),
1843 exc_info=True) 1843 exc_info=True)
1844 if not self.relaxed: 1844 if not self.relaxed:
1845 raise 1845 raise