Commit 82c4a54d92f41e173d934c30fff795c34a0d5888

Authored by decalage2
1 parent db990fea

olevba: temporary fix for issue #132 - the dir stream could not be parsed on some Mac files

Showing 1 changed file with 29 additions and 18 deletions
oletools/olevba.py
@@ -26,7 +26,7 @@ https://github.com/unixfreak0037/officeparser @@ -26,7 +26,7 @@ https://github.com/unixfreak0037/officeparser
26 26
27 # === LICENSE ================================================================== 27 # === LICENSE ==================================================================
28 28
29 -# olevba is copyright (c) 2014-2016 Philippe Lagadec (http://www.decalage.info) 29 +# olevba is copyright (c) 2014-2017 Philippe Lagadec (http://www.decalage.info)
30 # All rights reserved. 30 # All rights reserved.
31 # 31 #
32 # Redistribution and use in source and binary forms, with or without modification, 32 # Redistribution and use in source and binary forms, with or without modification,
@@ -188,8 +188,9 @@ from __future__ import print_function @@ -188,8 +188,9 @@ from __future__ import print_function
188 # 2016-09-12 PL: - enabled packrat to improve pyparsing performance 188 # 2016-09-12 PL: - enabled packrat to improve pyparsing performance
189 # 2016-10-25 PL: - fixed raise and print statements for Python 3 189 # 2016-10-25 PL: - fixed raise and print statements for Python 3
190 # 2016-11-03 v0.51 PL: - added EnumDateFormats and EnumSystemLanguageGroupsW 190 # 2016-11-03 v0.51 PL: - added EnumDateFormats and EnumSystemLanguageGroupsW
  191 +# 2017-02-07 PL: - temporary fix for issue #132
191 192
192 -__version__ = '0.51a' 193 +__version__ = '0.51dev1'
193 194
194 #------------------------------------------------------------------------------ 195 #------------------------------------------------------------------------------
195 # TODO: 196 # TODO:
@@ -1416,9 +1417,12 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1416,9 +1417,12 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1416 reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] 1417 reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0]
1417 reference_name = dir_stream.read(reference_sizeof_name) 1418 reference_name = dir_stream.read(reference_sizeof_name)
1418 reference_reserved = struct.unpack("<H", dir_stream.read(2))[0] 1419 reference_reserved = struct.unpack("<H", dir_stream.read(2))[0]
1419 - if reference_reserved not in (0x003E, 0x000D):  
1420 - raise UnexpectedDataError(dir_path, 'REFERENCE_Reserved',  
1421 - (0x003E, 0x000D), reference_reserved) 1420 + # According to [MS-OVBA] 2.3.4.2.2.2 REFERENCENAME Record:
  1421 + # "Reserved (2 bytes): MUST be 0x003E. MUST be ignored."
  1422 + # So let's ignore it, otherwise it crashes on some files (issue #132)
  1423 + # if reference_reserved not in (0x003E, 0x000D):
  1424 + # raise UnexpectedDataError(dir_path, 'REFERENCE_Reserved',
  1425 + # (0x003E, 0x000D), reference_reserved)
1422 reference_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] 1426 reference_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0]
1423 reference_name_unicode = dir_stream.read(reference_sizeof_name_unicode) 1427 reference_name_unicode = dir_stream.read(reference_sizeof_name_unicode)
1424 unused = reference_id 1428 unused = reference_id
@@ -1442,9 +1446,11 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1442,9 +1446,11 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1442 referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0] 1446 referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0]
1443 referencecontrol_libidtwiddled = dir_stream.read(referencecontrol_sizeof_libidtwiddled) 1447 referencecontrol_libidtwiddled = dir_stream.read(referencecontrol_sizeof_libidtwiddled)
1444 referencecontrol_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore 1448 referencecontrol_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore
1445 - check_value('REFERENCECONTROL_Reserved1', 0x0000, referencecontrol_reserved1) 1449 + # MS-OVBA: "Reserved1 (4 bytes): MUST be 0x00000000. MUST be ignored."
  1450 + # check_value('REFERENCECONTROL_Reserved1', 0x0000, referencecontrol_reserved1)
1446 referencecontrol_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore 1451 referencecontrol_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore
1447 - check_value('REFERENCECONTROL_Reserved2', 0x0000, referencecontrol_reserved2) 1452 + # MS-OVBA: "Reserved2 (2 bytes): MUST be 0x0000. MUST be ignored."
  1453 + # check_value('REFERENCECONTROL_Reserved2', 0x0000, referencecontrol_reserved2)
1448 unused = referencecontrol_id 1454 unused = referencecontrol_id
1449 unused = referencecontrol_sizetwiddled 1455 unused = referencecontrol_sizetwiddled
1450 unused = referencecontrol_libidtwiddled 1456 unused = referencecontrol_libidtwiddled
@@ -1456,8 +1462,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1456,8 +1462,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1456 referencecontrol_namerecordextended_name = dir_stream.read( 1462 referencecontrol_namerecordextended_name = dir_stream.read(
1457 referencecontrol_namerecordextended_sizeof_name) 1463 referencecontrol_namerecordextended_sizeof_name)
1458 referencecontrol_namerecordextended_reserved = struct.unpack("<H", dir_stream.read(2))[0] 1464 referencecontrol_namerecordextended_reserved = struct.unpack("<H", dir_stream.read(2))[0]
1459 - check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E,  
1460 - referencecontrol_namerecordextended_reserved) 1465 + # check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E,
  1466 + # referencecontrol_namerecordextended_reserved)
1461 referencecontrol_namerecordextended_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] 1467 referencecontrol_namerecordextended_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0]
1462 referencecontrol_namerecordextended_name_unicode = dir_stream.read( 1468 referencecontrol_namerecordextended_name_unicode = dir_stream.read(
1463 referencecontrol_namerecordextended_sizeof_name_unicode) 1469 referencecontrol_namerecordextended_sizeof_name_unicode)
@@ -1468,7 +1474,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1468,7 +1474,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1468 else: 1474 else:
1469 referencecontrol_reserved3 = check2 1475 referencecontrol_reserved3 = check2
1470 1476
1471 - check_value('REFERENCECONTROL_Reserved3', 0x0030, referencecontrol_reserved3) 1477 + # MS-OVBA: "Reserved3 (2 bytes): MUST be 0x0030. MUST be ignored."
  1478 + # check_value('REFERENCECONTROL_Reserved3', 0x0030, referencecontrol_reserved3)
1472 referencecontrol_sizeextended = struct.unpack("<L", dir_stream.read(4))[0] 1479 referencecontrol_sizeextended = struct.unpack("<L", dir_stream.read(4))[0]
1473 referencecontrol_sizeof_libidextended = struct.unpack("<L", dir_stream.read(4))[0] 1480 referencecontrol_sizeof_libidextended = struct.unpack("<L", dir_stream.read(4))[0]
1474 referencecontrol_libidextended = dir_stream.read(referencecontrol_sizeof_libidextended) 1481 referencecontrol_libidextended = dir_stream.read(referencecontrol_sizeof_libidextended)
@@ -1491,9 +1498,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): @@ -1491,9 +1498,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1491 referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0] 1498 referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0]
1492 referenceregistered_libid = dir_stream.read(referenceregistered_sizeof_libid) 1499 referenceregistered_libid = dir_stream.read(referenceregistered_sizeof_libid)
1493 referenceregistered_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] 1500 referenceregistered_reserved1 = struct.unpack("<L", dir_stream.read(4))[0]
1494 - check_value('REFERENCEREGISTERED_Reserved1', 0x0000, referenceregistered_reserved1) 1501 + # check_value('REFERENCEREGISTERED_Reserved1', 0x0000, referenceregistered_reserved1)
1495 referenceregistered_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] 1502 referenceregistered_reserved2 = struct.unpack("<H", dir_stream.read(2))[0]
1496 - check_value('REFERENCEREGISTERED_Reserved2', 0x0000, referenceregistered_reserved2) 1503 + # check_value('REFERENCEREGISTERED_Reserved2', 0x0000, referenceregistered_reserved2)
1497 unused = referenceregistered_id 1504 unused = referenceregistered_id
1498 unused = referenceregistered_size 1505 unused = referenceregistered_size
1499 unused = referenceregistered_libid 1506 unused = referenceregistered_libid
@@ -2709,12 +2716,16 @@ class VBA_Parser(object): @@ -2709,12 +2716,16 @@ class VBA_Parser(object):
2709 vba_stream_ids = set() 2716 vba_stream_ids = set()
2710 for vba_root, project_path, dir_path in self.vba_projects: 2717 for vba_root, project_path, dir_path in self.vba_projects:
2711 # extract all VBA macros from that VBA root storage: 2718 # extract all VBA macros from that VBA root storage:
2712 - for stream_path, vba_filename, vba_code in \  
2713 - _extract_vba(self.ole_file, vba_root, project_path,  
2714 - dir_path, self.relaxed):  
2715 - # store direntry ids in a set:  
2716 - vba_stream_ids.add(self.ole_file._find(stream_path))  
2717 - yield (self.filename, stream_path, vba_filename, vba_code) 2719 + # The function _extract_vba may fail on some files (issue #132)
  2720 + try:
  2721 + for stream_path, vba_filename, vba_code in \
  2722 + _extract_vba(self.ole_file, vba_root, project_path,
  2723 + dir_path, self.relaxed):
  2724 + # store direntry ids in a set:
  2725 + vba_stream_ids.add(self.ole_file._find(stream_path))
  2726 + yield (self.filename, stream_path, vba_filename, vba_code)
  2727 + except Exception as e:
  2728 + log.exception('Error in _extract_vba')
2718 # Also look for VBA code in any stream including orphans 2729 # Also look for VBA code in any stream including orphans
2719 # (happens in some malformed files) 2730 # (happens in some malformed files)
2720 ole = self.ole_file 2731 ole = self.ole_file