Commit 82c4a54d92f41e173d934c30fff795c34a0d5888

Authored by decalage2
1 parent db990fea

olevba: temporary fix for issue #132 - the dir stream could not be parsed on some Mac files

Showing 1 changed file with 29 additions and 18 deletions
oletools/olevba.py
... ... @@ -26,7 +26,7 @@ https://github.com/unixfreak0037/officeparser
26 26  
27 27 # === LICENSE ==================================================================
28 28  
29   -# olevba is copyright (c) 2014-2016 Philippe Lagadec (http://www.decalage.info)
  29 +# olevba is copyright (c) 2014-2017 Philippe Lagadec (http://www.decalage.info)
30 30 # All rights reserved.
31 31 #
32 32 # Redistribution and use in source and binary forms, with or without modification,
... ... @@ -188,8 +188,9 @@ from __future__ import print_function
188 188 # 2016-09-12 PL: - enabled packrat to improve pyparsing performance
189 189 # 2016-10-25 PL: - fixed raise and print statements for Python 3
190 190 # 2016-11-03 v0.51 PL: - added EnumDateFormats and EnumSystemLanguageGroupsW
  191 +# 2017-02-07 PL: - temporary fix for issue #132
191 192  
192   -__version__ = '0.51a'
  193 +__version__ = '0.51dev1'
193 194  
194 195 #------------------------------------------------------------------------------
195 196 # TODO:
... ... @@ -1416,9 +1417,12 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1416 1417 reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0]
1417 1418 reference_name = dir_stream.read(reference_sizeof_name)
1418 1419 reference_reserved = struct.unpack("<H", dir_stream.read(2))[0]
1419   - if reference_reserved not in (0x003E, 0x000D):
1420   - raise UnexpectedDataError(dir_path, 'REFERENCE_Reserved',
1421   - (0x003E, 0x000D), reference_reserved)
  1420 + # According to [MS-OVBA] 2.3.4.2.2.2 REFERENCENAME Record:
  1421 + # "Reserved (2 bytes): MUST be 0x003E. MUST be ignored."
  1422 + # So ignore it: validating it raised UnexpectedDataError on some files (issue #132)
  1423 + # if reference_reserved not in (0x003E, 0x000D):
  1424 + # raise UnexpectedDataError(dir_path, 'REFERENCE_Reserved',
  1425 + # (0x003E, 0x000D), reference_reserved)
1422 1426 reference_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0]
1423 1427 reference_name_unicode = dir_stream.read(reference_sizeof_name_unicode)
1424 1428 unused = reference_id
... ... @@ -1442,9 +1446,11 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1442 1446 referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0]
1443 1447 referencecontrol_libidtwiddled = dir_stream.read(referencecontrol_sizeof_libidtwiddled)
1444 1448 referencecontrol_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore
1445   - check_value('REFERENCECONTROL_Reserved1', 0x0000, referencecontrol_reserved1)
  1449 + # MS-OVBA: "Reserved1 (4 bytes): MUST be 0x00000000. MUST be ignored."
  1450 + # check_value('REFERENCECONTROL_Reserved1', 0x0000, referencecontrol_reserved1)
1446 1451 referencecontrol_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore
1447   - check_value('REFERENCECONTROL_Reserved2', 0x0000, referencecontrol_reserved2)
  1452 + # MS-OVBA: "Reserved2 (2 bytes): MUST be 0x0000. MUST be ignored."
  1453 + # check_value('REFERENCECONTROL_Reserved2', 0x0000, referencecontrol_reserved2)
1448 1454 unused = referencecontrol_id
1449 1455 unused = referencecontrol_sizetwiddled
1450 1456 unused = referencecontrol_libidtwiddled
... ... @@ -1456,8 +1462,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1456 1462 referencecontrol_namerecordextended_name = dir_stream.read(
1457 1463 referencecontrol_namerecordextended_sizeof_name)
1458 1464 referencecontrol_namerecordextended_reserved = struct.unpack("<H", dir_stream.read(2))[0]
1459   - check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E,
1460   - referencecontrol_namerecordextended_reserved)
  1465 + # check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E,
  1466 + # referencecontrol_namerecordextended_reserved)
1461 1467 referencecontrol_namerecordextended_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0]
1462 1468 referencecontrol_namerecordextended_name_unicode = dir_stream.read(
1463 1469 referencecontrol_namerecordextended_sizeof_name_unicode)
... ... @@ -1468,7 +1474,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1468 1474 else:
1469 1475 referencecontrol_reserved3 = check2
1470 1476  
1471   - check_value('REFERENCECONTROL_Reserved3', 0x0030, referencecontrol_reserved3)
  1477 + # MS-OVBA: "Reserved3 (2 bytes): MUST be 0x0030. MUST be ignored."
  1478 + # check_value('REFERENCECONTROL_Reserved3', 0x0030, referencecontrol_reserved3)
1472 1479 referencecontrol_sizeextended = struct.unpack("<L", dir_stream.read(4))[0]
1473 1480 referencecontrol_sizeof_libidextended = struct.unpack("<L", dir_stream.read(4))[0]
1474 1481 referencecontrol_libidextended = dir_stream.read(referencecontrol_sizeof_libidextended)
... ... @@ -1491,9 +1498,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False):
1491 1498 referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0]
1492 1499 referenceregistered_libid = dir_stream.read(referenceregistered_sizeof_libid)
1493 1500 referenceregistered_reserved1 = struct.unpack("<L", dir_stream.read(4))[0]
1494   - check_value('REFERENCEREGISTERED_Reserved1', 0x0000, referenceregistered_reserved1)
  1501 + # check_value('REFERENCEREGISTERED_Reserved1', 0x0000, referenceregistered_reserved1)
1495 1502 referenceregistered_reserved2 = struct.unpack("<H", dir_stream.read(2))[0]
1496   - check_value('REFERENCEREGISTERED_Reserved2', 0x0000, referenceregistered_reserved2)
  1503 + # check_value('REFERENCEREGISTERED_Reserved2', 0x0000, referenceregistered_reserved2)
1497 1504 unused = referenceregistered_id
1498 1505 unused = referenceregistered_size
1499 1506 unused = referenceregistered_libid
... ... @@ -2709,12 +2716,16 @@ class VBA_Parser(object):
2709 2716 vba_stream_ids = set()
2710 2717 for vba_root, project_path, dir_path in self.vba_projects:
2711 2718 # extract all VBA macros from that VBA root storage:
2712   - for stream_path, vba_filename, vba_code in \
2713   - _extract_vba(self.ole_file, vba_root, project_path,
2714   - dir_path, self.relaxed):
2715   - # store direntry ids in a set:
2716   - vba_stream_ids.add(self.ole_file._find(stream_path))
2717   - yield (self.filename, stream_path, vba_filename, vba_code)
  2719 +# _extract_vba may raise on some files (issue #132): log the error instead of aborting the extraction
  2720 + try:
  2721 + for stream_path, vba_filename, vba_code in \
  2722 + _extract_vba(self.ole_file, vba_root, project_path,
  2723 + dir_path, self.relaxed):
  2724 + # store direntry ids in a set:
  2725 + vba_stream_ids.add(self.ole_file._find(stream_path))
  2726 + yield (self.filename, stream_path, vba_filename, vba_code)
  2727 + except Exception as e:
  2728 + log.exception('Error in _extract_vba')
2718 2729 # Also look for VBA code in any stream including orphans
2719 2730 # (happens in some malformed files)
2720 2731 ole = self.ole_file
... ...