Commit 82c4a54d92f41e173d934c30fff795c34a0d5888
1 parent: db990fea
olevba: temporary fix for issue #132 - the dir stream could not be parsed on some Mac files
Showing 1 changed file with 29 additions and 18 deletions
oletools/olevba.py
| @@ -26,7 +26,7 @@ https://github.com/unixfreak0037/officeparser | @@ -26,7 +26,7 @@ https://github.com/unixfreak0037/officeparser | ||
| 26 | 26 | ||
| 27 | # === LICENSE ================================================================== | 27 | # === LICENSE ================================================================== |
| 28 | 28 | ||
| 29 | -# olevba is copyright (c) 2014-2016 Philippe Lagadec (http://www.decalage.info) | 29 | +# olevba is copyright (c) 2014-2017 Philippe Lagadec (http://www.decalage.info) |
| 30 | # All rights reserved. | 30 | # All rights reserved. |
| 31 | # | 31 | # |
| 32 | # Redistribution and use in source and binary forms, with or without modification, | 32 | # Redistribution and use in source and binary forms, with or without modification, |
| @@ -188,8 +188,9 @@ from __future__ import print_function | @@ -188,8 +188,9 @@ from __future__ import print_function | ||
| 188 | # 2016-09-12 PL: - enabled packrat to improve pyparsing performance | 188 | # 2016-09-12 PL: - enabled packrat to improve pyparsing performance |
| 189 | # 2016-10-25 PL: - fixed raise and print statements for Python 3 | 189 | # 2016-10-25 PL: - fixed raise and print statements for Python 3 |
| 190 | # 2016-11-03 v0.51 PL: - added EnumDateFormats and EnumSystemLanguageGroupsW | 190 | # 2016-11-03 v0.51 PL: - added EnumDateFormats and EnumSystemLanguageGroupsW |
| 191 | +# 2017-02-07 PL: - temporary fix for issue #132 | ||
| 191 | 192 | ||
| 192 | -__version__ = '0.51a' | 193 | +__version__ = '0.51dev1' |
| 193 | 194 | ||
| 194 | #------------------------------------------------------------------------------ | 195 | #------------------------------------------------------------------------------ |
| 195 | # TODO: | 196 | # TODO: |
| @@ -1416,9 +1417,12 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | @@ -1416,9 +1417,12 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | ||
| 1416 | reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] | 1417 | reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] |
| 1417 | reference_name = dir_stream.read(reference_sizeof_name) | 1418 | reference_name = dir_stream.read(reference_sizeof_name) |
| 1418 | reference_reserved = struct.unpack("<H", dir_stream.read(2))[0] | 1419 | reference_reserved = struct.unpack("<H", dir_stream.read(2))[0] |
| 1419 | - if reference_reserved not in (0x003E, 0x000D): | ||
| 1420 | - raise UnexpectedDataError(dir_path, 'REFERENCE_Reserved', | ||
| 1421 | - (0x003E, 0x000D), reference_reserved) | 1420 | + # According to [MS-OVBA] 2.3.4.2.2.2 REFERENCENAME Record: |
| 1421 | + # "Reserved (2 bytes): MUST be 0x003E. MUST be ignored." | ||
| 1422 | + # So let's ignore it, otherwise it crashes on some files (issue #132) | ||
| 1423 | + # if reference_reserved not in (0x003E, 0x000D): | ||
| 1424 | + # raise UnexpectedDataError(dir_path, 'REFERENCE_Reserved', | ||
| 1425 | + # (0x003E, 0x000D), reference_reserved) | ||
| 1422 | reference_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] | 1426 | reference_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] |
| 1423 | reference_name_unicode = dir_stream.read(reference_sizeof_name_unicode) | 1427 | reference_name_unicode = dir_stream.read(reference_sizeof_name_unicode) |
| 1424 | unused = reference_id | 1428 | unused = reference_id |
| @@ -1442,9 +1446,11 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | @@ -1442,9 +1446,11 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | ||
| 1442 | referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0] | 1446 | referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0] |
| 1443 | referencecontrol_libidtwiddled = dir_stream.read(referencecontrol_sizeof_libidtwiddled) | 1447 | referencecontrol_libidtwiddled = dir_stream.read(referencecontrol_sizeof_libidtwiddled) |
| 1444 | referencecontrol_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore | 1448 | referencecontrol_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore |
| 1445 | - check_value('REFERENCECONTROL_Reserved1', 0x0000, referencecontrol_reserved1) | 1449 | + # MS-OVBA: "Reserved1 (4 bytes): MUST be 0x00000000. MUST be ignored." |
| 1450 | + # check_value('REFERENCECONTROL_Reserved1', 0x0000, referencecontrol_reserved1) | ||
| 1446 | referencecontrol_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore | 1451 | referencecontrol_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore |
| 1447 | - check_value('REFERENCECONTROL_Reserved2', 0x0000, referencecontrol_reserved2) | 1452 | + # MS-OVBA: "Reserved2 (2 bytes): MUST be 0x0000. MUST be ignored." |
| 1453 | + # check_value('REFERENCECONTROL_Reserved2', 0x0000, referencecontrol_reserved2) | ||
| 1448 | unused = referencecontrol_id | 1454 | unused = referencecontrol_id |
| 1449 | unused = referencecontrol_sizetwiddled | 1455 | unused = referencecontrol_sizetwiddled |
| 1450 | unused = referencecontrol_libidtwiddled | 1456 | unused = referencecontrol_libidtwiddled |
| @@ -1456,8 +1462,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | @@ -1456,8 +1462,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | ||
| 1456 | referencecontrol_namerecordextended_name = dir_stream.read( | 1462 | referencecontrol_namerecordextended_name = dir_stream.read( |
| 1457 | referencecontrol_namerecordextended_sizeof_name) | 1463 | referencecontrol_namerecordextended_sizeof_name) |
| 1458 | referencecontrol_namerecordextended_reserved = struct.unpack("<H", dir_stream.read(2))[0] | 1464 | referencecontrol_namerecordextended_reserved = struct.unpack("<H", dir_stream.read(2))[0] |
| 1459 | - check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E, | ||
| 1460 | - referencecontrol_namerecordextended_reserved) | 1465 | + # check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E, |
| 1466 | + # referencecontrol_namerecordextended_reserved) | ||
| 1461 | referencecontrol_namerecordextended_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] | 1467 | referencecontrol_namerecordextended_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] |
| 1462 | referencecontrol_namerecordextended_name_unicode = dir_stream.read( | 1468 | referencecontrol_namerecordextended_name_unicode = dir_stream.read( |
| 1463 | referencecontrol_namerecordextended_sizeof_name_unicode) | 1469 | referencecontrol_namerecordextended_sizeof_name_unicode) |
| @@ -1468,7 +1474,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | @@ -1468,7 +1474,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | ||
| 1468 | else: | 1474 | else: |
| 1469 | referencecontrol_reserved3 = check2 | 1475 | referencecontrol_reserved3 = check2 |
| 1470 | 1476 | ||
| 1471 | - check_value('REFERENCECONTROL_Reserved3', 0x0030, referencecontrol_reserved3) | 1477 | + # MS-OVBA: "Reserved3 (2 bytes): MUST be 0x0030. MUST be ignored." |
| 1478 | + # check_value('REFERENCECONTROL_Reserved3', 0x0030, referencecontrol_reserved3) | ||
| 1472 | referencecontrol_sizeextended = struct.unpack("<L", dir_stream.read(4))[0] | 1479 | referencecontrol_sizeextended = struct.unpack("<L", dir_stream.read(4))[0] |
| 1473 | referencecontrol_sizeof_libidextended = struct.unpack("<L", dir_stream.read(4))[0] | 1480 | referencecontrol_sizeof_libidextended = struct.unpack("<L", dir_stream.read(4))[0] |
| 1474 | referencecontrol_libidextended = dir_stream.read(referencecontrol_sizeof_libidextended) | 1481 | referencecontrol_libidextended = dir_stream.read(referencecontrol_sizeof_libidextended) |
| @@ -1491,9 +1498,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | @@ -1491,9 +1498,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): | ||
| 1491 | referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0] | 1498 | referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0] |
| 1492 | referenceregistered_libid = dir_stream.read(referenceregistered_sizeof_libid) | 1499 | referenceregistered_libid = dir_stream.read(referenceregistered_sizeof_libid) |
| 1493 | referenceregistered_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] | 1500 | referenceregistered_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] |
| 1494 | - check_value('REFERENCEREGISTERED_Reserved1', 0x0000, referenceregistered_reserved1) | 1501 | + # check_value('REFERENCEREGISTERED_Reserved1', 0x0000, referenceregistered_reserved1) |
| 1495 | referenceregistered_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] | 1502 | referenceregistered_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] |
| 1496 | - check_value('REFERENCEREGISTERED_Reserved2', 0x0000, referenceregistered_reserved2) | 1503 | + # check_value('REFERENCEREGISTERED_Reserved2', 0x0000, referenceregistered_reserved2) |
| 1497 | unused = referenceregistered_id | 1504 | unused = referenceregistered_id |
| 1498 | unused = referenceregistered_size | 1505 | unused = referenceregistered_size |
| 1499 | unused = referenceregistered_libid | 1506 | unused = referenceregistered_libid |
| @@ -2709,12 +2716,16 @@ class VBA_Parser(object): | @@ -2709,12 +2716,16 @@ class VBA_Parser(object): | ||
| 2709 | vba_stream_ids = set() | 2716 | vba_stream_ids = set() |
| 2710 | for vba_root, project_path, dir_path in self.vba_projects: | 2717 | for vba_root, project_path, dir_path in self.vba_projects: |
| 2711 | # extract all VBA macros from that VBA root storage: | 2718 | # extract all VBA macros from that VBA root storage: |
| 2712 | - for stream_path, vba_filename, vba_code in \ | ||
| 2713 | - _extract_vba(self.ole_file, vba_root, project_path, | ||
| 2714 | - dir_path, self.relaxed): | ||
| 2715 | - # store direntry ids in a set: | ||
| 2716 | - vba_stream_ids.add(self.ole_file._find(stream_path)) | ||
| 2717 | - yield (self.filename, stream_path, vba_filename, vba_code) | 2719 | + # The function _extract_vba may fail on some files (issue #132) |
| 2720 | + try: | ||
| 2721 | + for stream_path, vba_filename, vba_code in \ | ||
| 2722 | + _extract_vba(self.ole_file, vba_root, project_path, | ||
| 2723 | + dir_path, self.relaxed): | ||
| 2724 | + # store direntry ids in a set: | ||
| 2725 | + vba_stream_ids.add(self.ole_file._find(stream_path)) | ||
| 2726 | + yield (self.filename, stream_path, vba_filename, vba_code) | ||
| 2727 | + except Exception as e: | ||
| 2728 | + log.exception('Error in _extract_vba') | ||
| 2718 | # Also look for VBA code in any stream including orphans | 2729 | # Also look for VBA code in any stream including orphans |
| 2719 | # (happens in some malformed files) | 2730 | # (happens in some malformed files) |
| 2720 | ole = self.ole_file | 2731 | ole = self.ole_file |