Commit 82c4a54d92f41e173d934c30fff795c34a0d5888
1 parent: db990fea
olevba: temporary fix for issue #132 - the dir stream could not be parsed on some Mac files
Showing 1 changed file with 29 additions and 18 deletions
oletools/olevba.py
| ... | ... | @@ -26,7 +26,7 @@ https://github.com/unixfreak0037/officeparser |
| 26 | 26 | |
| 27 | 27 | # === LICENSE ================================================================== |
| 28 | 28 | |
| 29 | -# olevba is copyright (c) 2014-2016 Philippe Lagadec (http://www.decalage.info) | |
| 29 | +# olevba is copyright (c) 2014-2017 Philippe Lagadec (http://www.decalage.info) | |
| 30 | 30 | # All rights reserved. |
| 31 | 31 | # |
| 32 | 32 | # Redistribution and use in source and binary forms, with or without modification, |
| ... | ... | @@ -188,8 +188,9 @@ from __future__ import print_function |
| 188 | 188 | # 2016-09-12 PL: - enabled packrat to improve pyparsing performance |
| 189 | 189 | # 2016-10-25 PL: - fixed raise and print statements for Python 3 |
| 190 | 190 | # 2016-11-03 v0.51 PL: - added EnumDateFormats and EnumSystemLanguageGroupsW |
| 191 | +# 2017-02-07 PL: - temporary fix for issue #132 | |
| 191 | 192 | |
| 192 | -__version__ = '0.51a' | |
| 193 | +__version__ = '0.51dev1' | |
| 193 | 194 | |
| 194 | 195 | #------------------------------------------------------------------------------ |
| 195 | 196 | # TODO: |
| ... | ... | @@ -1416,9 +1417,12 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1416 | 1417 | reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] |
| 1417 | 1418 | reference_name = dir_stream.read(reference_sizeof_name) |
| 1418 | 1419 | reference_reserved = struct.unpack("<H", dir_stream.read(2))[0] |
| 1419 | - if reference_reserved not in (0x003E, 0x000D): | |
| 1420 | - raise UnexpectedDataError(dir_path, 'REFERENCE_Reserved', | |
| 1421 | - (0x003E, 0x000D), reference_reserved) | |
| 1420 | + # According to [MS-OVBA] 2.3.4.2.2.2 REFERENCENAME Record: | |
| 1421 | + # "Reserved (2 bytes): MUST be 0x003E. MUST be ignored." | |
| 1422 | + # So let's ignore it, otherwise it crashes on some files (issue #132) | |
| 1423 | + # if reference_reserved not in (0x003E, 0x000D): | |
| 1424 | + # raise UnexpectedDataError(dir_path, 'REFERENCE_Reserved', | |
| 1425 | + # (0x003E, 0x000D), reference_reserved) | |
| 1422 | 1426 | reference_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] |
| 1423 | 1427 | reference_name_unicode = dir_stream.read(reference_sizeof_name_unicode) |
| 1424 | 1428 | unused = reference_id |
| ... | ... | @@ -1442,9 +1446,11 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1442 | 1446 | referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0] |
| 1443 | 1447 | referencecontrol_libidtwiddled = dir_stream.read(referencecontrol_sizeof_libidtwiddled) |
| 1444 | 1448 | referencecontrol_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore |
| 1445 | - check_value('REFERENCECONTROL_Reserved1', 0x0000, referencecontrol_reserved1) | |
| 1449 | + # MS-OVBA: "Reserved1 (4 bytes): MUST be 0x00000000. MUST be ignored." | |
| 1450 | + # check_value('REFERENCECONTROL_Reserved1', 0x0000, referencecontrol_reserved1) | |
| 1446 | 1451 | referencecontrol_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore |
| 1447 | - check_value('REFERENCECONTROL_Reserved2', 0x0000, referencecontrol_reserved2) | |
| 1452 | + # MS-OVBA: "Reserved2 (2 bytes): MUST be 0x0000. MUST be ignored." | |
| 1453 | + # check_value('REFERENCECONTROL_Reserved2', 0x0000, referencecontrol_reserved2) | |
| 1448 | 1454 | unused = referencecontrol_id |
| 1449 | 1455 | unused = referencecontrol_sizetwiddled |
| 1450 | 1456 | unused = referencecontrol_libidtwiddled |
| ... | ... | @@ -1456,8 +1462,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1456 | 1462 | referencecontrol_namerecordextended_name = dir_stream.read( |
| 1457 | 1463 | referencecontrol_namerecordextended_sizeof_name) |
| 1458 | 1464 | referencecontrol_namerecordextended_reserved = struct.unpack("<H", dir_stream.read(2))[0] |
| 1459 | - check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E, | |
| 1460 | - referencecontrol_namerecordextended_reserved) | |
| 1465 | + # check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E, | |
| 1466 | + # referencecontrol_namerecordextended_reserved) | |
| 1461 | 1467 | referencecontrol_namerecordextended_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] |
| 1462 | 1468 | referencecontrol_namerecordextended_name_unicode = dir_stream.read( |
| 1463 | 1469 | referencecontrol_namerecordextended_sizeof_name_unicode) |
| ... | ... | @@ -1468,7 +1474,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1468 | 1474 | else: |
| 1469 | 1475 | referencecontrol_reserved3 = check2 |
| 1470 | 1476 | |
| 1471 | - check_value('REFERENCECONTROL_Reserved3', 0x0030, referencecontrol_reserved3) | |
| 1477 | + # MS-OVBA: "Reserved3 (2 bytes): MUST be 0x0030. MUST be ignored." | |
| 1478 | + # check_value('REFERENCECONTROL_Reserved3', 0x0030, referencecontrol_reserved3) | |
| 1472 | 1479 | referencecontrol_sizeextended = struct.unpack("<L", dir_stream.read(4))[0] |
| 1473 | 1480 | referencecontrol_sizeof_libidextended = struct.unpack("<L", dir_stream.read(4))[0] |
| 1474 | 1481 | referencecontrol_libidextended = dir_stream.read(referencecontrol_sizeof_libidextended) |
| ... | ... | @@ -1491,9 +1498,9 @@ def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=False): |
| 1491 | 1498 | referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0] |
| 1492 | 1499 | referenceregistered_libid = dir_stream.read(referenceregistered_sizeof_libid) |
| 1493 | 1500 | referenceregistered_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] |
| 1494 | - check_value('REFERENCEREGISTERED_Reserved1', 0x0000, referenceregistered_reserved1) | |
| 1501 | + # check_value('REFERENCEREGISTERED_Reserved1', 0x0000, referenceregistered_reserved1) | |
| 1495 | 1502 | referenceregistered_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] |
| 1496 | - check_value('REFERENCEREGISTERED_Reserved2', 0x0000, referenceregistered_reserved2) | |
| 1503 | + # check_value('REFERENCEREGISTERED_Reserved2', 0x0000, referenceregistered_reserved2) | |
| 1497 | 1504 | unused = referenceregistered_id |
| 1498 | 1505 | unused = referenceregistered_size |
| 1499 | 1506 | unused = referenceregistered_libid |
| ... | ... | @@ -2709,12 +2716,16 @@ class VBA_Parser(object): |
| 2709 | 2716 | vba_stream_ids = set() |
| 2710 | 2717 | for vba_root, project_path, dir_path in self.vba_projects: |
| 2711 | 2718 | # extract all VBA macros from that VBA root storage: |
| 2712 | - for stream_path, vba_filename, vba_code in \ | |
| 2713 | - _extract_vba(self.ole_file, vba_root, project_path, | |
| 2714 | - dir_path, self.relaxed): | |
| 2715 | - # store direntry ids in a set: | |
| 2716 | - vba_stream_ids.add(self.ole_file._find(stream_path)) | |
| 2717 | - yield (self.filename, stream_path, vba_filename, vba_code) | |
| 2719 | + # The function _extract_vba may fail on some files (issue #132) | |
| 2720 | + try: | |
| 2721 | + for stream_path, vba_filename, vba_code in \ | |
| 2722 | + _extract_vba(self.ole_file, vba_root, project_path, | |
| 2723 | + dir_path, self.relaxed): | |
| 2724 | + # store direntry ids in a set: | |
| 2725 | + vba_stream_ids.add(self.ole_file._find(stream_path)) | |
| 2726 | + yield (self.filename, stream_path, vba_filename, vba_code) | |
| 2727 | + except Exception as e: | |
| 2728 | + log.exception('Error in _extract_vba') | |
| 2718 | 2729 | # Also look for VBA code in any stream including orphans |
| 2719 | 2730 | # (happens in some malformed files) |
| 2720 | 2731 | ole = self.ole_file | ... | ... |