Commit 9e355a9d79489b76a81f9f796bc80e135be92a0a

Authored by Christian Herdtweck
1 parent eb680811

deal with the case where the offset in mso_file_extract might not be found,

and specify some more exceptions in mso_file_extract
Showing 1 changed file with 14 additions and 10 deletions
oletools/olevba.py
... ... @@ -871,25 +871,29 @@ def mso_file_extract(data):
871 871 """
872 872 # check the magic:
873 873 assert is_mso_file(data)
  874 +
  875 + # In all the samples seen so far, Word always uses an offset of 0x32,
  876 + # and Excel 0x22A. But we read the offset from the header to be more
  877 + # generic.
  878 + offsets = [0x32, 0x22A]
  879 +
874 880 # First, attempt to get the compressed data offset from the header
875 881 # According to my tests, it should be an unsigned 16 bits integer,
876 882 # at offset 0x1E (little endian) + add 46:
877 883 try:
878 884 offset = struct.unpack_from('<H', data, offset=0x1E)[0] + 46
879 885 log.debug('Parsing MSO file: data offset = 0x%X' % offset)
880   - except Exception:
881   - log.exception('Unable to parse MSO/ActiveMime file header')
882   - raise RuntimeError('Unable to parse MSO/ActiveMime file header')
883   - # In all the samples seen so far, Word always uses an offset of 0x32,
884   - # and Excel 0x22A. But we read the offset from the header to be more
885   - # generic.
886   - # Let's try that offset, then 0x32 and 0x22A, just in case:
887   - for start in (offset, 0x32, 0x22A):
  886 + offsets.insert(0, offset) # insert at beginning of offsets
  887 + except struct.error as exc:
  888 + log.exception('Unable to parse MSO/ActiveMime file header (%s)' % exc)
  889 + raise MsoExtractionError('Unable to parse MSO/ActiveMime file header')
  890 + # now try offsets
  891 + for start in offsets:
888 892 try:
889 893 log.debug('Attempting zlib decompression from MSO file offset 0x%X' % start)
890 894 extracted_data = zlib.decompress(data[start:])
891 895 return extracted_data
892   - except Exception:
  896 + except zlib.error as exc:
893 897 log.exception('zlib decompression failed')
894 898 # None of the guessed offsets worked, let's try brute-forcing by looking
895 899 # for potential zlib-compressed blocks starting with 0x78:
... ... @@ -900,7 +904,7 @@ def mso_file_extract(data):
900 904 log.debug('Attempting zlib decompression from MSO file offset 0x%X' % start)
901 905 extracted_data = zlib.decompress(data[start:])
902 906 return extracted_data
903   - except Exception:
  907 + except zlib.error as exc:
904 908 log.exception('zlib decompression failed')
905 909 raise MsoExtractionError('Unable to decompress data from a MSO/ActiveMime file')
906 910  
... ...