Commit fb0dcd592c9067ec5bf9be1df45b2428c49daeb1

Authored by Philippe Lagadec
1 parent 145f062f

olevba: fixed issue #31 in VBA_Parser.open_mht

Showing 1 changed file with 21 additions and 1 deletion
oletools/olevba.py
@@ -25,7 +25,7 @@ https://github.com/unixfreak0037/officeparser @@ -25,7 +25,7 @@ https://github.com/unixfreak0037/officeparser
25 25
26 # === LICENSE ================================================================== 26 # === LICENSE ==================================================================
27 27
28 -# olevba is copyright (c) 2014-2015 Philippe Lagadec (http://www.decalage.info) 28 +# olevba is copyright (c) 2014-2016 Philippe Lagadec (http://www.decalage.info)
29 # All rights reserved. 29 # All rights reserved.
30 # 30 #
31 # Redistribution and use in source and binary forms, with or without modification, 31 # Redistribution and use in source and binary forms, with or without modification,
@@ -158,6 +158,7 @@ https://github.com/unixfreak0037/officeparser @@ -158,6 +158,7 @@ https://github.com/unixfreak0037/officeparser
158 # 2015-11-17 PL: - fixed bug with --decode option 158 # 2015-11-17 PL: - fixed bug with --decode option
159 # 2015-12-16 PL: - fixed bug in main (no options input anymore) 159 # 2015-12-16 PL: - fixed bug in main (no options input anymore)
160 # - improved logging, added -l option 160 # - improved logging, added -l option
  161 +# 2016-01-31 PL: - fixed issue #31 in VBA_Parser.open_mht
161 162
162 __version__ = '0.42' 163 __version__ = '0.42'
163 164
@@ -1939,6 +1940,19 @@ class VBA_Parser(object): @@ -1939,6 +1940,19 @@ class VBA_Parser(object):
1939 # parse the MIME content 1940 # parse the MIME content
1940 # remove any leading whitespace or newline (workaround for issue in email package) 1941 # remove any leading whitespace or newline (workaround for issue in email package)
1941 stripped_data = data.lstrip('\r\n\t ') 1942 stripped_data = data.lstrip('\r\n\t ')
  1943 + # strip any junk from the beginning of the file
  1944 + # (issue #31 fix by Greg C - gdigreg)
  1945 + # TODO: improve keywords to avoid false positives
  1946 + mime_offset = stripped_data.find('MIME')
  1947 + content_offset = stripped_data.find('Content')
  1948 + # if "MIME" is found, and located before "Content":
  1949 + if -1 < mime_offset <= content_offset:
  1950 + stripped_data = stripped_data[mime_offset:]
  1951 + # else if "Content" is found, and before "MIME"
  1952 + # TODO: can it work without "MIME" at all?
  1953 + elif content_offset > -1:
  1954 + stripped_data = stripped_data[content_offset:]
  1955 + # TODO: quick and dirty fix: insert a standard line with MIME-Version header?
1942 mhtml = email.message_from_string(stripped_data) 1956 mhtml = email.message_from_string(stripped_data)
1943 # find all the attached files: 1957 # find all the attached files:
1944 for part in mhtml.walk(): 1958 for part in mhtml.walk():
@@ -1966,6 +1980,12 @@ class VBA_Parser(object): @@ -1966,6 +1980,12 @@ class VBA_Parser(object):
1966 log.exception('Failed decompressing an MSO container in %r - %s' 1980 log.exception('Failed decompressing an MSO container in %r - %s'
1967 % (fname, MSG_OLEVBA_ISSUES)) 1981 % (fname, MSG_OLEVBA_ISSUES))
1968 # TODO: bug here - need to split in smaller functions/classes? 1982 # TODO: bug here - need to split in smaller functions/classes?
  1983 + else:
  1984 + try:
  1985 + log.debug('type(part_data) = %s' % type(part_data))
  1986 + log.debug('part_data[0:20] = %r' % part_data[0:20])
  1987 + except:
  1988 + pass
1969 # set type only if parsing succeeds 1989 # set type only if parsing succeeds
1970 self.type = TYPE_MHTML 1990 self.type = TYPE_MHTML
1971 except: 1991 except: