Commit fb0dcd592c9067ec5bf9be1df45b2428c49daeb1

Authored by Philippe Lagadec
1 parent 145f062f

olevba: fixed issue #31 in VBA_Parser.open_mht

Showing 1 changed file with 21 additions and 1 deletion
oletools/olevba.py
... ... @@ -25,7 +25,7 @@ https://github.com/unixfreak0037/officeparser
25 25  
26 26 # === LICENSE ==================================================================
27 27  
28   -# olevba is copyright (c) 2014-2015 Philippe Lagadec (http://www.decalage.info)
  28 +# olevba is copyright (c) 2014-2016 Philippe Lagadec (http://www.decalage.info)
29 29 # All rights reserved.
30 30 #
31 31 # Redistribution and use in source and binary forms, with or without modification,
... ... @@ -158,6 +158,7 @@ https://github.com/unixfreak0037/officeparser
158 158 # 2015-11-17 PL: - fixed bug with --decode option
159 159 # 2015-12-16 PL: - fixed bug in main (no options input anymore)
160 160 # - improved logging, added -l option
  161 +# 2016-01-31 PL: - fixed issue #31 in VBA_Parser.open_mht
161 162  
162 163 __version__ = '0.42'
163 164  
... ... @@ -1939,6 +1940,19 @@ class VBA_Parser(object):
1939 1940 # parse the MIME content
1940 1941 # remove any leading whitespace or newline (workaround for issue in email package)
1941 1942 stripped_data = data.lstrip('\r\n\t ')
  1943 + # strip any junk from the beginning of the file
  1944 + # (issue #31 fix by Greg C - gdigreg)
  1945 + # TODO: improve keywords to avoid false positives
  1946 + mime_offset = stripped_data.find('MIME')
  1947 + content_offset = stripped_data.find('Content')
  1948 + # if "MIME" is found, and located before "Content":
  1949 + if -1 < mime_offset <= content_offset:
  1950 + stripped_data = stripped_data[mime_offset:]
  1951 + # else if "Content" is found, and before "MIME"
  1952 + # TODO: can it work without "MIME" at all?
  1953 + elif content_offset > -1:
  1954 + stripped_data = stripped_data[content_offset:]
  1955 + # TODO: quick and dirty fix: insert a standard line with MIME-Version header?
1942 1956 mhtml = email.message_from_string(stripped_data)
1943 1957 # find all the attached files:
1944 1958 for part in mhtml.walk():
... ... @@ -1966,6 +1980,12 @@ class VBA_Parser(object):
1966 1980 log.exception('Failed decompressing an MSO container in %r - %s'
1967 1981 % (fname, MSG_OLEVBA_ISSUES))
1968 1982 # TODO: bug here - need to split in smaller functions/classes?
  1983 + else:
  1984 + try:
  1985 + log.debug('type(part_data) = %s' % type(part_data))
  1986 + log.debug('part_data[0:20] = %r' % part_data[0:20])
  1987 + except:
  1988 + pass
1969 1989 # set type only if parsing succeeds
1970 1990 self.type = TYPE_MHTML
1971 1991 except:
... ...