Commit fb0dcd592c9067ec5bf9be1df45b2428c49daeb1
1 parent
145f062f
olevba: fixed issue #31 in VBA_Parser.open_mht
Showing
1 changed file
with
21 additions
and
1 deletion
oletools/olevba.py
| @@ -25,7 +25,7 @@ https://github.com/unixfreak0037/officeparser | @@ -25,7 +25,7 @@ https://github.com/unixfreak0037/officeparser | ||
| 25 | 25 | ||
| 26 | # === LICENSE ================================================================== | 26 | # === LICENSE ================================================================== |
| 27 | 27 | ||
| 28 | -# olevba is copyright (c) 2014-2015 Philippe Lagadec (http://www.decalage.info) | 28 | +# olevba is copyright (c) 2014-2016 Philippe Lagadec (http://www.decalage.info) |
| 29 | # All rights reserved. | 29 | # All rights reserved. |
| 30 | # | 30 | # |
| 31 | # Redistribution and use in source and binary forms, with or without modification, | 31 | # Redistribution and use in source and binary forms, with or without modification, |
| @@ -158,6 +158,7 @@ https://github.com/unixfreak0037/officeparser | @@ -158,6 +158,7 @@ https://github.com/unixfreak0037/officeparser | ||
| 158 | # 2015-11-17 PL: - fixed bug with --decode option | 158 | # 2015-11-17 PL: - fixed bug with --decode option |
| 159 | # 2015-12-16 PL: - fixed bug in main (no options input anymore) | 159 | # 2015-12-16 PL: - fixed bug in main (no options input anymore) |
| 160 | # - improved logging, added -l option | 160 | # - improved logging, added -l option |
| 161 | +# 2016-01-31 PL: - fixed issue #31 in VBA_Parser.open_mht | ||
| 161 | 162 | ||
| 162 | __version__ = '0.42' | 163 | __version__ = '0.42' |
| 163 | 164 | ||
| @@ -1939,6 +1940,19 @@ class VBA_Parser(object): | @@ -1939,6 +1940,19 @@ class VBA_Parser(object): | ||
| 1939 | # parse the MIME content | 1940 | # parse the MIME content |
| 1940 | # remove any leading whitespace or newline (workaround for issue in email package) | 1941 | # remove any leading whitespace or newline (workaround for issue in email package) |
| 1941 | stripped_data = data.lstrip('\r\n\t ') | 1942 | stripped_data = data.lstrip('\r\n\t ') |
| 1943 | + # strip any junk from the beginning of the file | ||
| 1944 | + # (issue #31 fix by Greg C - gdigreg) | ||
| 1945 | + # TODO: improve keywords to avoid false positives | ||
| 1946 | + mime_offset = stripped_data.find('MIME') | ||
| 1947 | + content_offset = stripped_data.find('Content') | ||
| 1948 | + # if "MIME" is found, and located before "Content": | ||
| 1949 | + if -1 < mime_offset <= content_offset: | ||
| 1950 | + stripped_data = stripped_data[mime_offset:] | ||
| 1951 | + # else if "Content" is found, and is located before "MIME" (or "MIME" is absent) | ||
| 1952 | + # TODO: can it work without "MIME" at all? | ||
| 1953 | + elif content_offset > -1: | ||
| 1954 | + stripped_data = stripped_data[content_offset:] | ||
| 1955 | + # TODO: quick and dirty fix: insert a standard line with MIME-Version header? | ||
| 1942 | mhtml = email.message_from_string(stripped_data) | 1956 | mhtml = email.message_from_string(stripped_data) |
| 1943 | # find all the attached files: | 1957 | # find all the attached files: |
| 1944 | for part in mhtml.walk(): | 1958 | for part in mhtml.walk(): |
| @@ -1966,6 +1980,12 @@ class VBA_Parser(object): | @@ -1966,6 +1980,12 @@ class VBA_Parser(object): | ||
| 1966 | log.exception('Failed decompressing an MSO container in %r - %s' | 1980 | log.exception('Failed decompressing an MSO container in %r - %s' |
| 1967 | % (fname, MSG_OLEVBA_ISSUES)) | 1981 | % (fname, MSG_OLEVBA_ISSUES)) |
| 1968 | # TODO: bug here - need to split in smaller functions/classes? | 1982 | # TODO: bug here - need to split in smaller functions/classes? |
| 1983 | + else: | ||
| 1984 | + try: | ||
| 1985 | + log.debug('type(part_data) = %s' % type(part_data)) | ||
| 1986 | + log.debug('part_data[0:20] = %r' % part_data[0:20]) | ||
| 1987 | + except: | ||
| 1988 | + pass | ||
| 1969 | # set type only if parsing succeeds | 1989 | # set type only if parsing succeeds |
| 1970 | self.type = TYPE_MHTML | 1990 | self.type = TYPE_MHTML |
| 1971 | except: | 1991 | except: |