Commit fb0dcd592c9067ec5bf9be1df45b2428c49daeb1
1 parent: 145f062f
olevba: fixed issue #31 in VBA_Parser.open_mht
Showing 1 changed file with 21 additions and 1 deletion
oletools/olevba.py
| ... | ... | @@ -25,7 +25,7 @@ https://github.com/unixfreak0037/officeparser |
| 25 | 25 | |
| 26 | 26 | # === LICENSE ================================================================== |
| 27 | 27 | |
| 28 | -# olevba is copyright (c) 2014-2015 Philippe Lagadec (http://www.decalage.info) | |
| 28 | +# olevba is copyright (c) 2014-2016 Philippe Lagadec (http://www.decalage.info) | |
| 29 | 29 | # All rights reserved. |
| 30 | 30 | # |
| 31 | 31 | # Redistribution and use in source and binary forms, with or without modification, |
| ... | ... | @@ -158,6 +158,7 @@ https://github.com/unixfreak0037/officeparser |
| 158 | 158 | # 2015-11-17 PL: - fixed bug with --decode option |
| 159 | 159 | # 2015-12-16 PL: - fixed bug in main (no options input anymore) |
| 160 | 160 | # - improved logging, added -l option |
| 161 | +# 2016-01-31 PL: - fixed issue #31 in VBA_Parser.open_mht | |
| 161 | 162 | |
| 162 | 163 | __version__ = '0.42' |
| 163 | 164 | |
| ... | ... | @@ -1939,6 +1940,19 @@ class VBA_Parser(object): |
| 1939 | 1940 | # parse the MIME content |
| 1940 | 1941 | # remove any leading whitespace or newline (workaround for issue in email package) |
| 1941 | 1942 | stripped_data = data.lstrip('\r\n\t ') |
| 1943 | + # strip any junk from the beginning of the file | |
| 1944 | + # (issue #31 fix by Greg C - gdigreg) | |
| 1945 | + # TODO: improve keywords to avoid false positives | |
| 1946 | + mime_offset = stripped_data.find('MIME') | |
| 1947 | + content_offset = stripped_data.find('Content') | |
| 1948 | + # if "MIME" is found, and located before "Content": | |
| 1949 | + if -1 < mime_offset <= content_offset: | |
| 1950 | + stripped_data = stripped_data[mime_offset:] | |
| 1951 | + # else if "Content" is found, and before "MIME" | |
| 1952 | + # TODO: can it work without "MIME" at all? | |
| 1953 | + elif content_offset > -1: | |
| 1954 | + stripped_data = stripped_data[content_offset:] | |
| 1955 | + # TODO: quick and dirty fix: insert a standard line with MIME-Version header? | |
| 1942 | 1956 | mhtml = email.message_from_string(stripped_data) |
| 1943 | 1957 | # find all the attached files: |
| 1944 | 1958 | for part in mhtml.walk(): |
| ... | ... | @@ -1966,6 +1980,12 @@ class VBA_Parser(object): |
| 1966 | 1980 | log.exception('Failed decompressing an MSO container in %r - %s' |
| 1967 | 1981 | % (fname, MSG_OLEVBA_ISSUES)) |
| 1968 | 1982 | # TODO: bug here - need to split in smaller functions/classes? |
| 1983 | + else: | |
| 1984 | + try: | |
| 1985 | + log.debug('type(part_data) = %s' % type(part_data)) | |
| 1986 | + log.debug('part_data[0:20] = %r' % part_data[0:20]) | |
| 1987 | + except: | |
| 1988 | + pass | |
| 1969 | 1989 | # set type only if parsing succeeds |
| 1970 | 1990 | self.type = TYPE_MHTML |
| 1971 | 1991 | except: | ... | ... |