Commit a7309e599dc99b2b7bf08f4b7ffb4ca4b15a6364

Authored by decalage2
1 parent 238d0172

olevba: fixed MHTML format support on Python 3 (issue #106)

Showing 1 changed file with 9 additions and 5 deletions
oletools/olevba.py
@@ -2797,12 +2797,12 @@ class VBA_Parser(object): @@ -2797,12 +2797,12 @@ class VBA_Parser(object):
2797 try: 2797 try:
2798 # parse the MIME content 2798 # parse the MIME content
2799 # remove any leading whitespace or newline (workaround for issue in email package) 2799 # remove any leading whitespace or newline (workaround for issue in email package)
2800 - stripped_data = data.lstrip('\r\n\t ') 2800 + stripped_data = data.lstrip(b'\r\n\t ')
2801 # strip any junk from the beginning of the file 2801 # strip any junk from the beginning of the file
2802 # (issue #31 fix by Greg C - gdigreg) 2802 # (issue #31 fix by Greg C - gdigreg)
2803 # TODO: improve keywords to avoid false positives 2803 # TODO: improve keywords to avoid false positives
2804 - mime_offset = stripped_data.find('MIME') 2804 + mime_offset = stripped_data.find(b'MIME')
2805 - content_offset = stripped_data.find('Content') 2805 + content_offset = stripped_data.find(b'Content')
2806 # if "MIME" is found, and located before "Content": 2806 # if "MIME" is found, and located before "Content":
2807 if -1 < mime_offset <= content_offset: 2807 if -1 < mime_offset <= content_offset:
2808 stripped_data = stripped_data[mime_offset:] 2808 stripped_data = stripped_data[mime_offset:]
@@ -2811,7 +2811,11 @@ class VBA_Parser(object): @@ -2811,7 +2811,11 @@ class VBA_Parser(object):
2811 elif content_offset > -1: 2811 elif content_offset > -1:
2812 stripped_data = stripped_data[content_offset:] 2812 stripped_data = stripped_data[content_offset:]
2813 # TODO: quick and dirty fix: insert a standard line with MIME-Version header? 2813 # TODO: quick and dirty fix: insert a standard line with MIME-Version header?
2814 - mhtml = email.message_from_string(stripped_data) 2814 + if PYTHON2:
  2815 + mhtml = email.message_from_string(stripped_data)
  2816 + else:
  2817 + # on Python 3, need to use message_from_bytes instead:
  2818 + mhtml = email.message_from_bytes(stripped_data)
2815 # find all the attached files: 2819 # find all the attached files:
2816 for part in mhtml.walk(): 2820 for part in mhtml.walk():
2817 content_type = part.get_content_type() # always returns a value 2821 content_type = part.get_content_type() # always returns a value
@@ -2824,7 +2828,7 @@ class VBA_Parser(object): @@ -2824,7 +2828,7 @@ class VBA_Parser(object):
2824 # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded. 2828 # using the ActiveMime/MSO format (zlib-compressed), and Base64 encoded.
2825 # decompress the zlib data starting at offset 0x32, which is the OLE container: 2829 # decompress the zlib data starting at offset 0x32, which is the OLE container:
2826 # check ActiveMime header: 2830 # check ActiveMime header:
2827 - if isinstance(part_data, str) and is_mso_file(part_data): 2831 + if isinstance(part_data, bytes) and is_mso_file(part_data):
2828 log.debug('Found ActiveMime header, decompressing MSO container') 2832 log.debug('Found ActiveMime header, decompressing MSO container')
2829 try: 2833 try:
2830 ole_data = mso_file_extract(part_data) 2834 ole_data = mso_file_extract(part_data)