Commit 56759d612ff666092e21c424e7ad76ec3590e775

Authored by Philippe Lagadec
1 parent 9f45875a

olevba: fixed issue #2, decoding VBA stream names using specified codepage and enabling unicode stream names in olefile
oletools/olevba.py
@@ -96,6 +96,8 @@ https://github.com/unixfreak0037/officeparser @@ -96,6 +96,8 @@ https://github.com/unixfreak0037/officeparser
96 # - improved list of executable file extensions 96 # - improved list of executable file extensions
97 # 2015-01-04 v0.13 PL: - added several suspicious keywords, improved display 97 # 2015-01-04 v0.13 PL: - added several suspicious keywords, improved display
98 # 2015-01-08 v0.14 PL: - added hex strings detection and decoding 98 # 2015-01-08 v0.14 PL: - added hex strings detection and decoding
  99 +# - fixed issue #2, decoding VBA stream names using
  100 +# specified codepage and unicode stream names
99 101
100 __version__ = '0.14' 102 __version__ = '0.14'
101 103
@@ -734,12 +736,18 @@ def _extract_vba (ole, vba_root, project_path, dir_path): @@ -734,12 +736,18 @@ def _extract_vba (ole, vba_root, project_path, dir_path):
734 if section_id != None: 736 if section_id != None:
735 logging.warning('unknown or invalid module section id {0:04X}'.format(section_id)) 737 logging.warning('unknown or invalid module section id {0:04X}'.format(section_id))
736 738
  739 + logging.debug('Project CodePage = %d' % PROJECTCODEPAGE_CodePage)
  740 + vba_codec = 'cp%d' % PROJECTCODEPAGE_CodePage
737 logging.debug("ModuleName = {0}".format(MODULENAME_ModuleName)) 741 logging.debug("ModuleName = {0}".format(MODULENAME_ModuleName))
738 - logging.debug("StreamName = {0}".format(MODULESTREAMNAME_StreamName)) 742 + logging.debug("StreamName = {0}".format(repr(MODULESTREAMNAME_StreamName)))
  743 + streamname_unicode = MODULESTREAMNAME_StreamName.decode(vba_codec)
  744 + logging.debug("StreamName.decode('%s') = %s" % (vba_codec, repr(streamname_unicode)))
  745 + logging.debug("StreamNameUnicode = {0}".format(repr(MODULESTREAMNAME_StreamNameUnicode)))
739 logging.debug("TextOffset = {0}".format(MODULEOFFSET_TextOffset)) 746 logging.debug("TextOffset = {0}".format(MODULEOFFSET_TextOffset))
740 747
741 - code_path = vba_root + 'VBA/' + MODULESTREAMNAME_StreamName 748 + code_path = vba_root + u'VBA/' + streamname_unicode
742 #TODO: test if stream exists 749 #TODO: test if stream exists
  750 + logging.debug('opening VBA code stream %s' % repr(code_path))
743 code_data = ole.openstream(code_path).read() 751 code_data = ole.openstream(code_path).read()
744 logging.debug("length of code_data = {0}".format(len(code_data))) 752 logging.debug("length of code_data = {0}".format(len(code_data)))
745 logging.debug("offset of code_data = {0}".format(MODULEOFFSET_TextOffset)) 753 logging.debug("offset of code_data = {0}".format(MODULEOFFSET_TextOffset))
@@ -1107,7 +1115,7 @@ def process_file (container, filename, data): @@ -1107,7 +1115,7 @@ def process_file (container, filename, data):
1107 vba_code = filter_vba(vba_code) 1115 vba_code = filter_vba(vba_code)
1108 print '-'*79 1116 print '-'*79
1109 print 'VBA MACRO %s ' % vba_filename 1117 print 'VBA MACRO %s ' % vba_filename
1110 - print 'in file: %s - OLE stream: %s' % (subfilename, stream_path) 1118 + print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path))
1111 print '- '*39 1119 print '- '*39
1112 # detect empty macros: 1120 # detect empty macros:
1113 if vba_code.strip() == '': 1121 if vba_code.strip() == '':
@@ -1142,6 +1150,7 @@ def process_file (container, filename, data): @@ -1142,6 +1150,7 @@ def process_file (container, filename, data):
1142 print 'No VBA macros found.' 1150 print 'No VBA macros found.'
1143 except: #TypeError: 1151 except: #TypeError:
1144 #raise 1152 #raise
  1153 + #TODO: print more info if debug mode
1145 print sys.exc_value 1154 print sys.exc_value
1146 print '' 1155 print ''
1147 1156
oletools/thirdparty/olefile/olefile.py
@@ -312,7 +312,7 @@ except NameError: @@ -312,7 +312,7 @@ except NameError:
312 312
313 #[PL] Experimental setting: if True, OLE filenames will be kept in Unicode 313 #[PL] Experimental setting: if True, OLE filenames will be kept in Unicode
314 # if False (default PIL behaviour), all filenames are converted to Latin-1. 314 # if False (default PIL behaviour), all filenames are converted to Latin-1.
315 -KEEP_UNICODE_NAMES = False 315 +KEEP_UNICODE_NAMES = True
316 316
317 #=== DEBUGGING =============================================================== 317 #=== DEBUGGING ===============================================================
318 318