Commit ffa04426f10dfe4cd8a805fe35a85f6121694213

Authored by Philippe Lagadec
2 parents 5e019d00 b0033e5f

olevba: many improvements and fixes by Christian Herdtweck (exit code, exception handling, JSON output)
oletools/olevba.py
@@ -76,7 +76,7 @@ https://github.com/unixfreak0037/officeparser @@ -76,7 +76,7 @@ https://github.com/unixfreak0037/officeparser
76 # CHANGELOG: 76 # CHANGELOG:
77 # 2014-08-05 v0.01 PL: - first version based on officeparser code 77 # 2014-08-05 v0.01 PL: - first version based on officeparser code
78 # 2014-08-14 v0.02 PL: - fixed bugs in code, added license from officeparser 78 # 2014-08-14 v0.02 PL: - fixed bugs in code, added license from officeparser
79 -# 2014-08-15 PL: - fixed incorrect value check in PROJECTHELPFILEPATH Record 79 +# 2014-08-15 PL: - fixed incorrect value check in projecthelpfilepath Record
80 # 2014-08-15 v0.03 PL: - refactored extract_macros to support OpenXML formats 80 # 2014-08-15 v0.03 PL: - refactored extract_macros to support OpenXML formats
81 # and to find the VBA project root anywhere in the file 81 # and to find the VBA project root anywhere in the file
82 # 2014-11-29 v0.04 PL: - use olefile instead of OleFileIO_PL 82 # 2014-11-29 v0.04 PL: - use olefile instead of OleFileIO_PL
@@ -169,6 +169,9 @@ https://github.com/unixfreak0037/officeparser @@ -169,6 +169,9 @@ https://github.com/unixfreak0037/officeparser
169 # 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate 169 # 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate
170 # - updated suspicious keywords 170 # - updated suspicious keywords
171 # 2016-05-04 v0.47 PL: - look for VBA code in any stream including orphans 171 # 2016-05-04 v0.47 PL: - look for VBA code in any stream including orphans
  172 +# 2016-04-28 CH: - return an exit code depending on the results
  173 +# - improved error and exception handling
  174 +# - improved JSON output
172 175
173 __version__ = '0.47' 176 __version__ = '0.47'
174 177
@@ -212,10 +215,8 @@ import math @@ -212,10 +215,8 @@ import math
212 import zipfile 215 import zipfile
213 import re 216 import re
214 import optparse 217 import optparse
215 -import os.path  
216 import binascii 218 import binascii
217 import base64 219 import base64
218 -import traceback  
219 import zlib 220 import zlib
220 import email # for MHTML parsing 221 import email # for MHTML parsing
221 import string # for printable 222 import string # for printable
@@ -240,8 +241,12 @@ except ImportError: @@ -240,8 +241,12 @@ except ImportError:
240 241
241 import thirdparty.olefile as olefile 242 import thirdparty.olefile as olefile
242 from thirdparty.prettytable import prettytable 243 from thirdparty.prettytable import prettytable
243 -from thirdparty.xglob import xglob  
244 -from thirdparty.pyparsing.pyparsing import * 244 +from thirdparty.xglob import xglob, PathNotFoundException
  245 +from thirdparty.pyparsing.pyparsing import \
  246 + CaselessKeyword, CaselessLiteral, Combine, Forward, Literal, \
  247 + Optional, QuotedString,Regex, Suppress, Word, WordStart, \
  248 + alphanums, alphas, hexnums,nums, opAssoc, srange, \
  249 + infixNotation
245 250
246 # monkeypatch email to fix issue #32: 251 # monkeypatch email to fix issue #32:
247 # allow header lines without ":" 252 # allow header lines without ":"
@@ -291,8 +296,51 @@ def get_logger(name, level=logging.CRITICAL+1): @@ -291,8 +296,51 @@ def get_logger(name, level=logging.CRITICAL+1):
291 log = get_logger('olevba') 296 log = get_logger('olevba')
292 297
293 298
  299 +#=== EXCEPTIONS ==============================================================
  300 +
  301 +class FileOpenError(Exception):
  302 + """ raised by VBA_Parser constructor if all open_... attempts failed
  303 +
  304 + probably means the file type is not supported
  305 + """
  306 +
  307 + def __init__(self, filename):
  308 + super(FileOpenError, self).__init__(
  309 + 'Failed to open file %s ... probably not supported' % filename)
  310 + self.filename = filename
  311 +
  312 +
  313 +class ProcessingError(Exception):
  314 + """ raised by VBA_Parser.process_file* functions """
  315 +
  316 + def __init__(self, filename, orig_exception):
  317 + super(ProcessingError, self).__init__(
  318 + 'Error processing file %s (%s)' % (filename, orig_exception))
  319 + self.filename = filename
  320 + self.orig_exception = orig_exception
  321 +
  322 +
  323 +class MsoExtractionError(RuntimeError):
  324 + """ raised by mso_file_extract if parsing MSO/ActiveMIME data failed """
  325 +
  326 + def __init__(self, msg):
  327 + super(MsoExtractionError, self).__init__(msg)
  328 + self.msg = msg
  329 +
  330 +
294 #--- CONSTANTS ---------------------------------------------------------------- 331 #--- CONSTANTS ----------------------------------------------------------------
295 332
  333 +# return codes
  334 +RETURN_OK = 0
  335 +RETURN_WARNINGS = 1 # (reserved, not used yet)
  336 +RETURN_WRONG_ARGS = 2 # (fixed, built into optparse)
  337 +RETURN_FILE_NOT_FOUND = 3
  338 +RETURN_XGLOB_ERR = 4
  339 +RETURN_OPEN_ERROR = 5
  340 +RETURN_PARSE_ERROR = 6
  341 +RETURN_SEVERAL_ERRS = 7
  342 +RETURN_UNEXPECTED = 8
  343 +
296 # URL and message to report issues: 344 # URL and message to report issues:
297 URL_OLEVBA_ISSUES = 'https://bitbucket.org/decalage/oletools/issues' 345 URL_OLEVBA_ISSUES = 'https://bitbucket.org/decalage/oletools/issues'
298 MSG_OLEVBA_ISSUES = 'Please report this issue on %s' % URL_OLEVBA_ISSUES 346 MSG_OLEVBA_ISSUES = 'Please report this issue on %s' % URL_OLEVBA_ISSUES
@@ -846,36 +894,37 @@ def mso_file_extract(data): @@ -846,36 +894,37 @@ def mso_file_extract(data):
846 :param data: bytes string, MSO/ActiveMime file content 894 :param data: bytes string, MSO/ActiveMime file content
847 :return: bytes string, extracted data (uncompressed) 895 :return: bytes string, extracted data (uncompressed)
848 896
849 - raise a RuntimeError if the data cannot be extracted 897 + raise a MsoExtractionError if the data cannot be extracted
850 """ 898 """
851 # check the magic: 899 # check the magic:
852 assert is_mso_file(data) 900 assert is_mso_file(data)
  901 +
  902 + # In all the samples seen so far, Word always uses an offset of 0x32,
  903 + # and Excel 0x22A. But we read the offset from the header to be more
  904 + # generic.
  905 + offsets = [0x32, 0x22A]
  906 +
853 # First, attempt to get the compressed data offset from the header 907 # First, attempt to get the compressed data offset from the header
854 # According to my tests, it should be an unsigned 16 bits integer, 908 # According to my tests, it should be an unsigned 16 bits integer,
855 # at offset 0x1E (little endian) + add 46: 909 # at offset 0x1E (little endian) + add 46:
856 try: 910 try:
857 offset = struct.unpack_from('<H', data, offset=0x1E)[0] + 46 911 offset = struct.unpack_from('<H', data, offset=0x1E)[0] + 46
858 log.debug('Parsing MSO file: data offset = 0x%X' % offset) 912 log.debug('Parsing MSO file: data offset = 0x%X' % offset)
859 - except KeyboardInterrupt:  
860 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
861 - raise  
862 - except:  
863 - log.exception('Unable to parse MSO/ActiveMime file header')  
864 - raise RuntimeError('Unable to parse MSO/ActiveMime file header')  
865 - # In all the samples seen so far, Word always uses an offset of 0x32,  
866 - # and Excel 0x22A. But we read the offset from the header to be more  
867 - # generic.  
868 - # Let's try that offset, then 0x32 and 0x22A, just in case:  
869 - for start in (offset, 0x32, 0x22A): 913 + offsets.insert(0, offset) # insert at beginning of offsets
  914 + except struct.error as exc:
  915 + log.info('Unable to parse MSO/ActiveMime file header (%s)' % exc)
  916 + log.debug('Trace:', exc_info=True)
  917 + raise MsoExtractionError('Unable to parse MSO/ActiveMime file header')
  918 + # now try offsets
  919 + for start in offsets:
870 try: 920 try:
871 log.debug('Attempting zlib decompression from MSO file offset 0x%X' % start) 921 log.debug('Attempting zlib decompression from MSO file offset 0x%X' % start)
872 extracted_data = zlib.decompress(data[start:]) 922 extracted_data = zlib.decompress(data[start:])
873 return extracted_data 923 return extracted_data
874 - except KeyboardInterrupt:  
875 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
876 - raise  
877 - except:  
878 - log.exception('zlib decompression failed') 924 + except zlib.error as exc:
  925 + log.info('zlib decompression failed for offset %s (%s)'
  926 + % (start, exc))
  927 + log.debug('Trace:', exc_info=True)
879 # None of the guessed offsets worked, let's try brute-forcing by looking 928 # None of the guessed offsets worked, let's try brute-forcing by looking
880 # for potential zlib-compressed blocks starting with 0x78: 929 # for potential zlib-compressed blocks starting with 0x78:
881 log.debug('Looking for potential zlib-compressed blocks in MSO file') 930 log.debug('Looking for potential zlib-compressed blocks in MSO file')
@@ -885,12 +934,10 @@ def mso_file_extract(data): @@ -885,12 +934,10 @@ def mso_file_extract(data):
885 log.debug('Attempting zlib decompression from MSO file offset 0x%X' % start) 934 log.debug('Attempting zlib decompression from MSO file offset 0x%X' % start)
886 extracted_data = zlib.decompress(data[start:]) 935 extracted_data = zlib.decompress(data[start:])
887 return extracted_data 936 return extracted_data
888 - except KeyboardInterrupt:  
889 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
890 - raise  
891 - except:  
892 - log.exception('zlib decompression failed')  
893 - raise RuntimeError('Unable to decompress data from a MSO/ActiveMime file') 937 + except zlib.error as exc:
  938 + log.info('zlib decompression failed (%s)' % exc)
  939 + log.debug('Trace:', exc_info=True)
  940 + raise MsoExtractionError('Unable to decompress data from a MSO/ActiveMime file')
894 941
895 942
896 #--- FUNCTIONS ---------------------------------------------------------------- 943 #--- FUNCTIONS ----------------------------------------------------------------
@@ -911,29 +958,6 @@ def is_printable(s): @@ -911,29 +958,6 @@ def is_printable(s):
911 return set(s).issubset(_PRINTABLE_SET) 958 return set(s).issubset(_PRINTABLE_SET)
912 959
913 960
914 -def print_json(j):  
915 - """  
916 - Print a dictionary, a list or any other object to stdout  
917 - :param j: object to be printed  
918 - :return:  
919 - """  
920 - if isinstance(j, dict):  
921 - for key, val in j.items():  
922 - print_json(key)  
923 - print_json(val)  
924 - elif isinstance(j, list):  
925 - for elem in j:  
926 - print_json(elem)  
927 - else:  
928 - try:  
929 - if len(j) > 20:  
930 - print type(j), repr(j[:20]), '...(len {0})'.format(len(j))  
931 - else:  
932 - print type(j), repr(j)  
933 - except TypeError:  
934 - print type(j), repr(j)  
935 -  
936 -  
937 def copytoken_help(decompressed_current, decompressed_chunk_start): 961 def copytoken_help(decompressed_current, decompressed_chunk_start):
938 """ 962 """
939 compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help 963 compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help
@@ -1057,7 +1081,7 @@ def decompress_stream(compressed_container): @@ -1057,7 +1081,7 @@ def decompress_stream(compressed_container):
1057 copy_token = \ 1081 copy_token = \
1058 struct.unpack("<H", compressed_container[compressed_current:compressed_current + 2])[0] 1082 struct.unpack("<H", compressed_container[compressed_current:compressed_current + 2])[0]
1059 #TODO: check this 1083 #TODO: check this
1060 - length_mask, offset_mask, bit_count, maximum_length = copytoken_help( 1084 + length_mask, offset_mask, bit_count, _ = copytoken_help(
1061 len(decompressed_container), decompressed_chunk_start) 1085 len(decompressed_container), decompressed_chunk_start)
1062 length = (copy_token & length_mask) + 3 1086 length = (copy_token & length_mask) + 3
1063 temp1 = copy_token & offset_mask 1087 temp1 = copy_token & offset_mask
@@ -1136,122 +1160,130 @@ def _extract_vba(ole, vba_root, project_path, dir_path): @@ -1136,122 +1160,130 @@ def _extract_vba(ole, vba_root, project_path, dir_path):
1136 dir_stream = cStringIO.StringIO(decompress_stream(dir_compressed)) 1160 dir_stream = cStringIO.StringIO(decompress_stream(dir_compressed))
1137 1161
1138 # PROJECTSYSKIND Record 1162 # PROJECTSYSKIND Record
1139 - PROJECTSYSKIND_Id = struct.unpack("<H", dir_stream.read(2))[0]  
1140 - check_value('PROJECTSYSKIND_Id', 0x0001, PROJECTSYSKIND_Id)  
1141 - PROJECTSYSKIND_Size = struct.unpack("<L", dir_stream.read(4))[0]  
1142 - check_value('PROJECTSYSKIND_Size', 0x0004, PROJECTSYSKIND_Size)  
1143 - PROJECTSYSKIND_SysKind = struct.unpack("<L", dir_stream.read(4))[0]  
1144 - if PROJECTSYSKIND_SysKind == 0x00: 1163 + projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0]
  1164 + check_value('PROJECTSYSKIND_Id', 0x0001, projectsyskind_id)
  1165 + projectsyskind_size = struct.unpack("<L", dir_stream.read(4))[0]
  1166 + check_value('PROJECTSYSKIND_Size', 0x0004, projectsyskind_size)
  1167 + projectsyskind_syskind = struct.unpack("<L", dir_stream.read(4))[0]
  1168 + if projectsyskind_syskind == 0x00:
1145 log.debug("16-bit Windows") 1169 log.debug("16-bit Windows")
1146 - elif PROJECTSYSKIND_SysKind == 0x01: 1170 + elif projectsyskind_syskind == 0x01:
1147 log.debug("32-bit Windows") 1171 log.debug("32-bit Windows")
1148 - elif PROJECTSYSKIND_SysKind == 0x02: 1172 + elif projectsyskind_syskind == 0x02:
1149 log.debug("Macintosh") 1173 log.debug("Macintosh")
1150 - elif PROJECTSYSKIND_SysKind == 0x03: 1174 + elif projectsyskind_syskind == 0x03:
1151 log.debug("64-bit Windows") 1175 log.debug("64-bit Windows")
1152 else: 1176 else:
1153 - log.error("invalid PROJECTSYSKIND_SysKind {0:04X}".format(PROJECTSYSKIND_SysKind)) 1177 + log.error("invalid PROJECTSYSKIND_SysKind {0:04X}".format(projectsyskind_syskind))
1154 1178
1155 # PROJECTLCID Record 1179 # PROJECTLCID Record
1156 - PROJECTLCID_Id = struct.unpack("<H", dir_stream.read(2))[0]  
1157 - check_value('PROJECTLCID_Id', 0x0002, PROJECTLCID_Id)  
1158 - PROJECTLCID_Size = struct.unpack("<L", dir_stream.read(4))[0]  
1159 - check_value('PROJECTLCID_Size', 0x0004, PROJECTLCID_Size)  
1160 - PROJECTLCID_Lcid = struct.unpack("<L", dir_stream.read(4))[0]  
1161 - check_value('PROJECTLCID_Lcid', 0x409, PROJECTLCID_Lcid) 1180 + projectlcid_id = struct.unpack("<H", dir_stream.read(2))[0]
  1181 + check_value('PROJECTLCID_Id', 0x0002, projectlcid_id)
  1182 + projectlcid_size = struct.unpack("<L", dir_stream.read(4))[0]
  1183 + check_value('PROJECTLCID_Size', 0x0004, projectlcid_size)
  1184 + projectlcid_lcid = struct.unpack("<L", dir_stream.read(4))[0]
  1185 + check_value('PROJECTLCID_Lcid', 0x409, projectlcid_lcid)
1162 1186
1163 # PROJECTLCIDINVOKE Record 1187 # PROJECTLCIDINVOKE Record
1164 - PROJECTLCIDINVOKE_Id = struct.unpack("<H", dir_stream.read(2))[0]  
1165 - check_value('PROJECTLCIDINVOKE_Id', 0x0014, PROJECTLCIDINVOKE_Id)  
1166 - PROJECTLCIDINVOKE_Size = struct.unpack("<L", dir_stream.read(4))[0]  
1167 - check_value('PROJECTLCIDINVOKE_Size', 0x0004, PROJECTLCIDINVOKE_Size)  
1168 - PROJECTLCIDINVOKE_LcidInvoke = struct.unpack("<L", dir_stream.read(4))[0]  
1169 - check_value('PROJECTLCIDINVOKE_LcidInvoke', 0x409, PROJECTLCIDINVOKE_LcidInvoke) 1188 + projectlcidinvoke_id = struct.unpack("<H", dir_stream.read(2))[0]
  1189 + check_value('PROJECTLCIDINVOKE_Id', 0x0014, projectlcidinvoke_id)
  1190 + projectlcidinvoke_size = struct.unpack("<L", dir_stream.read(4))[0]
  1191 + check_value('PROJECTLCIDINVOKE_Size', 0x0004, projectlcidinvoke_size)
  1192 + projectlcidinvoke_lcidinvoke = struct.unpack("<L", dir_stream.read(4))[0]
  1193 + check_value('PROJECTLCIDINVOKE_LcidInvoke', 0x409, projectlcidinvoke_lcidinvoke)
1170 1194
1171 # PROJECTCODEPAGE Record 1195 # PROJECTCODEPAGE Record
1172 - PROJECTCODEPAGE_Id = struct.unpack("<H", dir_stream.read(2))[0]  
1173 - check_value('PROJECTCODEPAGE_Id', 0x0003, PROJECTCODEPAGE_Id)  
1174 - PROJECTCODEPAGE_Size = struct.unpack("<L", dir_stream.read(4))[0]  
1175 - check_value('PROJECTCODEPAGE_Size', 0x0002, PROJECTCODEPAGE_Size)  
1176 - PROJECTCODEPAGE_CodePage = struct.unpack("<H", dir_stream.read(2))[0] 1196 + projectcodepage_id = struct.unpack("<H", dir_stream.read(2))[0]
  1197 + check_value('PROJECTCODEPAGE_Id', 0x0003, projectcodepage_id)
  1198 + projectcodepage_size = struct.unpack("<L", dir_stream.read(4))[0]
  1199 + check_value('PROJECTCODEPAGE_Size', 0x0002, projectcodepage_size)
  1200 + projectcodepage_codepage = struct.unpack("<H", dir_stream.read(2))[0]
1177 1201
1178 # PROJECTNAME Record 1202 # PROJECTNAME Record
1179 - PROJECTNAME_Id = struct.unpack("<H", dir_stream.read(2))[0]  
1180 - check_value('PROJECTNAME_Id', 0x0004, PROJECTNAME_Id)  
1181 - PROJECTNAME_SizeOfProjectName = struct.unpack("<L", dir_stream.read(4))[0]  
1182 - if PROJECTNAME_SizeOfProjectName < 1 or PROJECTNAME_SizeOfProjectName > 128:  
1183 - log.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(PROJECTNAME_SizeOfProjectName))  
1184 - PROJECTNAME_ProjectName = dir_stream.read(PROJECTNAME_SizeOfProjectName) 1203 + projectname_id = struct.unpack("<H", dir_stream.read(2))[0]
  1204 + check_value('PROJECTNAME_Id', 0x0004, projectname_id)
  1205 + projectname_sizeof_projectname = struct.unpack("<L", dir_stream.read(4))[0]
  1206 + if projectname_sizeof_projectname < 1 or projectname_sizeof_projectname > 128:
  1207 + log.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(projectname_sizeof_projectname))
  1208 + projectname_projectname = dir_stream.read(projectname_sizeof_projectname)
  1209 + unused = projectname_projectname
1185 1210
1186 # PROJECTDOCSTRING Record 1211 # PROJECTDOCSTRING Record
1187 - PROJECTDOCSTRING_Id = struct.unpack("<H", dir_stream.read(2))[0]  
1188 - check_value('PROJECTDOCSTRING_Id', 0x0005, PROJECTDOCSTRING_Id)  
1189 - PROJECTDOCSTRING_SizeOfDocString = struct.unpack("<L", dir_stream.read(4))[0]  
1190 - if PROJECTNAME_SizeOfProjectName > 2000: 1212 + projectdocstring_id = struct.unpack("<H", dir_stream.read(2))[0]
  1213 + check_value('PROJECTDOCSTRING_Id', 0x0005, projectdocstring_id)
  1214 + projectdocstring_sizeof_docstring = struct.unpack("<L", dir_stream.read(4))[0]
  1215 + if projectdocstring_sizeof_docstring > 2000:
1191 log.error( 1216 log.error(
1192 - "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(PROJECTDOCSTRING_SizeOfDocString))  
1193 - PROJECTDOCSTRING_DocString = dir_stream.read(PROJECTDOCSTRING_SizeOfDocString)  
1194 - PROJECTDOCSTRING_Reserved = struct.unpack("<H", dir_stream.read(2))[0]  
1195 - check_value('PROJECTDOCSTRING_Reserved', 0x0040, PROJECTDOCSTRING_Reserved)  
1196 - PROJECTDOCSTRING_SizeOfDocStringUnicode = struct.unpack("<L", dir_stream.read(4))[0]  
1197 - if PROJECTDOCSTRING_SizeOfDocStringUnicode % 2 != 0: 1217 + "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(projectdocstring_sizeof_docstring))
  1218 + projectdocstring_docstring = dir_stream.read(projectdocstring_sizeof_docstring)
  1219 + projectdocstring_reserved = struct.unpack("<H", dir_stream.read(2))[0]
  1220 + check_value('PROJECTDOCSTRING_Reserved', 0x0040, projectdocstring_reserved)
  1221 + projectdocstring_sizeof_docstring_unicode = struct.unpack("<L", dir_stream.read(4))[0]
  1222 + if projectdocstring_sizeof_docstring_unicode % 2 != 0:
1198 log.error("PROJECTDOCSTRING_SizeOfDocStringUnicode is not even") 1223 log.error("PROJECTDOCSTRING_SizeOfDocStringUnicode is not even")
1199 - PROJECTDOCSTRING_DocStringUnicode = dir_stream.read(PROJECTDOCSTRING_SizeOfDocStringUnicode) 1224 + projectdocstring_docstring_unicode = dir_stream.read(projectdocstring_sizeof_docstring_unicode)
  1225 + unused = projectdocstring_docstring
  1226 + unused = projectdocstring_docstring_unicode
1200 1227
1201 # PROJECTHELPFILEPATH Record - MS-OVBA 2.3.4.2.1.7 1228 # PROJECTHELPFILEPATH Record - MS-OVBA 2.3.4.2.1.7
1202 - PROJECTHELPFILEPATH_Id = struct.unpack("<H", dir_stream.read(2))[0]  
1203 - check_value('PROJECTHELPFILEPATH_Id', 0x0006, PROJECTHELPFILEPATH_Id)  
1204 - PROJECTHELPFILEPATH_SizeOfHelpFile1 = struct.unpack("<L", dir_stream.read(4))[0]  
1205 - if PROJECTHELPFILEPATH_SizeOfHelpFile1 > 260: 1229 + projecthelpfilepath_id = struct.unpack("<H", dir_stream.read(2))[0]
  1230 + check_value('PROJECTHELPFILEPATH_Id', 0x0006, projecthelpfilepath_id)
  1231 + projecthelpfilepath_sizeof_helpfile1 = struct.unpack("<L", dir_stream.read(4))[0]
  1232 + if projecthelpfilepath_sizeof_helpfile1 > 260:
1206 log.error( 1233 log.error(
1207 - "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(PROJECTHELPFILEPATH_SizeOfHelpFile1))  
1208 - PROJECTHELPFILEPATH_HelpFile1 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile1)  
1209 - PROJECTHELPFILEPATH_Reserved = struct.unpack("<H", dir_stream.read(2))[0]  
1210 - check_value('PROJECTHELPFILEPATH_Reserved', 0x003D, PROJECTHELPFILEPATH_Reserved)  
1211 - PROJECTHELPFILEPATH_SizeOfHelpFile2 = struct.unpack("<L", dir_stream.read(4))[0]  
1212 - if PROJECTHELPFILEPATH_SizeOfHelpFile2 != PROJECTHELPFILEPATH_SizeOfHelpFile1: 1234 + "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(projecthelpfilepath_sizeof_helpfile1))
  1235 + projecthelpfilepath_helpfile1 = dir_stream.read(projecthelpfilepath_sizeof_helpfile1)
  1236 + projecthelpfilepath_reserved = struct.unpack("<H", dir_stream.read(2))[0]
  1237 + check_value('PROJECTHELPFILEPATH_Reserved', 0x003D, projecthelpfilepath_reserved)
  1238 + projecthelpfilepath_sizeof_helpfile2 = struct.unpack("<L", dir_stream.read(4))[0]
  1239 + if projecthelpfilepath_sizeof_helpfile2 != projecthelpfilepath_sizeof_helpfile1:
1213 log.error("PROJECTHELPFILEPATH_SizeOfHelpFile1 does not equal PROJECTHELPFILEPATH_SizeOfHelpFile2") 1240 log.error("PROJECTHELPFILEPATH_SizeOfHelpFile1 does not equal PROJECTHELPFILEPATH_SizeOfHelpFile2")
1214 - PROJECTHELPFILEPATH_HelpFile2 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile2)  
1215 - if PROJECTHELPFILEPATH_HelpFile2 != PROJECTHELPFILEPATH_HelpFile1: 1241 + projecthelpfilepath_helpfile2 = dir_stream.read(projecthelpfilepath_sizeof_helpfile2)
  1242 + if projecthelpfilepath_helpfile2 != projecthelpfilepath_helpfile1:
1216 log.error("PROJECTHELPFILEPATH_HelpFile1 does not equal PROJECTHELPFILEPATH_HelpFile2") 1243 log.error("PROJECTHELPFILEPATH_HelpFile1 does not equal PROJECTHELPFILEPATH_HelpFile2")
1217 1244
1218 # PROJECTHELPCONTEXT Record 1245 # PROJECTHELPCONTEXT Record
1219 - PROJECTHELPCONTEXT_Id = struct.unpack("<H", dir_stream.read(2))[0]  
1220 - check_value('PROJECTHELPCONTEXT_Id', 0x0007, PROJECTHELPCONTEXT_Id)  
1221 - PROJECTHELPCONTEXT_Size = struct.unpack("<L", dir_stream.read(4))[0]  
1222 - check_value('PROJECTHELPCONTEXT_Size', 0x0004, PROJECTHELPCONTEXT_Size)  
1223 - PROJECTHELPCONTEXT_HelpContext = struct.unpack("<L", dir_stream.read(4))[0] 1246 + projecthelpcontext_id = struct.unpack("<H", dir_stream.read(2))[0]
  1247 + check_value('PROJECTHELPCONTEXT_Id', 0x0007, projecthelpcontext_id)
  1248 + projecthelpcontext_size = struct.unpack("<L", dir_stream.read(4))[0]
  1249 + check_value('PROJECTHELPCONTEXT_Size', 0x0004, projecthelpcontext_size)
  1250 + projecthelpcontext_helpcontext = struct.unpack("<L", dir_stream.read(4))[0]
  1251 + unused = projecthelpcontext_helpcontext
1224 1252
1225 # PROJECTLIBFLAGS Record 1253 # PROJECTLIBFLAGS Record
1226 - PROJECTLIBFLAGS_Id = struct.unpack("<H", dir_stream.read(2))[0]  
1227 - check_value('PROJECTLIBFLAGS_Id', 0x0008, PROJECTLIBFLAGS_Id)  
1228 - PROJECTLIBFLAGS_Size = struct.unpack("<L", dir_stream.read(4))[0]  
1229 - check_value('PROJECTLIBFLAGS_Size', 0x0004, PROJECTLIBFLAGS_Size)  
1230 - PROJECTLIBFLAGS_ProjectLibFlags = struct.unpack("<L", dir_stream.read(4))[0]  
1231 - check_value('PROJECTLIBFLAGS_ProjectLibFlags', 0x0000, PROJECTLIBFLAGS_ProjectLibFlags) 1254 + projectlibflags_id = struct.unpack("<H", dir_stream.read(2))[0]
  1255 + check_value('PROJECTLIBFLAGS_Id', 0x0008, projectlibflags_id)
  1256 + projectlibflags_size = struct.unpack("<L", dir_stream.read(4))[0]
  1257 + check_value('PROJECTLIBFLAGS_Size', 0x0004, projectlibflags_size)
  1258 + projectlibflags_projectlibflags = struct.unpack("<L", dir_stream.read(4))[0]
  1259 + check_value('PROJECTLIBFLAGS_ProjectLibFlags', 0x0000, projectlibflags_projectlibflags)
1232 1260
1233 # PROJECTVERSION Record 1261 # PROJECTVERSION Record
1234 - PROJECTVERSION_Id = struct.unpack("<H", dir_stream.read(2))[0]  
1235 - check_value('PROJECTVERSION_Id', 0x0009, PROJECTVERSION_Id)  
1236 - PROJECTVERSION_Reserved = struct.unpack("<L", dir_stream.read(4))[0]  
1237 - check_value('PROJECTVERSION_Reserved', 0x0004, PROJECTVERSION_Reserved)  
1238 - PROJECTVERSION_VersionMajor = struct.unpack("<L", dir_stream.read(4))[0]  
1239 - PROJECTVERSION_VersionMinor = struct.unpack("<H", dir_stream.read(2))[0] 1262 + projectversion_id = struct.unpack("<H", dir_stream.read(2))[0]
  1263 + check_value('PROJECTVERSION_Id', 0x0009, projectversion_id)
  1264 + projectversion_reserved = struct.unpack("<L", dir_stream.read(4))[0]
  1265 + check_value('PROJECTVERSION_Reserved', 0x0004, projectversion_reserved)
  1266 + projectversion_versionmajor = struct.unpack("<L", dir_stream.read(4))[0]
  1267 + projectversion_versionminor = struct.unpack("<H", dir_stream.read(2))[0]
  1268 + unused = projectversion_versionmajor
  1269 + unused = projectversion_versionminor
1240 1270
1241 # PROJECTCONSTANTS Record 1271 # PROJECTCONSTANTS Record
1242 - PROJECTCONSTANTS_Id = struct.unpack("<H", dir_stream.read(2))[0]  
1243 - check_value('PROJECTCONSTANTS_Id', 0x000C, PROJECTCONSTANTS_Id)  
1244 - PROJECTCONSTANTS_SizeOfConstants = struct.unpack("<L", dir_stream.read(4))[0]  
1245 - if PROJECTCONSTANTS_SizeOfConstants > 1015: 1272 + projectconstants_id = struct.unpack("<H", dir_stream.read(2))[0]
  1273 + check_value('PROJECTCONSTANTS_Id', 0x000C, projectconstants_id)
  1274 + projectconstants_sizeof_constants = struct.unpack("<L", dir_stream.read(4))[0]
  1275 + if projectconstants_sizeof_constants > 1015:
1246 log.error( 1276 log.error(
1247 - "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(PROJECTCONSTANTS_SizeOfConstants))  
1248 - PROJECTCONSTANTS_Constants = dir_stream.read(PROJECTCONSTANTS_SizeOfConstants)  
1249 - PROJECTCONSTANTS_Reserved = struct.unpack("<H", dir_stream.read(2))[0]  
1250 - check_value('PROJECTCONSTANTS_Reserved', 0x003C, PROJECTCONSTANTS_Reserved)  
1251 - PROJECTCONSTANTS_SizeOfConstantsUnicode = struct.unpack("<L", dir_stream.read(4))[0]  
1252 - if PROJECTCONSTANTS_SizeOfConstantsUnicode % 2 != 0: 1277 + "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(projectconstants_sizeof_constants))
  1278 + projectconstants_constants = dir_stream.read(projectconstants_sizeof_constants)
  1279 + projectconstants_reserved = struct.unpack("<H", dir_stream.read(2))[0]
  1280 + check_value('PROJECTCONSTANTS_Reserved', 0x003C, projectconstants_reserved)
  1281 + projectconstants_sizeof_constants_unicode = struct.unpack("<L", dir_stream.read(4))[0]
  1282 + if projectconstants_sizeof_constants_unicode % 2 != 0:
1253 log.error("PROJECTCONSTANTS_SizeOfConstantsUnicode is not even") 1283 log.error("PROJECTCONSTANTS_SizeOfConstantsUnicode is not even")
1254 - PROJECTCONSTANTS_ConstantsUnicode = dir_stream.read(PROJECTCONSTANTS_SizeOfConstantsUnicode) 1284 + projectconstants_constants_unicode = dir_stream.read(projectconstants_sizeof_constants_unicode)
  1285 + unused = projectconstants_constants
  1286 + unused = projectconstants_constants_unicode
1255 1287
1256 # array of REFERENCE records 1288 # array of REFERENCE records
1257 check = None 1289 check = None
@@ -1263,194 +1295,230 @@ def _extract_vba(ole, vba_root, project_path, dir_path): @@ -1263,194 +1295,230 @@ def _extract_vba(ole, vba_root, project_path, dir_path):
1263 1295
1264 if check == 0x0016: 1296 if check == 0x0016:
1265 # REFERENCENAME 1297 # REFERENCENAME
1266 - REFERENCE_Id = check  
1267 - REFERENCE_SizeOfName = struct.unpack("<L", dir_stream.read(4))[0]  
1268 - REFERENCE_Name = dir_stream.read(REFERENCE_SizeOfName)  
1269 - REFERENCE_Reserved = struct.unpack("<H", dir_stream.read(2))[0]  
1270 - check_value('REFERENCE_Reserved', 0x003E, REFERENCE_Reserved)  
1271 - REFERENCE_SizeOfNameUnicode = struct.unpack("<L", dir_stream.read(4))[0]  
1272 - REFERENCE_NameUnicode = dir_stream.read(REFERENCE_SizeOfNameUnicode) 1298 + reference_id = check
  1299 + reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0]
  1300 + reference_name = dir_stream.read(reference_sizeof_name)
  1301 + reference_reserved = struct.unpack("<H", dir_stream.read(2))[0]
  1302 + check_value('REFERENCE_Reserved', 0x003E, reference_reserved)
  1303 + reference_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0]
  1304 + reference_name_unicode = dir_stream.read(reference_sizeof_name_unicode)
  1305 + unused = reference_id
  1306 + unused = reference_name
  1307 + unused = reference_name_unicode
1273 continue 1308 continue
1274 1309
1275 if check == 0x0033: 1310 if check == 0x0033:
1276 # REFERENCEORIGINAL (followed by REFERENCECONTROL) 1311 # REFERENCEORIGINAL (followed by REFERENCECONTROL)
1277 - REFERENCEORIGINAL_Id = check  
1278 - REFERENCEORIGINAL_SizeOfLibidOriginal = struct.unpack("<L", dir_stream.read(4))[0]  
1279 - REFERENCEORIGINAL_LibidOriginal = dir_stream.read(REFERENCEORIGINAL_SizeOfLibidOriginal) 1312 + referenceoriginal_id = check
  1313 + referenceoriginal_sizeof_libidoriginal = struct.unpack("<L", dir_stream.read(4))[0]
  1314 + referenceoriginal_libidoriginal = dir_stream.read(referenceoriginal_sizeof_libidoriginal)
  1315 + unused = referenceoriginal_id
  1316 + unused = referenceoriginal_libidoriginal
1280 continue 1317 continue
1281 1318
1282 if check == 0x002F: 1319 if check == 0x002F:
1283 # REFERENCECONTROL 1320 # REFERENCECONTROL
1284 - REFERENCECONTROL_Id = check  
1285 - REFERENCECONTROL_SizeTwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore  
1286 - REFERENCECONTROL_SizeOfLibidTwiddled = struct.unpack("<L", dir_stream.read(4))[0]  
1287 - REFERENCECONTROL_LibidTwiddled = dir_stream.read(REFERENCECONTROL_SizeOfLibidTwiddled)  
1288 - REFERENCECONTROL_Reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore  
1289 - check_value('REFERENCECONTROL_Reserved1', 0x0000, REFERENCECONTROL_Reserved1)  
1290 - REFERENCECONTROL_Reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore  
1291 - check_value('REFERENCECONTROL_Reserved2', 0x0000, REFERENCECONTROL_Reserved2) 1321 + referencecontrol_id = check
  1322 + referencecontrol_sizetwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore
  1323 + referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0]
  1324 + referencecontrol_libidtwiddled = dir_stream.read(referencecontrol_sizeof_libidtwiddled)
  1325 + referencecontrol_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore
  1326 + check_value('REFERENCECONTROL_Reserved1', 0x0000, referencecontrol_reserved1)
  1327 + referencecontrol_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore
  1328 + check_value('REFERENCECONTROL_Reserved2', 0x0000, referencecontrol_reserved2)
  1329 + unused = referencecontrol_id
  1330 + unused = referencecontrol_sizetwiddled
  1331 + unused = referencecontrol_libidtwiddled
1292 # optional field 1332 # optional field
1293 check2 = struct.unpack("<H", dir_stream.read(2))[0] 1333 check2 = struct.unpack("<H", dir_stream.read(2))[0]
1294 if check2 == 0x0016: 1334 if check2 == 0x0016:
1295 - REFERENCECONTROL_NameRecordExtended_Id = check  
1296 - REFERENCECONTROL_NameRecordExtended_SizeofName = struct.unpack("<L", dir_stream.read(4))[0]  
1297 - REFERENCECONTROL_NameRecordExtended_Name = dir_stream.read(  
1298 - REFERENCECONTROL_NameRecordExtended_SizeofName)  
1299 - REFERENCECONTROL_NameRecordExtended_Reserved = struct.unpack("<H", dir_stream.read(2))[0] 1335 + referencecontrol_namerecordextended_id = check
  1336 + referencecontrol_namerecordextended_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0]
  1337 + referencecontrol_namerecordextended_name = dir_stream.read(
  1338 + referencecontrol_namerecordextended_sizeof_name)
  1339 + referencecontrol_namerecordextended_reserved = struct.unpack("<H", dir_stream.read(2))[0]
1300 check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E, 1340 check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E,
1301 - REFERENCECONTROL_NameRecordExtended_Reserved)  
1302 - REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode = struct.unpack("<L", dir_stream.read(4))[0]  
1303 - REFERENCECONTROL_NameRecordExtended_NameUnicode = dir_stream.read(  
1304 - REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode)  
1305 - REFERENCECONTROL_Reserved3 = struct.unpack("<H", dir_stream.read(2))[0] 1341 + referencecontrol_namerecordextended_reserved)
  1342 + referencecontrol_namerecordextended_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0]
  1343 + referencecontrol_namerecordextended_name_unicode = dir_stream.read(
  1344 + referencecontrol_namerecordextended_sizeof_name_unicode)
  1345 + referencecontrol_reserved3 = struct.unpack("<H", dir_stream.read(2))[0]
  1346 + unused = referencecontrol_namerecordextended_id
  1347 + unused = referencecontrol_namerecordextended_name
  1348 + unused = referencecontrol_namerecordextended_name_unicode
1306 else: 1349 else:
1307 - REFERENCECONTROL_Reserved3 = check2  
1308 -  
1309 - check_value('REFERENCECONTROL_Reserved3', 0x0030, REFERENCECONTROL_Reserved3)  
1310 - REFERENCECONTROL_SizeExtended = struct.unpack("<L", dir_stream.read(4))[0]  
1311 - REFERENCECONTROL_SizeOfLibidExtended = struct.unpack("<L", dir_stream.read(4))[0]  
1312 - REFERENCECONTROL_LibidExtended = dir_stream.read(REFERENCECONTROL_SizeOfLibidExtended)  
1313 - REFERENCECONTROL_Reserved4 = struct.unpack("<L", dir_stream.read(4))[0]  
1314 - REFERENCECONTROL_Reserved5 = struct.unpack("<H", dir_stream.read(2))[0]  
1315 - REFERENCECONTROL_OriginalTypeLib = dir_stream.read(16)  
1316 - REFERENCECONTROL_Cookie = struct.unpack("<L", dir_stream.read(4))[0] 1350 + referencecontrol_reserved3 = check2
  1351 +
  1352 + check_value('REFERENCECONTROL_Reserved3', 0x0030, referencecontrol_reserved3)
  1353 + referencecontrol_sizeextended = struct.unpack("<L", dir_stream.read(4))[0]
  1354 + referencecontrol_sizeof_libidextended = struct.unpack("<L", dir_stream.read(4))[0]
  1355 + referencecontrol_libidextended = dir_stream.read(referencecontrol_sizeof_libidextended)
  1356 + referencecontrol_reserved4 = struct.unpack("<L", dir_stream.read(4))[0]
  1357 + referencecontrol_reserved5 = struct.unpack("<H", dir_stream.read(2))[0]
  1358 + referencecontrol_originaltypelib = dir_stream.read(16)
  1359 + referencecontrol_cookie = struct.unpack("<L", dir_stream.read(4))[0]
  1360 + unused = referencecontrol_sizeextended
  1361 + unused = referencecontrol_libidextended
  1362 + unused = referencecontrol_reserved4
  1363 + unused = referencecontrol_reserved5
  1364 + unused = referencecontrol_originaltypelib
  1365 + unused = referencecontrol_cookie
1317 continue 1366 continue
1318 1367
1319 if check == 0x000D: 1368 if check == 0x000D:
1320 # REFERENCEREGISTERED 1369 # REFERENCEREGISTERED
1321 - REFERENCEREGISTERED_Id = check  
1322 - REFERENCEREGISTERED_Size = struct.unpack("<L", dir_stream.read(4))[0]  
1323 - REFERENCEREGISTERED_SizeOfLibid = struct.unpack("<L", dir_stream.read(4))[0]  
1324 - REFERENCEREGISTERED_Libid = dir_stream.read(REFERENCEREGISTERED_SizeOfLibid)  
1325 - REFERENCEREGISTERED_Reserved1 = struct.unpack("<L", dir_stream.read(4))[0]  
1326 - check_value('REFERENCEREGISTERED_Reserved1', 0x0000, REFERENCEREGISTERED_Reserved1)  
1327 - REFERENCEREGISTERED_Reserved2 = struct.unpack("<H", dir_stream.read(2))[0]  
1328 - check_value('REFERENCEREGISTERED_Reserved2', 0x0000, REFERENCEREGISTERED_Reserved2) 1370 + referenceregistered_id = check
  1371 + referenceregistered_size = struct.unpack("<L", dir_stream.read(4))[0]
  1372 + referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0]
  1373 + referenceregistered_libid = dir_stream.read(referenceregistered_sizeof_libid)
  1374 + referenceregistered_reserved1 = struct.unpack("<L", dir_stream.read(4))[0]
  1375 + check_value('REFERENCEREGISTERED_Reserved1', 0x0000, referenceregistered_reserved1)
  1376 + referenceregistered_reserved2 = struct.unpack("<H", dir_stream.read(2))[0]
  1377 + check_value('REFERENCEREGISTERED_Reserved2', 0x0000, referenceregistered_reserved2)
  1378 + unused = referenceregistered_id
  1379 + unused = referenceregistered_size
  1380 + unused = referenceregistered_libid
1329 continue 1381 continue
1330 1382
1331 if check == 0x000E: 1383 if check == 0x000E:
1332 # REFERENCEPROJECT 1384 # REFERENCEPROJECT
1333 - REFERENCEPROJECT_Id = check  
1334 - REFERENCEPROJECT_Size = struct.unpack("<L", dir_stream.read(4))[0]  
1335 - REFERENCEPROJECT_SizeOfLibidAbsolute = struct.unpack("<L", dir_stream.read(4))[0]  
1336 - REFERENCEPROJECT_LibidAbsolute = dir_stream.read(REFERENCEPROJECT_SizeOfLibidAbsolute)  
1337 - REFERENCEPROJECT_SizeOfLibidRelative = struct.unpack("<L", dir_stream.read(4))[0]  
1338 - REFERENCEPROJECT_LibidRelative = dir_stream.read(REFERENCEPROJECT_SizeOfLibidRelative)  
1339 - REFERENCEPROJECT_MajorVersion = struct.unpack("<L", dir_stream.read(4))[0]  
1340 - REFERENCEPROJECT_MinorVersion = struct.unpack("<H", dir_stream.read(2))[0] 1385 + referenceproject_id = check
  1386 + referenceproject_size = struct.unpack("<L", dir_stream.read(4))[0]
  1387 + referenceproject_sizeof_libidabsolute = struct.unpack("<L", dir_stream.read(4))[0]
  1388 + referenceproject_libidabsolute = dir_stream.read(referenceproject_sizeof_libidabsolute)
  1389 + referenceproject_sizeof_libidrelative = struct.unpack("<L", dir_stream.read(4))[0]
  1390 + referenceproject_libidrelative = dir_stream.read(referenceproject_sizeof_libidrelative)
  1391 + referenceproject_majorversion = struct.unpack("<L", dir_stream.read(4))[0]
  1392 + referenceproject_minorversion = struct.unpack("<H", dir_stream.read(2))[0]
  1393 + unused = referenceproject_id
  1394 + unused = referenceproject_size
  1395 + unused = referenceproject_libidabsolute
  1396 + unused = referenceproject_libidrelative
  1397 + unused = referenceproject_majorversion
  1398 + unused = referenceproject_minorversion
1341 continue 1399 continue
1342 1400
1343 log.error('invalid or unknown check Id {0:04X}'.format(check)) 1401 log.error('invalid or unknown check Id {0:04X}'.format(check))
1344 sys.exit(0) 1402 sys.exit(0)
1345 1403
1346 - PROJECTMODULES_Id = check #struct.unpack("<H", dir_stream.read(2))[0]  
1347 - check_value('PROJECTMODULES_Id', 0x000F, PROJECTMODULES_Id)  
1348 - PROJECTMODULES_Size = struct.unpack("<L", dir_stream.read(4))[0]  
1349 - check_value('PROJECTMODULES_Size', 0x0002, PROJECTMODULES_Size)  
1350 - PROJECTMODULES_Count = struct.unpack("<H", dir_stream.read(2))[0]  
1351 - PROJECTMODULES_ProjectCookieRecord_Id = struct.unpack("<H", dir_stream.read(2))[0]  
1352 - check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, PROJECTMODULES_ProjectCookieRecord_Id)  
1353 - PROJECTMODULES_ProjectCookieRecord_Size = struct.unpack("<L", dir_stream.read(4))[0]  
1354 - check_value('PROJECTMODULES_ProjectCookieRecord_Size', 0x0002, PROJECTMODULES_ProjectCookieRecord_Size)  
1355 - PROJECTMODULES_ProjectCookieRecord_Cookie = struct.unpack("<H", dir_stream.read(2))[0]  
1356 -  
1357 - log.debug("parsing {0} modules".format(PROJECTMODULES_Count))  
1358 - for x in xrange(0, PROJECTMODULES_Count):  
1359 - MODULENAME_Id = struct.unpack("<H", dir_stream.read(2))[0]  
1360 - check_value('MODULENAME_Id', 0x0019, MODULENAME_Id)  
1361 - MODULENAME_SizeOfModuleName = struct.unpack("<L", dir_stream.read(4))[0]  
1362 - MODULENAME_ModuleName = dir_stream.read(MODULENAME_SizeOfModuleName) 1404 + projectmodules_id = check #struct.unpack("<H", dir_stream.read(2))[0]
  1405 + check_value('PROJECTMODULES_Id', 0x000F, projectmodules_id)
  1406 + projectmodules_size = struct.unpack("<L", dir_stream.read(4))[0]
  1407 + check_value('PROJECTMODULES_Size', 0x0002, projectmodules_size)
  1408 + projectmodules_count = struct.unpack("<H", dir_stream.read(2))[0]
  1409 + projectmodules_projectcookierecord_id = struct.unpack("<H", dir_stream.read(2))[0]
  1410 + check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, projectmodules_projectcookierecord_id)
  1411 + projectmodules_projectcookierecord_size = struct.unpack("<L", dir_stream.read(4))[0]
  1412 + check_value('PROJECTMODULES_ProjectCookieRecord_Size', 0x0002, projectmodules_projectcookierecord_size)
  1413 + projectmodules_projectcookierecord_cookie = struct.unpack("<H", dir_stream.read(2))[0]
  1414 + unused = projectmodules_projectcookierecord_cookie
  1415 +
  1416 + log.debug("parsing {0} modules".format(projectmodules_count))
  1417 + for _ in xrange(0, projectmodules_count):
  1418 + modulename_id = struct.unpack("<H", dir_stream.read(2))[0]
  1419 + check_value('MODULENAME_Id', 0x0019, modulename_id)
  1420 + modulename_sizeof_modulename = struct.unpack("<L", dir_stream.read(4))[0]
  1421 + modulename_modulename = dir_stream.read(modulename_sizeof_modulename)
1363 # account for optional sections 1422 # account for optional sections
1364 section_id = struct.unpack("<H", dir_stream.read(2))[0] 1423 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1365 if section_id == 0x0047: 1424 if section_id == 0x0047:
1366 - MODULENAMEUNICODE_Id = section_id  
1367 - MODULENAMEUNICODE_SizeOfModuleNameUnicode = struct.unpack("<L", dir_stream.read(4))[0]  
1368 - MODULENAMEUNICODE_ModuleNameUnicode = dir_stream.read(MODULENAMEUNICODE_SizeOfModuleNameUnicode) 1425 + modulename_unicode_id = section_id
  1426 + modulename_unicode_sizeof_modulename_unicode = struct.unpack("<L", dir_stream.read(4))[0]
  1427 + modulename_unicode_modulename_unicode = dir_stream.read(modulename_unicode_sizeof_modulename_unicode)
  1428 + unused = modulename_unicode_id
  1429 + unused = modulename_unicode_modulename_unicode
1369 section_id = struct.unpack("<H", dir_stream.read(2))[0] 1430 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1370 if section_id == 0x001A: 1431 if section_id == 0x001A:
1371 - MODULESTREAMNAME_id = section_id  
1372 - MODULESTREAMNAME_SizeOfStreamName = struct.unpack("<L", dir_stream.read(4))[0]  
1373 - MODULESTREAMNAME_StreamName = dir_stream.read(MODULESTREAMNAME_SizeOfStreamName)  
1374 - MODULESTREAMNAME_Reserved = struct.unpack("<H", dir_stream.read(2))[0]  
1375 - check_value('MODULESTREAMNAME_Reserved', 0x0032, MODULESTREAMNAME_Reserved)  
1376 - MODULESTREAMNAME_SizeOfStreamNameUnicode = struct.unpack("<L", dir_stream.read(4))[0]  
1377 - MODULESTREAMNAME_StreamNameUnicode = dir_stream.read(MODULESTREAMNAME_SizeOfStreamNameUnicode) 1432 + modulestreamname_id = section_id
  1433 + modulestreamname_sizeof_streamname = struct.unpack("<L", dir_stream.read(4))[0]
  1434 + modulestreamname_streamname = dir_stream.read(modulestreamname_sizeof_streamname)
  1435 + modulestreamname_reserved = struct.unpack("<H", dir_stream.read(2))[0]
  1436 + check_value('MODULESTREAMNAME_Reserved', 0x0032, modulestreamname_reserved)
  1437 + modulestreamname_sizeof_streamname_unicode = struct.unpack("<L", dir_stream.read(4))[0]
  1438 + modulestreamname_streamname_unicode = dir_stream.read(modulestreamname_sizeof_streamname_unicode)
  1439 + unused = modulestreamname_id
1378 section_id = struct.unpack("<H", dir_stream.read(2))[0] 1440 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1379 if section_id == 0x001C: 1441 if section_id == 0x001C:
1380 - MODULEDOCSTRING_Id = section_id  
1381 - check_value('MODULEDOCSTRING_Id', 0x001C, MODULEDOCSTRING_Id)  
1382 - MODULEDOCSTRING_SizeOfDocString = struct.unpack("<L", dir_stream.read(4))[0]  
1383 - MODULEDOCSTRING_DocString = dir_stream.read(MODULEDOCSTRING_SizeOfDocString)  
1384 - MODULEDOCSTRING_Reserved = struct.unpack("<H", dir_stream.read(2))[0]  
1385 - check_value('MODULEDOCSTRING_Reserved', 0x0048, MODULEDOCSTRING_Reserved)  
1386 - MODULEDOCSTRING_SizeOfDocStringUnicode = struct.unpack("<L", dir_stream.read(4))[0]  
1387 - MODULEDOCSTRING_DocStringUnicode = dir_stream.read(MODULEDOCSTRING_SizeOfDocStringUnicode) 1442 + moduledocstring_id = section_id
  1443 + check_value('MODULEDOCSTRING_Id', 0x001C, moduledocstring_id)
  1444 + moduledocstring_sizeof_docstring = struct.unpack("<L", dir_stream.read(4))[0]
  1445 + moduledocstring_docstring = dir_stream.read(moduledocstring_sizeof_docstring)
  1446 + moduledocstring_reserved = struct.unpack("<H", dir_stream.read(2))[0]
  1447 + check_value('MODULEDOCSTRING_Reserved', 0x0048, moduledocstring_reserved)
  1448 + moduledocstring_sizeof_docstring_unicode = struct.unpack("<L", dir_stream.read(4))[0]
  1449 + moduledocstring_docstring_unicode = dir_stream.read(moduledocstring_sizeof_docstring_unicode)
  1450 + unused = moduledocstring_docstring
  1451 + unused = moduledocstring_docstring_unicode
1388 section_id = struct.unpack("<H", dir_stream.read(2))[0] 1452 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1389 if section_id == 0x0031: 1453 if section_id == 0x0031:
1390 - MODULEOFFSET_Id = section_id  
1391 - check_value('MODULEOFFSET_Id', 0x0031, MODULEOFFSET_Id)  
1392 - MODULEOFFSET_Size = struct.unpack("<L", dir_stream.read(4))[0]  
1393 - check_value('MODULEOFFSET_Size', 0x0004, MODULEOFFSET_Size)  
1394 - MODULEOFFSET_TextOffset = struct.unpack("<L", dir_stream.read(4))[0] 1454 + moduleoffset_id = section_id
  1455 + check_value('MODULEOFFSET_Id', 0x0031, moduleoffset_id)
  1456 + moduleoffset_size = struct.unpack("<L", dir_stream.read(4))[0]
  1457 + check_value('MODULEOFFSET_Size', 0x0004, moduleoffset_size)
  1458 + moduleoffset_textoffset = struct.unpack("<L", dir_stream.read(4))[0]
1395 section_id = struct.unpack("<H", dir_stream.read(2))[0] 1459 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1396 if section_id == 0x001E: 1460 if section_id == 0x001E:
1397 - MODULEHELPCONTEXT_Id = section_id  
1398 - check_value('MODULEHELPCONTEXT_Id', 0x001E, MODULEHELPCONTEXT_Id)  
1399 - MODULEHELPCONTEXT_Size = struct.unpack("<L", dir_stream.read(4))[0]  
1400 - check_value('MODULEHELPCONTEXT_Size', 0x0004, MODULEHELPCONTEXT_Size)  
1401 - MODULEHELPCONTEXT_HelpContext = struct.unpack("<L", dir_stream.read(4))[0] 1461 + modulehelpcontext_id = section_id
  1462 + check_value('MODULEHELPCONTEXT_Id', 0x001E, modulehelpcontext_id)
  1463 + modulehelpcontext_size = struct.unpack("<L", dir_stream.read(4))[0]
  1464 + check_value('MODULEHELPCONTEXT_Size', 0x0004, modulehelpcontext_size)
  1465 + modulehelpcontext_helpcontext = struct.unpack("<L", dir_stream.read(4))[0]
  1466 + unused = modulehelpcontext_helpcontext
1402 section_id = struct.unpack("<H", dir_stream.read(2))[0] 1467 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1403 if section_id == 0x002C: 1468 if section_id == 0x002C:
1404 - MODULECOOKIE_Id = section_id  
1405 - check_value('MODULECOOKIE_Id', 0x002C, MODULECOOKIE_Id)  
1406 - MODULECOOKIE_Size = struct.unpack("<L", dir_stream.read(4))[0]  
1407 - check_value('MODULECOOKIE_Size', 0x0002, MODULECOOKIE_Size)  
1408 - MODULECOOKIE_Cookie = struct.unpack("<H", dir_stream.read(2))[0] 1469 + modulecookie_id = section_id
  1470 + check_value('MODULECOOKIE_Id', 0x002C, modulecookie_id)
  1471 + modulecookie_size = struct.unpack("<L", dir_stream.read(4))[0]
  1472 + check_value('MODULECOOKIE_Size', 0x0002, modulecookie_size)
  1473 + modulecookie_cookie = struct.unpack("<H", dir_stream.read(2))[0]
  1474 + unused = modulecookie_cookie
1409 section_id = struct.unpack("<H", dir_stream.read(2))[0] 1475 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1410 if section_id == 0x0021 or section_id == 0x0022: 1476 if section_id == 0x0021 or section_id == 0x0022:
1411 - MODULETYPE_Id = section_id  
1412 - MODULETYPE_Reserved = struct.unpack("<L", dir_stream.read(4))[0] 1477 + moduletype_id = section_id
  1478 + moduletype_reserved = struct.unpack("<L", dir_stream.read(4))[0]
  1479 + unused = moduletype_id
  1480 + unused = moduletype_reserved
1413 section_id = struct.unpack("<H", dir_stream.read(2))[0] 1481 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1414 if section_id == 0x0025: 1482 if section_id == 0x0025:
1415 - MODULEREADONLY_Id = section_id  
1416 - check_value('MODULEREADONLY_Id', 0x0025, MODULEREADONLY_Id)  
1417 - MODULEREADONLY_Reserved = struct.unpack("<L", dir_stream.read(4))[0]  
1418 - check_value('MODULEREADONLY_Reserved', 0x0000, MODULEREADONLY_Reserved) 1483 + modulereadonly_id = section_id
  1484 + check_value('MODULEREADONLY_Id', 0x0025, modulereadonly_id)
  1485 + modulereadonly_reserved = struct.unpack("<L", dir_stream.read(4))[0]
  1486 + check_value('MODULEREADONLY_Reserved', 0x0000, modulereadonly_reserved)
1419 section_id = struct.unpack("<H", dir_stream.read(2))[0] 1487 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1420 if section_id == 0x0028: 1488 if section_id == 0x0028:
1421 - MODULEPRIVATE_Id = section_id  
1422 - check_value('MODULEPRIVATE_Id', 0x0028, MODULEPRIVATE_Id)  
1423 - MODULEPRIVATE_Reserved = struct.unpack("<L", dir_stream.read(4))[0]  
1424 - check_value('MODULEPRIVATE_Reserved', 0x0000, MODULEPRIVATE_Reserved) 1489 + moduleprivate_id = section_id
  1490 + check_value('MODULEPRIVATE_Id', 0x0028, moduleprivate_id)
  1491 + moduleprivate_reserved = struct.unpack("<L", dir_stream.read(4))[0]
  1492 + check_value('MODULEPRIVATE_Reserved', 0x0000, moduleprivate_reserved)
1425 section_id = struct.unpack("<H", dir_stream.read(2))[0] 1493 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1426 if section_id == 0x002B: # TERMINATOR 1494 if section_id == 0x002B: # TERMINATOR
1427 - MODULE_Reserved = struct.unpack("<L", dir_stream.read(4))[0]  
1428 - check_value('MODULE_Reserved', 0x0000, MODULE_Reserved) 1495 + module_reserved = struct.unpack("<L", dir_stream.read(4))[0]
  1496 + check_value('MODULE_Reserved', 0x0000, module_reserved)
1429 section_id = None 1497 section_id = None
1430 if section_id != None: 1498 if section_id != None:
1431 log.warning('unknown or invalid module section id {0:04X}'.format(section_id)) 1499 log.warning('unknown or invalid module section id {0:04X}'.format(section_id))
1432 1500
1433 - log.debug('Project CodePage = %d' % PROJECTCODEPAGE_CodePage)  
1434 - vba_codec = 'cp%d' % PROJECTCODEPAGE_CodePage  
1435 - log.debug("ModuleName = {0}".format(MODULENAME_ModuleName))  
1436 - log.debug("StreamName = {0}".format(repr(MODULESTREAMNAME_StreamName)))  
1437 - streamname_unicode = MODULESTREAMNAME_StreamName.decode(vba_codec) 1501 + log.debug('Project CodePage = %d' % projectcodepage_codepage)
  1502 + vba_codec = 'cp%d' % projectcodepage_codepage
  1503 + log.debug("ModuleName = {0}".format(modulename_modulename))
  1504 + log.debug("StreamName = {0}".format(repr(modulestreamname_streamname)))
  1505 + streamname_unicode = modulestreamname_streamname.decode(vba_codec)
1438 log.debug("StreamName.decode('%s') = %s" % (vba_codec, repr(streamname_unicode))) 1506 log.debug("StreamName.decode('%s') = %s" % (vba_codec, repr(streamname_unicode)))
1439 - log.debug("StreamNameUnicode = {0}".format(repr(MODULESTREAMNAME_StreamNameUnicode)))  
1440 - log.debug("TextOffset = {0}".format(MODULEOFFSET_TextOffset)) 1507 + log.debug("StreamNameUnicode = {0}".format(repr(modulestreamname_streamname_unicode)))
  1508 + log.debug("TextOffset = {0}".format(moduleoffset_textoffset))
1441 1509
1442 code_path = vba_root + u'VBA/' + streamname_unicode 1510 code_path = vba_root + u'VBA/' + streamname_unicode
1443 #TODO: test if stream exists 1511 #TODO: test if stream exists
1444 log.debug('opening VBA code stream %s' % repr(code_path)) 1512 log.debug('opening VBA code stream %s' % repr(code_path))
1445 code_data = ole.openstream(code_path).read() 1513 code_data = ole.openstream(code_path).read()
1446 log.debug("length of code_data = {0}".format(len(code_data))) 1514 log.debug("length of code_data = {0}".format(len(code_data)))
1447 - log.debug("offset of code_data = {0}".format(MODULEOFFSET_TextOffset))  
1448 - code_data = code_data[MODULEOFFSET_TextOffset:] 1515 + log.debug("offset of code_data = {0}".format(moduleoffset_textoffset))
  1516 + code_data = code_data[moduleoffset_textoffset:]
1449 if len(code_data) > 0: 1517 if len(code_data) > 0:
1450 code_data = decompress_stream(code_data) 1518 code_data = decompress_stream(code_data)
1451 # case-insensitive search in the code_modules dict to find the file extension: 1519 # case-insensitive search in the code_modules dict to find the file extension:
1452 - filext = code_modules.get(MODULENAME_ModuleName.lower(), 'bin')  
1453 - filename = '{0}.{1}'.format(MODULENAME_ModuleName, filext) 1520 + filext = code_modules.get(modulename_modulename.lower(), 'bin')
  1521 + filename = '{0}.{1}'.format(modulename_modulename, filext)
1454 #TODO: also yield the codepage so that callers can decode it properly 1522 #TODO: also yield the codepage so that callers can decode it properly
1455 yield (code_path, filename, code_data) 1523 yield (code_path, filename, code_data)
1456 # print '-'*79 1524 # print '-'*79
@@ -1460,7 +1528,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path): @@ -1460,7 +1528,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path):
1460 # print '' 1528 # print ''
1461 log.debug('extracted file {0}'.format(filename)) 1529 log.debug('extracted file {0}'.format(filename))
1462 else: 1530 else:
1463 - log.warning("module stream {0} has code data length 0".format(MODULESTREAMNAME_StreamName)) 1531 + log.warning("module stream {0} has code data length 0".format(modulestreamname_streamname))
  1532 + _ = unused
1464 return 1533 return
1465 1534
1466 1535
@@ -1616,12 +1685,9 @@ def detect_base64_strings(vba_code): @@ -1616,12 +1685,9 @@ def detect_base64_strings(vba_code):
1616 decoded = base64.b64decode(value) 1685 decoded = base64.b64decode(value)
1617 results.append((value, decoded)) 1686 results.append((value, decoded))
1618 found.add(value) 1687 found.add(value)
1619 - except KeyboardInterrupt:  
1620 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
1621 - raise  
1622 - except: 1688 + except (TypeError, ValueError) as exc:
  1689 + log.debug('Failed to base64-decode (%s)' % exc)
1623 # if an exception occurs, it is likely not a base64-encoded string 1690 # if an exception occurs, it is likely not a base64-encoded string
1624 - pass  
1625 return results 1691 return results
1626 1692
1627 1693
@@ -1646,12 +1712,9 @@ def detect_dridex_strings(vba_code): @@ -1646,12 +1712,9 @@ def detect_dridex_strings(vba_code):
1646 decoded = DridexUrlDecode(value) 1712 decoded = DridexUrlDecode(value)
1647 results.append((value, decoded)) 1713 results.append((value, decoded))
1648 found.add(value) 1714 found.add(value)
1649 - except KeyboardInterrupt:  
1650 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
1651 - raise  
1652 - except: 1715 + except Exception as exc:
  1716 + log.debug('Failed to Dridex-decode (%s)' % exc)
1653 # if an exception occurs, it is likely not a dridex-encoded string 1717 # if an exception occurs, it is likely not a dridex-encoded string
1654 - pass  
1655 return results 1718 return results
1656 1719
1657 1720
@@ -1701,16 +1764,17 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): @@ -1701,16 +1764,17 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'):
1701 elif isinstance(json_obj, (bool, int, float)): 1764 elif isinstance(json_obj, (bool, int, float)):
1702 pass 1765 pass
1703 elif isinstance(json_obj, str): 1766 elif isinstance(json_obj, str):
  1767 + # de-code and re-encode
1704 dencoded = json_obj.decode(encoding, errors).encode(encoding, errors) 1768 dencoded = json_obj.decode(encoding, errors).encode(encoding, errors)
1705 - if dencoded != str:  
1706 - logging.info('json2ascii: replaced: {0} (len {1})'  
1707 - .format(json_obj, len(json_obj)))  
1708 - logging.info('json2ascii: with: {0} (len {1})'  
1709 - .format(dencoded, len(dencoded))) 1769 + if dencoded != json_obj:
  1770 + log.info('json2ascii: replaced: {0} (len {1})'
  1771 + .format(json_obj, len(json_obj)))
  1772 + log.info('json2ascii: with: {0} (len {1})'
  1773 + .format(dencoded, len(dencoded)))
1710 return dencoded 1774 return dencoded
1711 elif isinstance(json_obj, unicode): 1775 elif isinstance(json_obj, unicode):
1712 - logging.info('json2ascii: replaced: {0}'  
1713 - .format(json_obj.encode(encoding, errors))) 1776 + log.info('json2ascii: replaced: {0}'
  1777 + .format(json_obj.encode(encoding, errors)))
1714 # cannot put original into logger 1778 # cannot put original into logger
1715 # print 'original: ' json_obj 1779 # print 'original: ' json_obj
1716 return json_obj.encode(encoding, errors) 1780 return json_obj.encode(encoding, errors)
@@ -1721,11 +1785,50 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): @@ -1721,11 +1785,50 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'):
1721 for item in json_obj: 1785 for item in json_obj:
1722 item = json2ascii(item) 1786 item = json2ascii(item)
1723 else: 1787 else:
1724 - logging.debug('unexpected type in json2ascii: {0} -- leave as is'  
1725 - .format(type(json_obj))) 1788 + log.debug('unexpected type in json2ascii: {0} -- leave as is'
  1789 + .format(type(json_obj)))
1726 return json_obj 1790 return json_obj
1727 1791
1728 1792
  1793 +_have_printed_json_start = False
  1794 +
  1795 +def print_json(json_dict=None, _json_is_last=False, **json_parts):
  1796 + """ line-wise print of json.dumps(json2ascii(..)) with options and indent+1
  1797 +
  1798 + can use in two ways:
  1799 + (1) print_json(some_dict)
  1800 + (2) print_json(key1=value1, key2=value2, ...)
  1801 +
  1802 + :param bool _json_is_last: set to True only for very last entry to complete
  1803 + the top-level json-list
  1804 + """
  1805 + global _have_printed_json_start
  1806 +
  1807 + if json_dict and json_parts:
  1808 + raise ValueError('Invalid json argument: want either single dict or '
  1809 + 'key=value parts but got both)')
  1810 + elif (json_dict is not None) and (not isinstance(json_dict, dict)):
  1811 + raise ValueError('Invalid json argument: want either single dict or '
  1812 + 'key=value parts but got {} instead of dict)'
  1813 + .format(type(json_dict)))
  1814 + if json_parts:
  1815 + json_dict = json_parts
  1816 +
  1817 + if not _have_printed_json_start:
  1818 + print '['
  1819 + _have_printed_json_start = True
  1820 +
  1821 + lines = json.dumps(json2ascii(json_dict), check_circular=False,
  1822 + indent=4, ensure_ascii=False).splitlines()
  1823 + for line in lines[:-1]:
  1824 + print ' {}'.format(line)
  1825 + if _json_is_last:
  1826 + print ' {}'.format(lines[-1]) # print last line without comma
  1827 + print ']'
  1828 + else:
  1829 + print ' {},'.format(lines[-1]) # print last line with comma
  1830 +
  1831 +
1729 class VBA_Scanner(object): 1832 class VBA_Scanner(object):
1730 """ 1833 """
1731 Class to scan the source code of a VBA module to find obfuscated strings, 1834 Class to scan the source code of a VBA module to find obfuscated strings,
@@ -1924,6 +2027,8 @@ class VBA_Parser(object): @@ -1924,6 +2027,8 @@ class VBA_Parser(object):
1924 2027
1925 :param container: str, path and filename of container if the file is within 2028 :param container: str, path and filename of container if the file is within
1926 a zip archive, None otherwise. 2029 a zip archive, None otherwise.
  2030 +
  2031 + raises a FileOpenError if all attempts to interpret the data header failed
1927 """ 2032 """
1928 #TODO: filename should only be a string, data should be used for the file-like object 2033 #TODO: filename should only be a string, data should be used for the file-like object
1929 #TODO: filename should be mandatory, optional data is a string or file-like object 2034 #TODO: filename should be mandatory, optional data is a string or file-like object
@@ -2000,8 +2105,8 @@ class VBA_Parser(object): @@ -2000,8 +2105,8 @@ class VBA_Parser(object):
2000 if self.type is None: 2105 if self.type is None:
2001 # At this stage, could not match a known format: 2106 # At this stage, could not match a known format:
2002 msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename 2107 msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename
2003 - log.error(msg)  
2004 - raise TypeError(msg) 2108 + log.info(msg)
  2109 + raise FileOpenError(msg)
2005 2110
2006 def open_ole(self, _file): 2111 def open_ole(self, _file):
2007 """ 2112 """
@@ -2016,13 +2121,10 @@ class VBA_Parser(object): @@ -2016,13 +2121,10 @@ class VBA_Parser(object):
2016 # TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet 2121 # TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet
2017 # set type only if parsing succeeds 2122 # set type only if parsing succeeds
2018 self.type = TYPE_OLE 2123 self.type = TYPE_OLE
2019 - except KeyboardInterrupt:  
2020 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
2021 - raise  
2022 - except: 2124 + except (IOError, TypeError, ValueError) as exc:
2023 # TODO: handle OLE parsing exceptions 2125 # TODO: handle OLE parsing exceptions
2024 - log.exception('Failed OLE parsing for file %r' % self.filename)  
2025 - pass 2126 + log.info('Failed OLE parsing for file %r (%s)' % (self.filename, exc))
  2127 + log.debug('Trace:', exc_info=True)
2026 2128
2027 2129
2028 def open_openxml(self, _file): 2130 def open_openxml(self, _file):
@@ -2048,22 +2150,17 @@ class VBA_Parser(object): @@ -2048,22 +2150,17 @@ class VBA_Parser(object):
2048 ole_data = z.open(subfile).read() 2150 ole_data = z.open(subfile).read()
2049 try: 2151 try:
2050 self.ole_subfiles.append(VBA_Parser(filename=subfile, data=ole_data)) 2152 self.ole_subfiles.append(VBA_Parser(filename=subfile, data=ole_data))
2051 - except KeyboardInterrupt:  
2052 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
2053 - raise  
2054 - except:  
2055 - log.debug('%s is not a valid OLE file' % subfile) 2153 + except FileOpenError as exc:
  2154 + log.info('%s is not a valid OLE file (%s)' % (subfile, exc))
2056 continue 2155 continue
2057 z.close() 2156 z.close()
2058 # set type only if parsing succeeds 2157 # set type only if parsing succeeds
2059 self.type = TYPE_OpenXML 2158 self.type = TYPE_OpenXML
2060 - except KeyboardInterrupt:  
2061 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
2062 - raise  
2063 - except: 2159 + except (RuntimeError, zipfile.BadZipfile, zipfile.LargeZipFile, IOError) as exc:
2064 # TODO: handle parsing exceptions 2160 # TODO: handle parsing exceptions
2065 - log.exception('Failed Zip/OpenXML parsing for file %r' % self.filename)  
2066 - pass 2161 + log.info('Failed Zip/OpenXML parsing for file %r (%s)'
  2162 + % (self.filename, exc))
  2163 + log.debug('Trace:', exc_info=True)
2067 2164
2068 def open_word2003xml(self, data): 2165 def open_word2003xml(self, data):
2069 """ 2166 """
@@ -2087,25 +2184,25 @@ class VBA_Parser(object): @@ -2087,25 +2184,25 @@ class VBA_Parser(object):
2087 if is_mso_file(mso_data): 2184 if is_mso_file(mso_data):
2088 # decompress the zlib data stored in the MSO file, which is the OLE container: 2185 # decompress the zlib data stored in the MSO file, which is the OLE container:
2089 # TODO: handle different offsets => separate function 2186 # TODO: handle different offsets => separate function
2090 - ole_data = mso_file_extract(mso_data)  
2091 try: 2187 try:
  2188 + ole_data = mso_file_extract(mso_data)
2092 self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) 2189 self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data))
2093 - except KeyboardInterrupt:  
2094 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
2095 - raise  
2096 - except:  
2097 - log.error('%s does not contain a valid OLE file' % fname) 2190 + except MsoExtractionError:
  2191 + log.info('Failed decompressing an MSO container in %r - %s'
  2192 + % (fname, MSG_OLEVBA_ISSUES))
  2193 + log.debug('Trace:', exc_info=True)
  2194 + except FileOpenError as exc:
  2195 + log.debug('%s is not a valid OLE sub file (%s)' % (fname, exc))
2098 else: 2196 else:
2099 - log.error('%s is not a valid MSO file' % fname) 2197 + log.info('%s is not a valid MSO file' % fname)
2100 # set type only if parsing succeeds 2198 # set type only if parsing succeeds
2101 self.type = TYPE_Word2003_XML 2199 self.type = TYPE_Word2003_XML
2102 - except KeyboardInterrupt:  
2103 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
2104 - raise  
2105 - except: 2200 + except Exception as exc:
2106 # TODO: differentiate exceptions for each parsing stage 2201 # TODO: differentiate exceptions for each parsing stage
2107 - log.exception('Failed XML parsing for file %r' % self.filename)  
2108 - pass 2202 + # (but ET is different libs, no good exception description in API)
  2203 + # found: XMLSyntaxError
  2204 + log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc))
  2205 + log.debug('Trace:', exc_info=True)
2109 2206
2110 def open_mht(self, data): 2207 def open_mht(self, data):
2111 """ 2208 """
@@ -2148,40 +2245,30 @@ class VBA_Parser(object): @@ -2148,40 +2245,30 @@ class VBA_Parser(object):
2148 log.debug('Found ActiveMime header, decompressing MSO container') 2245 log.debug('Found ActiveMime header, decompressing MSO container')
2149 try: 2246 try:
2150 ole_data = mso_file_extract(part_data) 2247 ole_data = mso_file_extract(part_data)
2151 - try:  
2152 - # TODO: check if it is actually an OLE file  
2153 - # TODO: get the MSO filename from content_location?  
2154 - self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data))  
2155 - except KeyboardInterrupt:  
2156 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
2157 - raise  
2158 - except:  
2159 - log.debug('%s does not contain a valid OLE file' % fname)  
2160 - except KeyboardInterrupt:  
2161 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
2162 - raise  
2163 - except:  
2164 - log.exception('Failed decompressing an MSO container in %r - %s' 2248 +
  2249 + # TODO: check if it is actually an OLE file
  2250 + # TODO: get the MSO filename from content_location?
  2251 + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data))
  2252 + except MsoExtractionError:
  2253 + log.info('Failed decompressing an MSO container in %r - %s'
2165 % (fname, MSG_OLEVBA_ISSUES)) 2254 % (fname, MSG_OLEVBA_ISSUES))
  2255 + log.debug('Trace:', exc_info=True)
2166 # TODO: bug here - need to split in smaller functions/classes? 2256 # TODO: bug here - need to split in smaller functions/classes?
  2257 + except FileOpenError as exc:
  2258 + log.debug('%s does not contain a valid OLE file (%s)'
  2259 + % (fname, exc))
2167 else: 2260 else:
  2261 + log.debug('type(part_data) = %s' % type(part_data))
2168 try: 2262 try:
2169 - log.debug('type(part_data) = %s' % type(part_data))  
2170 log.debug('part_data[0:20] = %r' % part_data[0:20]) 2263 log.debug('part_data[0:20] = %r' % part_data[0:20])
2171 - except KeyboardInterrupt:  
2172 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
2173 - raise  
2174 - except:  
2175 - pass 2264 + except TypeError as err:
  2265 + log.debug('part_data has no __getitem__')
2176 # set type only if parsing succeeds 2266 # set type only if parsing succeeds
2177 self.type = TYPE_MHTML 2267 self.type = TYPE_MHTML
2178 - except KeyboardInterrupt:  
2179 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
2180 - raise  
2181 - except:  
2182 - log.exception('Failed MIME parsing for file %r - %s'  
2183 - % (self.filename, MSG_OLEVBA_ISSUES))  
2184 - pass 2268 + except Exception:
  2269 + log.info('Failed MIME parsing for file %r - %s'
  2270 + % (self.filename, MSG_OLEVBA_ISSUES))
  2271 + log.debug('Trace:', exc_info=True)
2185 2272
2186 2273
2187 def open_text(self, data): 2274 def open_text(self, data):
@@ -2191,19 +2278,11 @@ class VBA_Parser(object): @@ -2191,19 +2278,11 @@ class VBA_Parser(object):
2191 :return: nothing 2278 :return: nothing
2192 """ 2279 """
2193 log.info('Opening text file %s' % self.filename) 2280 log.info('Opening text file %s' % self.filename)
2194 - try:  
2195 - # directly store the source code:  
2196 - self.vba_code_all_modules = data  
2197 - self.contains_macros = True  
2198 - # set type only if parsing succeeds  
2199 - self.type = TYPE_TEXT  
2200 - except KeyboardInterrupt:  
2201 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
2202 - raise  
2203 - except:  
2204 - log.exception('Failed text parsing for file %r - %s'  
2205 - % (self.filename, MSG_OLEVBA_ISSUES))  
2206 - pass 2281 + # directly store the source code:
  2282 + self.vba_code_all_modules = data
  2283 + self.contains_macros = True
  2284 + # set type only if parsing succeeds
  2285 + self.type = TYPE_TEXT
2207 2286
2208 2287
2209 def find_vba_projects(self): 2288 def find_vba_projects(self):
@@ -2247,6 +2326,15 @@ class VBA_Parser(object): @@ -2247,6 +2326,15 @@ class VBA_Parser(object):
2247 # - The root/VBA storage MUST contain a _VBA_PROJECT stream and a dir stream 2326 # - The root/VBA storage MUST contain a _VBA_PROJECT stream and a dir stream
2248 # - all names are case-insensitive 2327 # - all names are case-insensitive
2249 2328
  2329 + def check_vba_stream(ole, vba_root, stream_path):
  2330 + full_path = vba_root + stream_path
  2331 + if ole.exists(full_path) and ole.get_type(full_path) == olefile.STGTY_STREAM:
  2332 + log.debug('Found %s stream: %s' % (stream_path, full_path))
  2333 + return full_path
  2334 + else:
  2335 + log.debug('Missing %s stream, this is not a valid VBA project structure' % stream_path)
  2336 + return False
  2337 +
2250 # start with an empty list: 2338 # start with an empty list:
2251 self.vba_projects = [] 2339 self.vba_projects = []
2252 # Look for any storage containing those storage/streams: 2340 # Look for any storage containing those storage/streams:
@@ -2263,15 +2351,6 @@ class VBA_Parser(object): @@ -2263,15 +2351,6 @@ class VBA_Parser(object):
2263 vba_root += '/' 2351 vba_root += '/'
2264 log.debug('Checking vba_root="%s"' % vba_root) 2352 log.debug('Checking vba_root="%s"' % vba_root)
2265 2353
2266 - def check_vba_stream(ole, vba_root, stream_path):  
2267 - full_path = vba_root + stream_path  
2268 - if ole.exists(full_path) and ole.get_type(full_path) == olefile.STGTY_STREAM:  
2269 - log.debug('Found %s stream: %s' % (stream_path, full_path))  
2270 - return full_path  
2271 - else:  
2272 - log.debug('Missing %s stream, this is not a valid VBA project structure' % stream_path)  
2273 - return False  
2274 -  
2275 # Check if the VBA root storage also contains a PROJECT stream: 2354 # Check if the VBA root storage also contains a PROJECT stream:
2276 project_path = check_vba_stream(ole, vba_root, 'PROJECT') 2355 project_path = check_vba_stream(ole, vba_root, 'PROJECT')
2277 if not project_path: continue 2356 if not project_path: continue
@@ -2436,10 +2515,10 @@ class VBA_Parser(object): @@ -2436,10 +2515,10 @@ class VBA_Parser(object):
2436 # variable to merge source code from all modules: 2515 # variable to merge source code from all modules:
2437 if self.vba_code_all_modules is None: 2516 if self.vba_code_all_modules is None:
2438 self.vba_code_all_modules = '' 2517 self.vba_code_all_modules = ''
2439 - for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros(): 2518 + for (_, _, _, vba_code) in self.extract_all_macros():
2440 #TODO: filter code? (each module) 2519 #TODO: filter code? (each module)
2441 self.vba_code_all_modules += vba_code + '\n' 2520 self.vba_code_all_modules += vba_code + '\n'
2442 - for (subfilename, form_path, form_string) in self.extract_form_strings(): 2521 + for (_, _, form_string) in self.extract_form_strings():
2443 self.vba_code_all_modules += form_string + '\n' 2522 self.vba_code_all_modules += form_string + '\n'
2444 # Analyze the whole code at once: 2523 # Analyze the whole code at once:
2445 scanner = VBA_Scanner(self.vba_code_all_modules) 2524 scanner = VBA_Scanner(self.vba_code_all_modules)
@@ -2587,8 +2666,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2587,8 +2666,7 @@ class VBA_Parser_CLI(VBA_Parser):
2587 def __init__(self, filename, data=None, container=None): 2666 def __init__(self, filename, data=None, container=None):
2588 """ 2667 """
2589 Constructor for VBA_Parser_CLI. 2668 Constructor for VBA_Parser_CLI.
2590 - Calls __init__ from VBA_Parser, but handles the TypeError exception  
2591 - when the file type is not supported. 2669 + Calls __init__ from VBA_Parser
2592 2670
2593 :param filename: filename or path of file to parse, or file-like object 2671 :param filename: filename or path of file to parse, or file-like object
2594 2672
@@ -2599,11 +2677,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2599,11 +2677,7 @@ class VBA_Parser_CLI(VBA_Parser):
2599 :param container: str, path and filename of container if the file is within 2677 :param container: str, path and filename of container if the file is within
2600 a zip archive, None otherwise. 2678 a zip archive, None otherwise.
2601 """ 2679 """
2602 - try:  
2603 - VBA_Parser.__init__(self, filename, data=data, container=container)  
2604 - except TypeError:  
2605 - # in that case, self.type=None  
2606 - pass 2680 + super(VBA_Parser_CLI, self).__init__(filename, data=data, container=container)
2607 2681
2608 2682
2609 def print_analysis(self, show_decoded_strings=False, deobfuscate=False): 2683 def print_analysis(self, show_decoded_strings=False, deobfuscate=False):
@@ -2653,7 +2727,7 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2653,7 +2727,7 @@ class VBA_Parser_CLI(VBA_Parser):
2653 for kw_type, keyword, description in self.analyze_macros(show_decoded_strings)] 2727 for kw_type, keyword, description in self.analyze_macros(show_decoded_strings)]
2654 2728
2655 def process_file(self, show_decoded_strings=False, 2729 def process_file(self, show_decoded_strings=False,
2656 - display_code=True, global_analysis=True, hide_attributes=True, 2730 + display_code=True, hide_attributes=True,
2657 vba_code_only=False, show_deobfuscated_code=False, 2731 vba_code_only=False, show_deobfuscated_code=False,
2658 deobfuscate=False): 2732 deobfuscate=False):
2659 """ 2733 """
@@ -2699,19 +2773,12 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2699,19 +2773,12 @@ class VBA_Parser_CLI(VBA_Parser):
2699 print '(empty macro)' 2773 print '(empty macro)'
2700 else: 2774 else:
2701 print vba_code_filtered 2775 print vba_code_filtered
2702 - if not global_analysis and not vba_code_only:  
2703 - #TODO: remove this option  
2704 - raise NotImplementedError  
2705 - print '- ' * 39  
2706 - print 'ANALYSIS:'  
2707 - # analyse each module's code, filtered to avoid false positives:  
2708 - self.print_analysis(show_decoded_strings, deobfuscate)  
2709 for (subfilename, stream_path, form_string) in self.extract_form_strings(): 2776 for (subfilename, stream_path, form_string) in self.extract_form_strings():
2710 print '-' * 79 2777 print '-' * 79
2711 print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path) 2778 print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)
2712 print '- ' * 39 2779 print '- ' * 39
2713 print form_string 2780 print form_string
2714 - if global_analysis and not vba_code_only: 2781 + if not vba_code_only:
2715 # analyse the code from all modules at once: 2782 # analyse the code from all modules at once:
2716 self.print_analysis(show_decoded_strings, deobfuscate) 2783 self.print_analysis(show_decoded_strings, deobfuscate)
2717 if show_deobfuscated_code: 2784 if show_deobfuscated_code:
@@ -2719,20 +2786,16 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2719,20 +2786,16 @@ class VBA_Parser_CLI(VBA_Parser):
2719 print self.reveal() 2786 print self.reveal()
2720 else: 2787 else:
2721 print 'No VBA macros found.' 2788 print 'No VBA macros found.'
2722 - except KeyboardInterrupt:  
2723 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
2724 - raise  
2725 - except: #TypeError:  
2726 - #raise  
2727 - #TODO: print more info if debug mode  
2728 - #print sys.exc_value  
2729 - # display the exception with full stack trace for debugging, but do not stop:  
2730 - traceback.print_exc() 2789 + except Exception as exc:
  2790 + # display the exception with full stack trace for debugging
  2791 + log.info('Error processing file %s (%s)' % (self.filename, exc))
  2792 + log.debug('Traceback:', exc_info=True)
  2793 + raise ProcessingError(self.filename, exc)
2731 print '' 2794 print ''
2732 2795
2733 2796
2734 def process_file_json(self, show_decoded_strings=False, 2797 def process_file_json(self, show_decoded_strings=False,
2735 - display_code=True, global_analysis=True, hide_attributes=True, 2798 + display_code=True, hide_attributes=True,
2736 vba_code_only=False, show_deobfuscated_code=False): 2799 vba_code_only=False, show_deobfuscated_code=False):
2737 """ 2800 """
2738 Process a single file 2801 Process a single file
@@ -2781,27 +2844,19 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2781,27 +2844,19 @@ class VBA_Parser_CLI(VBA_Parser):
2781 curr_macro['ole_stream'] = stream_path 2844 curr_macro['ole_stream'] = stream_path
2782 if display_code: 2845 if display_code:
2783 curr_macro['code'] = vba_code_filtered.strip() 2846 curr_macro['code'] = vba_code_filtered.strip()
2784 - if not global_analysis and not vba_code_only:  
2785 - # analyse each module's code, filtered to avoid false positives:  
2786 - #TODO: remove this option  
2787 - curr_macro['analysis'] = self.print_analysis_json(show_decoded_strings)  
2788 macros.append(curr_macro) 2847 macros.append(curr_macro)
2789 - if global_analysis and not vba_code_only: 2848 + if not vba_code_only:
2790 # analyse the code from all modules at once: 2849 # analyse the code from all modules at once:
2791 result['analysis'] = self.print_analysis_json(show_decoded_strings) 2850 result['analysis'] = self.print_analysis_json(show_decoded_strings)
2792 if show_deobfuscated_code: 2851 if show_deobfuscated_code:
2793 result['code_deobfuscated'] = self.reveal() 2852 result['code_deobfuscated'] = self.reveal()
2794 result['macros'] = macros 2853 result['macros'] = macros
2795 result['json_conversion_successful'] = True 2854 result['json_conversion_successful'] = True
2796 - except KeyboardInterrupt:  
2797 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
2798 - raise  
2799 - except: #TypeError:  
2800 - #raise  
2801 - #TODO: print more info if debug mode  
2802 - #print sys.exc_value  
2803 - # display the exception with full stack trace for debugging, but do not stop:  
2804 - traceback.print_exc() 2855 + except Exception as exc:
  2856 + # display the exception with full stack trace for debugging
  2857 + log.info('Error processing file %s (%s)' % (self.filename, exc))
  2858 + log.debug('Traceback:', exc_info=True)
  2859 + raise ProcessingError(self.filename, exc)
2805 2860
2806 return result 2861 return result
2807 2862
@@ -2811,57 +2866,46 @@ class VBA_Parser_CLI(VBA_Parser): @@ -2811,57 +2866,46 @@ class VBA_Parser_CLI(VBA_Parser):
2811 Process a file in triage mode, showing only summary results on one line. 2866 Process a file in triage mode, showing only summary results on one line.
2812 """ 2867 """
2813 #TODO: replace print by writing to a provided output file (sys.stdout by default) 2868 #TODO: replace print by writing to a provided output file (sys.stdout by default)
2814 - message = ''  
2815 try: 2869 try:
2816 - if self.type is not None:  
2817 - #TODO: handle olefile errors, when an OLE file is malformed  
2818 - if self.detect_vba_macros():  
2819 - # print a waiting message only if the output is not redirected to a file:  
2820 - if sys.stdout.isatty():  
2821 - print 'Analysis...\r',  
2822 - sys.stdout.flush()  
2823 - self.analyze_macros(show_decoded_strings=show_decoded_strings,  
2824 - deobfuscate=deobfuscate)  
2825 - flags = TYPE2TAG[self.type]  
2826 - macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-'  
2827 - if self.contains_macros: macros = 'M'  
2828 - if self.nb_autoexec: autoexec = 'A'  
2829 - if self.nb_suspicious: suspicious = 'S'  
2830 - if self.nb_iocs: iocs = 'I'  
2831 - if self.nb_hexstrings: hexstrings = 'H'  
2832 - if self.nb_base64strings: base64obf = 'B'  
2833 - if self.nb_dridexstrings: dridex = 'D'  
2834 - if self.nb_vbastrings: vba_obf = 'V'  
2835 - flags += '%s%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings,  
2836 - base64obf, dridex, vba_obf)  
2837 - # old table display:  
2838 - # macros = autoexec = suspicious = iocs = hexstrings = 'no'  
2839 - # if nb_macros: macros = 'YES:%d' % nb_macros  
2840 - # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec  
2841 - # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious  
2842 - # if nb_iocs: iocs = 'YES:%d' % nb_iocs  
2843 - # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings  
2844 - # # 2nd line = info  
2845 - # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (self.type, macros, autoexec, suspicious, iocs, hexstrings)  
2846 - else:  
2847 - # self.type==None  
2848 - # file type not OLE nor OpenXML  
2849 - flags = '?'  
2850 - message = 'File format not supported'  
2851 - except KeyboardInterrupt:  
2852 - # do not ignore exceptions when the user presses Ctrl+C/Pause:  
2853 - raise  
2854 - except:  
2855 - # another error occurred  
2856 - #raise  
2857 - #TODO: print more info if debug mode  
2858 - #TODO: distinguish real errors from incorrect file types  
2859 - flags = '!ERROR'  
2860 - message = sys.exc_value  
2861 - line = '%-12s %s' % (flags, self.filename)  
2862 - if message:  
2863 - line += ' - %s' % message  
2864 - print line 2870 + #TODO: handle olefile errors, when an OLE file is malformed
  2871 + if self.detect_vba_macros():
  2872 + # print a waiting message only if the output is not redirected to a file:
  2873 + if sys.stdout.isatty():
  2874 + print 'Analysis...\r',
  2875 + sys.stdout.flush()
  2876 + self.analyze_macros(show_decoded_strings=show_decoded_strings,
  2877 + deobfuscate=deobfuscate)
  2878 + flags = TYPE2TAG[self.type]
  2879 + macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-'
  2880 + if self.contains_macros: macros = 'M'
  2881 + if self.nb_autoexec: autoexec = 'A'
  2882 + if self.nb_suspicious: suspicious = 'S'
  2883 + if self.nb_iocs: iocs = 'I'
  2884 + if self.nb_hexstrings: hexstrings = 'H'
  2885 + if self.nb_base64strings: base64obf = 'B'
  2886 + if self.nb_dridexstrings: dridex = 'D'
  2887 + if self.nb_vbastrings: vba_obf = 'V'
  2888 + flags += '%s%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings,
  2889 + base64obf, dridex, vba_obf)
  2890 +
  2891 + line = '%-12s %s' % (flags, self.filename)
  2892 + print line
  2893 +
  2894 + # old table display:
  2895 + # macros = autoexec = suspicious = iocs = hexstrings = 'no'
  2896 + # if nb_macros: macros = 'YES:%d' % nb_macros
  2897 + # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec
  2898 + # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious
  2899 + # if nb_iocs: iocs = 'YES:%d' % nb_iocs
  2900 + # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings
  2901 + # # 2nd line = info
  2902 + # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (self.type, macros, autoexec, suspicious, iocs, hexstrings)
  2903 + except Exception as exc:
  2904 + # display the exception with full stack trace for debugging only
  2905 + log.debug('Error processing file %s (%s)' % (self.filename, exc),
  2906 + exc_info=True)
  2907 + raise ProcessingError(self.filename, exc)
  2908 +
2865 2909
2866 # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'), 2910 # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'),
2867 # header=False, border=False) 2911 # header=False, border=False)
@@ -2883,7 +2927,6 @@ def main(): @@ -2883,7 +2927,6 @@ def main():
2883 """ 2927 """
2884 Main function, called when olevba is run from the command line 2928 Main function, called when olevba is run from the command line
2885 """ 2929 """
2886 - global log  
2887 DEFAULT_LOG_LEVEL = "warning" # Default log level 2930 DEFAULT_LOG_LEVEL = "warning" # Default log level
2888 LOG_LEVELS = { 2931 LOG_LEVELS = {
2889 'debug': logging.DEBUG, 2932 'debug': logging.DEBUG,
@@ -2939,13 +2982,14 @@ def main(): @@ -2939,13 +2982,14 @@ def main():
2939 if len(args) == 0: 2982 if len(args) == 0:
2940 print __doc__ 2983 print __doc__
2941 parser.print_help() 2984 parser.print_help()
2942 - sys.exit() 2985 + sys.exit(RETURN_WRONG_ARGS)
2943 2986
2944 # provide info about tool and its version 2987 # provide info about tool and its version
2945 if options.output_mode == 'json': 2988 if options.output_mode == 'json':
2946 - json_results = [dict(script_name='olevba', version=__version__,  
2947 - url='http://decalage.info/python/oletools',  
2948 - type='MetaInformation'), ] 2989 + # prints opening [
  2990 + print_json(script_name='olevba', version=__version__,
  2991 + url='http://decalage.info/python/oletools',
  2992 + type='MetaInformation')
2949 else: 2993 else:
2950 print 'olevba %s - http://decalage.info/python/oletools' % __version__ 2994 print 'olevba %s - http://decalage.info/python/oletools' % __version__
2951 2995
@@ -2971,65 +3015,120 @@ def main(): @@ -2971,65 +3015,120 @@ def main():
2971 count = 0 3015 count = 0
2972 container = filename = data = None 3016 container = filename = data = None
2973 vba_parser = None 3017 vba_parser = None
2974 - for container, filename, data in xglob.iter_files(args, recursive=options.recursive,  
2975 - zip_password=options.zip_password, zip_fname=options.zip_fname):  
2976 - # ignore directory names stored in zip files:  
2977 - if container and filename.endswith('/'):  
2978 - continue  
2979 - # Open the file  
2980 - vba_parser = VBA_Parser_CLI(filename, data=data, container=container)  
2981 - if options.output_mode == 'detailed':  
2982 - # fully detailed output  
2983 - vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,  
2984 - display_code=options.display_code, global_analysis=True, #options.global_analysis,  
2985 - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,  
2986 - show_deobfuscated_code=options.show_deobfuscated_code,  
2987 - deobfuscate=options.deobfuscate)  
2988 - elif options.output_mode in ('triage', 'unspecified'):  
2989 - # print container name when it changes:  
2990 - if container != previous_container:  
2991 - if container is not None:  
2992 - print '\nFiles in %s:' % container  
2993 - previous_container = container  
2994 - # summarized output for triage:  
2995 - vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,  
2996 - deobfuscate=options.deobfuscate)  
2997 - elif options.output_mode == 'json':  
2998 - json_results.append(  
2999 - vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,  
3000 - display_code=options.display_code, global_analysis=True, #options.global_analysis,  
3001 - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,  
3002 - show_deobfuscated_code=options.show_deobfuscated_code))  
3003 - else: # (should be impossible)  
3004 - raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))  
3005 - count += 1  
3006 - if options.output_mode == 'triage':  
3007 - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \  
3008 - 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \  
3009 - 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n'  
3010 -  
3011 - if count == 1 and options.output_mode == 'unspecified':  
3012 - # if options -t, -d and -j were not specified and it's a single file, print details:  
3013 - vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,  
3014 - display_code=options.display_code, global_analysis=True, #options.global_analysis,  
3015 - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,  
3016 - show_deobfuscated_code=options.show_deobfuscated_code,  
3017 - deobfuscate=options.deobfuscate)  
3018 -  
3019 - if options.output_mode == 'json':  
3020 - json_options = dict(check_circular=False, indent=4, ensure_ascii=False)  
3021 -  
3022 - # json.dump[s] cannot deal with unicode objects that are not properly  
3023 - # encoded --> encode in own function:  
3024 - json_results = json2ascii(json_results)  
3025 - #print_json(json_results)  
3026 -  
3027 - # if False: # options.outfile: # (option currently commented out)  
3028 - # with open(outfile, 'w') as write_handle:  
3029 - # json.dump(write_handle, **json_options)  
3030 - # else:  
3031 - print json.dumps(json_results, **json_options) 3018 + return_code = RETURN_OK
  3019 + try:
  3020 + for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
  3021 + zip_password=options.zip_password, zip_fname=options.zip_fname):
  3022 + # ignore directory names stored in zip files:
  3023 + if container and filename.endswith('/'):
  3024 + continue
  3025 +
  3026 + # handle errors from xglob
  3027 + if isinstance(data, Exception):
  3028 + if isinstance(data, PathNotFoundException):
  3029 + if options.output_mode in ('triage', 'unspecified'):
  3030 + print '%-12s %s - File not found' % ('?', filename)
  3031 + elif options.output_mode != 'json':
  3032 + log.error('Given path %r does not exist!' % filename)
  3033 + return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \
  3034 + else RETURN_SEVERAL_ERRS
  3035 + else:
  3036 + if options.output_mode in ('triage', 'unspecified'):
  3037 + print '%-12s %s - Failed to read from zip file %s' % ('?', filename, container)
  3038 + elif options.output_mode != 'json':
  3039 + log.error('Exception opening/reading %r from zip file %r: %s'
  3040 + % (filename, container, data))
  3041 + return_code = RETURN_XGLOB_ERR if return_code == 0 \
  3042 + else RETURN_SEVERAL_ERRS
  3043 + if options.output_mode == 'json':
  3044 + print_json(file=filename, type='error',
  3045 + error=type(data).__name__, message=str(data))
  3046 + continue
3032 3047
  3048 + try:
  3049 + # Open the file
  3050 + vba_parser = VBA_Parser_CLI(filename, data=data, container=container)
  3051 +
  3052 + if options.output_mode == 'detailed':
  3053 + # fully detailed output
  3054 + vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
  3055 + display_code=options.display_code,
  3056 + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
  3057 + show_deobfuscated_code=options.show_deobfuscated_code,
  3058 + deobfuscate=options.deobfuscate)
  3059 + elif options.output_mode in ('triage', 'unspecified'):
  3060 + # print container name when it changes:
  3061 + if container != previous_container:
  3062 + if container is not None:
  3063 + print '\nFiles in %s:' % container
  3064 + previous_container = container
  3065 + # summarized output for triage:
  3066 + vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
  3067 + deobfuscate=options.deobfuscate)
  3068 + elif options.output_mode == 'json':
  3069 + print_json(
  3070 + vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
  3071 + display_code=options.display_code,
  3072 + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
  3073 + show_deobfuscated_code=options.show_deobfuscated_code))
  3074 + else: # (should be impossible)
  3075 + raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
  3076 + count += 1
  3077 +
  3078 + except FileOpenError as exc:
  3079 + if options.output_mode in ('triage', 'unspecified'):
  3080 + print '%-12s %s - File format not supported' % ('?', filename)
  3081 + elif options.output_mode == 'json':
  3082 + print_json(file=filename, type='error',
  3083 + error=type(exc).__name__, message=str(exc))
  3084 + else:
  3085 + log.exception('Failed to open %s -- probably not supported!' % filename)
  3086 + return_code = RETURN_OPEN_ERROR if return_code == 0 \
  3087 + else RETURN_SEVERAL_ERRS
  3088 + except ProcessingError as exc:
  3089 + if options.output_mode in ('triage', 'unspecified'):
  3090 + print '%-12s %s - %s' % ('!ERROR', filename, exc.orig_exception)
  3091 + elif options.output_mode == 'json':
  3092 + print_json(file=filename, type='error',
  3093 + error=type(exc).__name__,
  3094 + message=str(exc.orig_exception))
  3095 + else:
  3096 + log.exception('Error processing file %s (%s)!'
  3097 + % (filename, exc.orig_exception))
  3098 + return_code = RETURN_PARSE_ERROR if return_code == 0 \
  3099 + else RETURN_SEVERAL_ERRS
  3100 + finally:
  3101 + if vba_parser is not None:
  3102 + vba_parser.close()
  3103 +
  3104 + if options.output_mode == 'triage':
  3105 + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \
  3106 + 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \
  3107 + 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n'
  3108 +
  3109 + if count == 1 and options.output_mode == 'unspecified':
  3110 + # if options -t, -d and -j were not specified and it's a single file, print details:
  3111 + vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
  3112 + display_code=options.display_code,
  3113 + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
  3114 + show_deobfuscated_code=options.show_deobfuscated_code,
  3115 + deobfuscate=options.deobfuscate)
  3116 +
  3117 + if options.output_mode == 'json':
  3118 + # print last json entry (a last one without a comma) and closing ]
  3119 + print_json(type='MetaInformation', return_code=return_code,
  3120 + n_processed=count, _json_is_last=True)
  3121 +
  3122 + except Exception as exc:
  3123 + # some unexpected error, maybe some of the types caught in except clauses
  3124 + # above were not sufficient. This is very bad, so log complete trace at exception level
  3125 + # and do not care about output mode
  3126 + log.exception('Unhandled exception in main: %s' % exc, exc_info=True)
  3127 + return_code = RETURN_UNEXPECTED # even if there were others before -- this is more important
  3128 +
  3129 + # done. exit
  3130 + log.debug('will exit now with code %s' % return_code)
  3131 + sys.exit(return_code)
3033 3132
3034 if __name__ == '__main__': 3133 if __name__ == '__main__':
3035 main() 3134 main()
oletools/thirdparty/xglob/xglob.py
@@ -60,6 +60,15 @@ __version__ = '0.05' @@ -60,6 +60,15 @@ __version__ = '0.05'
60 60
61 import os, fnmatch, glob, zipfile 61 import os, fnmatch, glob, zipfile
62 62
  63 +#=== EXCEPTIONS ==============================================================
  64 +
class PathNotFoundException(Exception):
    """Raised if given a fixed file/dir (not a glob) that does not exist.

    The exception message is 'Given path does not exist: <repr of path>'.

    Attributes:
        path: the file specification that could not be found, exactly as it
              was passed to the constructor
    """

    def __init__(self, path):
        # Wrap path in a 1-tuple: with a bare "% path" a tuple argument would
        # be unpacked as several format arguments and raise a TypeError.
        super(PathNotFoundException, self).__init__(
            'Given path does not exist: %r' % (path,))
        # Keep the offending path so callers do not have to parse the message
        self.path = path
  70 +
  71 +
63 #=== FUNCTIONS =============================================================== 72 #=== FUNCTIONS ===============================================================
64 73
65 # recursive glob function to find files in any subfolder: 74 # recursive glob function to find files in any subfolder:
@@ -118,8 +127,11 @@ def iter_files(files, recursive=False, zip_password=None, zip_fname='*'): @@ -118,8 +127,11 @@ def iter_files(files, recursive=False, zip_password=None, zip_fname='*'):
118 - then files matching zip_fname are opened from the zip archive 127 - then files matching zip_fname are opened from the zip archive
119 128
120 Iterator: yields (container, filename, data) for each file. If zip_password is None, then 129 Iterator: yields (container, filename, data) for each file. If zip_password is None, then
121 - only the filename is returned, container and data=None. Otherwise container si the  
122 - filename of the container (zip file), and data is the file content. 130 + only the filename is returned, container and data=None. Otherwise container is the
  131 + filename of the container (zip file), and data is the file content (or an exception).
  132 + If a given filename is not a glob and does not exist, the triplet
  133 + (None, filename, PathNotFoundException) is yielded. (Globs matching nothing
  134 + do not trigger exceptions)
123 """ 135 """
124 #TODO: catch exceptions and yield them for the caller (no file found, file is not zip, wrong password, etc) 136 #TODO: catch exceptions and yield them for the caller (no file found, file is not zip, wrong password, etc)
125 #TODO: use logging instead of printing 137 #TODO: use logging instead of printing
@@ -131,6 +143,9 @@ def iter_files(files, recursive=False, zip_password=None, zip_fname='*'): @@ -131,6 +143,9 @@ def iter_files(files, recursive=False, zip_password=None, zip_fname='*'):
131 else: 143 else:
132 iglob = glob.iglob 144 iglob = glob.iglob
133 for filespec in files: 145 for filespec in files:
  146 + if not is_glob(filespec) and not os.path.exists(filespec):
  147 + yield None, filespec, PathNotFoundException(filespec)
  148 + continue
134 for filename in iglob(filespec): 149 for filename in iglob(filespec):
135 if zip_password is not None: 150 if zip_password is not None:
136 # Each file is expected to be a zip archive: 151 # Each file is expected to be a zip archive:
@@ -153,3 +168,39 @@ def iter_files(files, recursive=False, zip_password=None, zip_fname='*'): @@ -153,3 +168,39 @@ def iter_files(files, recursive=False, zip_password=None, zip_fname='*'):
153 #data = open(filename, 'rb').read() 168 #data = open(filename, 'rb').read()
154 #yield None, filename, data 169 #yield None, filename, data
155 170
  171 +
def is_glob(filespec):
    """ determine if given file specification is a single file name or a glob

    python's glob and fnmatch can only interpret ?, *, [list], and [ra-nge],
    (and combinations: hex_*_[A-Fabcdef0-9]).
    The special chars *?[-] can only be escaped using []
    --> file_name is not a glob
    --> file?name is a glob
    --> file* is a glob
    --> file[-._]name is a glob
    --> file[?]name is not a glob (matches literal "file?name")
    --> file[*]name is not a glob (matches literal "file*name")
    --> file[-]name is not a glob (matches literal "file-name")
    --> file-name is not a glob

    Brackets only count as glob syntax if an opening bracket is followed
    (somewhere later) by a closing bracket
    --> file[name is not a glob (matches literal "file[name")
    --> file]-[name is not a glob (no "]" follows the "[")

    Python's glob also works with globs in directory-part of path
    --> dir-part of path is analyzed just like filename-part
    --> thirdparty/*/xglob.py is a (valid) glob

    The contents of a bracket expression are not validated, so a spec with
    a malformed list/range is still reported as a glob.

    :param filespec: file specification (str) as given by the user
    :returns: True if filespec contains unescaped glob syntax, else False
    """

    # remove escaped special chars: they only match themselves literally
    cleaned = filespec
    for escaped in ('[*]', '[?]', '[[]', '[]]', '[-]'):
        cleaned = cleaned.replace(escaped, '')

    # any remaining wildcard makes this a glob
    if '*' in cleaned or '?' in cleaned:
        return True

    # a bracket expression needs a ']' *after* a '['; checking only for the
    # presence of both chars would wrongly flag specs like "file]-[name"
    opening = cleaned.find('[')
    return opening != -1 and cleaned.find(']', opening + 1) != -1