Commit ffa04426f10dfe4cd8a805fe35a85f6121694213

Authored by Philippe Lagadec
2 parents 5e019d00 b0033e5f

olevba: many improvements and fixes by Christian Herdtweck (exit code, exception handling, JSON output)
oletools/olevba.py
... ... @@ -76,7 +76,7 @@ https://github.com/unixfreak0037/officeparser
76 76 # CHANGELOG:
77 77 # 2014-08-05 v0.01 PL: - first version based on officeparser code
78 78 # 2014-08-14 v0.02 PL: - fixed bugs in code, added license from officeparser
79   -# 2014-08-15 PL: - fixed incorrect value check in PROJECTHELPFILEPATH Record
  79 +# 2014-08-15 PL: - fixed incorrect value check in projecthelpfilepath Record
80 80 # 2014-08-15 v0.03 PL: - refactored extract_macros to support OpenXML formats
81 81 # and to find the VBA project root anywhere in the file
82 82 # 2014-11-29 v0.04 PL: - use olefile instead of OleFileIO_PL
... ... @@ -169,6 +169,9 @@ https://github.com/unixfreak0037/officeparser
169 169 # 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate
170 170 # - updated suspicious keywords
171 171 # 2016-05-04 v0.47 PL: - look for VBA code in any stream including orphans
  172 +# 2016-04-28 CH: - return an exit code depending on the results
  173 +# - improved error and exception handling
  174 +# - improved JSON output
172 175  
173 176 __version__ = '0.47'
174 177  
... ... @@ -212,10 +215,8 @@ import math
212 215 import zipfile
213 216 import re
214 217 import optparse
215   -import os.path
216 218 import binascii
217 219 import base64
218   -import traceback
219 220 import zlib
220 221 import email # for MHTML parsing
221 222 import string # for printable
... ... @@ -240,8 +241,12 @@ except ImportError:
240 241  
241 242 import thirdparty.olefile as olefile
242 243 from thirdparty.prettytable import prettytable
243   -from thirdparty.xglob import xglob
244   -from thirdparty.pyparsing.pyparsing import *
  244 +from thirdparty.xglob import xglob, PathNotFoundException
  245 +from thirdparty.pyparsing.pyparsing import \
  246 + CaselessKeyword, CaselessLiteral, Combine, Forward, Literal, \
  247 + Optional, QuotedString,Regex, Suppress, Word, WordStart, \
  248 + alphanums, alphas, hexnums,nums, opAssoc, srange, \
  249 + infixNotation
245 250  
246 251 # monkeypatch email to fix issue #32:
247 252 # allow header lines without ":"
... ... @@ -291,8 +296,51 @@ def get_logger(name, level=logging.CRITICAL+1):
291 296 log = get_logger('olevba')
292 297  
293 298  
  299 +#=== EXCEPTIONS ==============================================================
  300 +
  301 +class FileOpenError(Exception):
  302 + """ raised by VBA_Parser constructor if all open_... attempts failed
  303 +
  304 + probably means the file type is not supported
  305 + """
  306 +
  307 + def __init__(self, filename):
  308 + super(FileOpenError, self).__init__(
  309 + 'Failed to open file %s ... probably not supported' % filename)
  310 + self.filename = filename
  311 +
  312 +
  313 +class ProcessingError(Exception):
  314 + """ raised by VBA_Parser.process_file* functions """
  315 +
  316 + def __init__(self, filename, orig_exception):
  317 + super(ProcessingError, self).__init__(
  318 + 'Error processing file %s (%s)' % (filename, orig_exception))
  319 + self.filename = filename
  320 + self.orig_exception = orig_exception
  321 +
  322 +
  323 +class MsoExtractionError(RuntimeError):
  324 + """ raised by mso_file_extract if parsing MSO/ActiveMIME data failed """
  325 +
  326 + def __init__(self, msg):
  327 + super(MsoExtractionError, self).__init__(msg)
  328 + self.msg = msg
  329 +
  330 +
294 331 #--- CONSTANTS ----------------------------------------------------------------
295 332  
  333 +# return codes
  334 +RETURN_OK = 0
  335 +RETURN_WARNINGS = 1 # (reserved, not used yet)
  336 +RETURN_WRONG_ARGS = 2 # (fixed, built into optparse)
  337 +RETURN_FILE_NOT_FOUND = 3
  338 +RETURN_XGLOB_ERR = 4
  339 +RETURN_OPEN_ERROR = 5
  340 +RETURN_PARSE_ERROR = 6
  341 +RETURN_SEVERAL_ERRS = 7
  342 +RETURN_UNEXPECTED = 8
  343 +
296 344 # URL and message to report issues:
297 345 URL_OLEVBA_ISSUES = 'https://bitbucket.org/decalage/oletools/issues'
298 346 MSG_OLEVBA_ISSUES = 'Please report this issue on %s' % URL_OLEVBA_ISSUES
... ... @@ -846,36 +894,37 @@ def mso_file_extract(data):
846 894 :param data: bytes string, MSO/ActiveMime file content
847 895 :return: bytes string, extracted data (uncompressed)
848 896  
849   - raise a RuntimeError if the data cannot be extracted
  897 + raise a MsoExtractionError if the data cannot be extracted
850 898 """
851 899 # check the magic:
852 900 assert is_mso_file(data)
  901 +
  902 + # In all the samples seen so far, Word always uses an offset of 0x32,
  903 + # and Excel 0x22A. But we read the offset from the header to be more
  904 + # generic.
  905 + offsets = [0x32, 0x22A]
  906 +
853 907 # First, attempt to get the compressed data offset from the header
854 908 # According to my tests, it should be an unsigned 16 bits integer,
855 909 # at offset 0x1E (little endian) + add 46:
856 910 try:
857 911 offset = struct.unpack_from('<H', data, offset=0x1E)[0] + 46
858 912 log.debug('Parsing MSO file: data offset = 0x%X' % offset)
859   - except KeyboardInterrupt:
860   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
861   - raise
862   - except:
863   - log.exception('Unable to parse MSO/ActiveMime file header')
864   - raise RuntimeError('Unable to parse MSO/ActiveMime file header')
865   - # In all the samples seen so far, Word always uses an offset of 0x32,
866   - # and Excel 0x22A. But we read the offset from the header to be more
867   - # generic.
868   - # Let's try that offset, then 0x32 and 0x22A, just in case:
869   - for start in (offset, 0x32, 0x22A):
  913 + offsets.insert(0, offset) # insert at beginning of offsets
  914 + except struct.error as exc:
  915 + log.info('Unable to parse MSO/ActiveMime file header (%s)' % exc)
  916 + log.debug('Trace:', exc_info=True)
  917 + raise MsoExtractionError('Unable to parse MSO/ActiveMime file header')
  918 + # now try offsets
  919 + for start in offsets:
870 920 try:
871 921 log.debug('Attempting zlib decompression from MSO file offset 0x%X' % start)
872 922 extracted_data = zlib.decompress(data[start:])
873 923 return extracted_data
874   - except KeyboardInterrupt:
875   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
876   - raise
877   - except:
878   - log.exception('zlib decompression failed')
  924 + except zlib.error as exc:
  925 + log.info('zlib decompression failed for offset %s (%s)'
  926 + % (start, exc))
  927 + log.debug('Trace:', exc_info=True)
879 928 # None of the guessed offsets worked, let's try brute-forcing by looking
880 929 # for potential zlib-compressed blocks starting with 0x78:
881 930 log.debug('Looking for potential zlib-compressed blocks in MSO file')
... ... @@ -885,12 +934,10 @@ def mso_file_extract(data):
885 934 log.debug('Attempting zlib decompression from MSO file offset 0x%X' % start)
886 935 extracted_data = zlib.decompress(data[start:])
887 936 return extracted_data
888   - except KeyboardInterrupt:
889   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
890   - raise
891   - except:
892   - log.exception('zlib decompression failed')
893   - raise RuntimeError('Unable to decompress data from a MSO/ActiveMime file')
  937 + except zlib.error as exc:
  938 + log.info('zlib decompression failed (%s)' % exc)
  939 + log.debug('Trace:', exc_info=True)
  940 + raise MsoExtractionError('Unable to decompress data from a MSO/ActiveMime file')
894 941  
895 942  
896 943 #--- FUNCTIONS ----------------------------------------------------------------
... ... @@ -911,29 +958,6 @@ def is_printable(s):
911 958 return set(s).issubset(_PRINTABLE_SET)
912 959  
913 960  
914   -def print_json(j):
915   - """
916   - Print a dictionary, a list or any other object to stdout
917   - :param j: object to be printed
918   - :return:
919   - """
920   - if isinstance(j, dict):
921   - for key, val in j.items():
922   - print_json(key)
923   - print_json(val)
924   - elif isinstance(j, list):
925   - for elem in j:
926   - print_json(elem)
927   - else:
928   - try:
929   - if len(j) > 20:
930   - print type(j), repr(j[:20]), '...(len {0})'.format(len(j))
931   - else:
932   - print type(j), repr(j)
933   - except TypeError:
934   - print type(j), repr(j)
935   -
936   -
937 961 def copytoken_help(decompressed_current, decompressed_chunk_start):
938 962 """
939 963 compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help
... ... @@ -1057,7 +1081,7 @@ def decompress_stream(compressed_container):
1057 1081 copy_token = \
1058 1082 struct.unpack("<H", compressed_container[compressed_current:compressed_current + 2])[0]
1059 1083 #TODO: check this
1060   - length_mask, offset_mask, bit_count, maximum_length = copytoken_help(
  1084 + length_mask, offset_mask, bit_count, _ = copytoken_help(
1061 1085 len(decompressed_container), decompressed_chunk_start)
1062 1086 length = (copy_token & length_mask) + 3
1063 1087 temp1 = copy_token & offset_mask
... ... @@ -1136,122 +1160,130 @@ def _extract_vba(ole, vba_root, project_path, dir_path):
1136 1160 dir_stream = cStringIO.StringIO(decompress_stream(dir_compressed))
1137 1161  
1138 1162 # PROJECTSYSKIND Record
1139   - PROJECTSYSKIND_Id = struct.unpack("<H", dir_stream.read(2))[0]
1140   - check_value('PROJECTSYSKIND_Id', 0x0001, PROJECTSYSKIND_Id)
1141   - PROJECTSYSKIND_Size = struct.unpack("<L", dir_stream.read(4))[0]
1142   - check_value('PROJECTSYSKIND_Size', 0x0004, PROJECTSYSKIND_Size)
1143   - PROJECTSYSKIND_SysKind = struct.unpack("<L", dir_stream.read(4))[0]
1144   - if PROJECTSYSKIND_SysKind == 0x00:
  1163 + projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0]
  1164 + check_value('PROJECTSYSKIND_Id', 0x0001, projectsyskind_id)
  1165 + projectsyskind_size = struct.unpack("<L", dir_stream.read(4))[0]
  1166 + check_value('PROJECTSYSKIND_Size', 0x0004, projectsyskind_size)
  1167 + projectsyskind_syskind = struct.unpack("<L", dir_stream.read(4))[0]
  1168 + if projectsyskind_syskind == 0x00:
1145 1169 log.debug("16-bit Windows")
1146   - elif PROJECTSYSKIND_SysKind == 0x01:
  1170 + elif projectsyskind_syskind == 0x01:
1147 1171 log.debug("32-bit Windows")
1148   - elif PROJECTSYSKIND_SysKind == 0x02:
  1172 + elif projectsyskind_syskind == 0x02:
1149 1173 log.debug("Macintosh")
1150   - elif PROJECTSYSKIND_SysKind == 0x03:
  1174 + elif projectsyskind_syskind == 0x03:
1151 1175 log.debug("64-bit Windows")
1152 1176 else:
1153   - log.error("invalid PROJECTSYSKIND_SysKind {0:04X}".format(PROJECTSYSKIND_SysKind))
  1177 + log.error("invalid PROJECTSYSKIND_SysKind {0:04X}".format(projectsyskind_syskind))
1154 1178  
1155 1179 # PROJECTLCID Record
1156   - PROJECTLCID_Id = struct.unpack("<H", dir_stream.read(2))[0]
1157   - check_value('PROJECTLCID_Id', 0x0002, PROJECTLCID_Id)
1158   - PROJECTLCID_Size = struct.unpack("<L", dir_stream.read(4))[0]
1159   - check_value('PROJECTLCID_Size', 0x0004, PROJECTLCID_Size)
1160   - PROJECTLCID_Lcid = struct.unpack("<L", dir_stream.read(4))[0]
1161   - check_value('PROJECTLCID_Lcid', 0x409, PROJECTLCID_Lcid)
  1180 + projectlcid_id = struct.unpack("<H", dir_stream.read(2))[0]
  1181 + check_value('PROJECTLCID_Id', 0x0002, projectlcid_id)
  1182 + projectlcid_size = struct.unpack("<L", dir_stream.read(4))[0]
  1183 + check_value('PROJECTLCID_Size', 0x0004, projectlcid_size)
  1184 + projectlcid_lcid = struct.unpack("<L", dir_stream.read(4))[0]
  1185 + check_value('PROJECTLCID_Lcid', 0x409, projectlcid_lcid)
1162 1186  
1163 1187 # PROJECTLCIDINVOKE Record
1164   - PROJECTLCIDINVOKE_Id = struct.unpack("<H", dir_stream.read(2))[0]
1165   - check_value('PROJECTLCIDINVOKE_Id', 0x0014, PROJECTLCIDINVOKE_Id)
1166   - PROJECTLCIDINVOKE_Size = struct.unpack("<L", dir_stream.read(4))[0]
1167   - check_value('PROJECTLCIDINVOKE_Size', 0x0004, PROJECTLCIDINVOKE_Size)
1168   - PROJECTLCIDINVOKE_LcidInvoke = struct.unpack("<L", dir_stream.read(4))[0]
1169   - check_value('PROJECTLCIDINVOKE_LcidInvoke', 0x409, PROJECTLCIDINVOKE_LcidInvoke)
  1188 + projectlcidinvoke_id = struct.unpack("<H", dir_stream.read(2))[0]
  1189 + check_value('PROJECTLCIDINVOKE_Id', 0x0014, projectlcidinvoke_id)
  1190 + projectlcidinvoke_size = struct.unpack("<L", dir_stream.read(4))[0]
  1191 + check_value('PROJECTLCIDINVOKE_Size', 0x0004, projectlcidinvoke_size)
  1192 + projectlcidinvoke_lcidinvoke = struct.unpack("<L", dir_stream.read(4))[0]
  1193 + check_value('PROJECTLCIDINVOKE_LcidInvoke', 0x409, projectlcidinvoke_lcidinvoke)
1170 1194  
1171 1195 # PROJECTCODEPAGE Record
1172   - PROJECTCODEPAGE_Id = struct.unpack("<H", dir_stream.read(2))[0]
1173   - check_value('PROJECTCODEPAGE_Id', 0x0003, PROJECTCODEPAGE_Id)
1174   - PROJECTCODEPAGE_Size = struct.unpack("<L", dir_stream.read(4))[0]
1175   - check_value('PROJECTCODEPAGE_Size', 0x0002, PROJECTCODEPAGE_Size)
1176   - PROJECTCODEPAGE_CodePage = struct.unpack("<H", dir_stream.read(2))[0]
  1196 + projectcodepage_id = struct.unpack("<H", dir_stream.read(2))[0]
  1197 + check_value('PROJECTCODEPAGE_Id', 0x0003, projectcodepage_id)
  1198 + projectcodepage_size = struct.unpack("<L", dir_stream.read(4))[0]
  1199 + check_value('PROJECTCODEPAGE_Size', 0x0002, projectcodepage_size)
  1200 + projectcodepage_codepage = struct.unpack("<H", dir_stream.read(2))[0]
1177 1201  
1178 1202 # PROJECTNAME Record
1179   - PROJECTNAME_Id = struct.unpack("<H", dir_stream.read(2))[0]
1180   - check_value('PROJECTNAME_Id', 0x0004, PROJECTNAME_Id)
1181   - PROJECTNAME_SizeOfProjectName = struct.unpack("<L", dir_stream.read(4))[0]
1182   - if PROJECTNAME_SizeOfProjectName < 1 or PROJECTNAME_SizeOfProjectName > 128:
1183   - log.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(PROJECTNAME_SizeOfProjectName))
1184   - PROJECTNAME_ProjectName = dir_stream.read(PROJECTNAME_SizeOfProjectName)
  1203 + projectname_id = struct.unpack("<H", dir_stream.read(2))[0]
  1204 + check_value('PROJECTNAME_Id', 0x0004, projectname_id)
  1205 + projectname_sizeof_projectname = struct.unpack("<L", dir_stream.read(4))[0]
  1206 + if projectname_sizeof_projectname < 1 or projectname_sizeof_projectname > 128:
  1207 + log.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(projectname_sizeof_projectname))
  1208 + projectname_projectname = dir_stream.read(projectname_sizeof_projectname)
  1209 + unused = projectname_projectname
1185 1210  
1186 1211 # PROJECTDOCSTRING Record
1187   - PROJECTDOCSTRING_Id = struct.unpack("<H", dir_stream.read(2))[0]
1188   - check_value('PROJECTDOCSTRING_Id', 0x0005, PROJECTDOCSTRING_Id)
1189   - PROJECTDOCSTRING_SizeOfDocString = struct.unpack("<L", dir_stream.read(4))[0]
1190   - if PROJECTNAME_SizeOfProjectName > 2000:
  1212 + projectdocstring_id = struct.unpack("<H", dir_stream.read(2))[0]
  1213 + check_value('PROJECTDOCSTRING_Id', 0x0005, projectdocstring_id)
  1214 + projectdocstring_sizeof_docstring = struct.unpack("<L", dir_stream.read(4))[0]
  1215 + if projectdocstring_sizeof_docstring > 2000:
1191 1216 log.error(
1192   - "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(PROJECTDOCSTRING_SizeOfDocString))
1193   - PROJECTDOCSTRING_DocString = dir_stream.read(PROJECTDOCSTRING_SizeOfDocString)
1194   - PROJECTDOCSTRING_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
1195   - check_value('PROJECTDOCSTRING_Reserved', 0x0040, PROJECTDOCSTRING_Reserved)
1196   - PROJECTDOCSTRING_SizeOfDocStringUnicode = struct.unpack("<L", dir_stream.read(4))[0]
1197   - if PROJECTDOCSTRING_SizeOfDocStringUnicode % 2 != 0:
  1217 + "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(projectdocstring_sizeof_docstring))
  1218 + projectdocstring_docstring = dir_stream.read(projectdocstring_sizeof_docstring)
  1219 + projectdocstring_reserved = struct.unpack("<H", dir_stream.read(2))[0]
  1220 + check_value('PROJECTDOCSTRING_Reserved', 0x0040, projectdocstring_reserved)
  1221 + projectdocstring_sizeof_docstring_unicode = struct.unpack("<L", dir_stream.read(4))[0]
  1222 + if projectdocstring_sizeof_docstring_unicode % 2 != 0:
1198 1223 log.error("PROJECTDOCSTRING_SizeOfDocStringUnicode is not even")
1199   - PROJECTDOCSTRING_DocStringUnicode = dir_stream.read(PROJECTDOCSTRING_SizeOfDocStringUnicode)
  1224 + projectdocstring_docstring_unicode = dir_stream.read(projectdocstring_sizeof_docstring_unicode)
  1225 + unused = projectdocstring_docstring
  1226 + unused = projectdocstring_docstring_unicode
1200 1227  
1201 1228 # PROJECTHELPFILEPATH Record - MS-OVBA 2.3.4.2.1.7
1202   - PROJECTHELPFILEPATH_Id = struct.unpack("<H", dir_stream.read(2))[0]
1203   - check_value('PROJECTHELPFILEPATH_Id', 0x0006, PROJECTHELPFILEPATH_Id)
1204   - PROJECTHELPFILEPATH_SizeOfHelpFile1 = struct.unpack("<L", dir_stream.read(4))[0]
1205   - if PROJECTHELPFILEPATH_SizeOfHelpFile1 > 260:
  1229 + projecthelpfilepath_id = struct.unpack("<H", dir_stream.read(2))[0]
  1230 + check_value('PROJECTHELPFILEPATH_Id', 0x0006, projecthelpfilepath_id)
  1231 + projecthelpfilepath_sizeof_helpfile1 = struct.unpack("<L", dir_stream.read(4))[0]
  1232 + if projecthelpfilepath_sizeof_helpfile1 > 260:
1206 1233 log.error(
1207   - "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(PROJECTHELPFILEPATH_SizeOfHelpFile1))
1208   - PROJECTHELPFILEPATH_HelpFile1 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile1)
1209   - PROJECTHELPFILEPATH_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
1210   - check_value('PROJECTHELPFILEPATH_Reserved', 0x003D, PROJECTHELPFILEPATH_Reserved)
1211   - PROJECTHELPFILEPATH_SizeOfHelpFile2 = struct.unpack("<L", dir_stream.read(4))[0]
1212   - if PROJECTHELPFILEPATH_SizeOfHelpFile2 != PROJECTHELPFILEPATH_SizeOfHelpFile1:
  1234 + "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(projecthelpfilepath_sizeof_helpfile1))
  1235 + projecthelpfilepath_helpfile1 = dir_stream.read(projecthelpfilepath_sizeof_helpfile1)
  1236 + projecthelpfilepath_reserved = struct.unpack("<H", dir_stream.read(2))[0]
  1237 + check_value('PROJECTHELPFILEPATH_Reserved', 0x003D, projecthelpfilepath_reserved)
  1238 + projecthelpfilepath_sizeof_helpfile2 = struct.unpack("<L", dir_stream.read(4))[0]
  1239 + if projecthelpfilepath_sizeof_helpfile2 != projecthelpfilepath_sizeof_helpfile1:
1213 1240 log.error("PROJECTHELPFILEPATH_SizeOfHelpFile1 does not equal PROJECTHELPFILEPATH_SizeOfHelpFile2")
1214   - PROJECTHELPFILEPATH_HelpFile2 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile2)
1215   - if PROJECTHELPFILEPATH_HelpFile2 != PROJECTHELPFILEPATH_HelpFile1:
  1241 + projecthelpfilepath_helpfile2 = dir_stream.read(projecthelpfilepath_sizeof_helpfile2)
  1242 + if projecthelpfilepath_helpfile2 != projecthelpfilepath_helpfile1:
1216 1243 log.error("PROJECTHELPFILEPATH_HelpFile1 does not equal PROJECTHELPFILEPATH_HelpFile2")
1217 1244  
1218 1245 # PROJECTHELPCONTEXT Record
1219   - PROJECTHELPCONTEXT_Id = struct.unpack("<H", dir_stream.read(2))[0]
1220   - check_value('PROJECTHELPCONTEXT_Id', 0x0007, PROJECTHELPCONTEXT_Id)
1221   - PROJECTHELPCONTEXT_Size = struct.unpack("<L", dir_stream.read(4))[0]
1222   - check_value('PROJECTHELPCONTEXT_Size', 0x0004, PROJECTHELPCONTEXT_Size)
1223   - PROJECTHELPCONTEXT_HelpContext = struct.unpack("<L", dir_stream.read(4))[0]
  1246 + projecthelpcontext_id = struct.unpack("<H", dir_stream.read(2))[0]
  1247 + check_value('PROJECTHELPCONTEXT_Id', 0x0007, projecthelpcontext_id)
  1248 + projecthelpcontext_size = struct.unpack("<L", dir_stream.read(4))[0]
  1249 + check_value('PROJECTHELPCONTEXT_Size', 0x0004, projecthelpcontext_size)
  1250 + projecthelpcontext_helpcontext = struct.unpack("<L", dir_stream.read(4))[0]
  1251 + unused = projecthelpcontext_helpcontext
1224 1252  
1225 1253 # PROJECTLIBFLAGS Record
1226   - PROJECTLIBFLAGS_Id = struct.unpack("<H", dir_stream.read(2))[0]
1227   - check_value('PROJECTLIBFLAGS_Id', 0x0008, PROJECTLIBFLAGS_Id)
1228   - PROJECTLIBFLAGS_Size = struct.unpack("<L", dir_stream.read(4))[0]
1229   - check_value('PROJECTLIBFLAGS_Size', 0x0004, PROJECTLIBFLAGS_Size)
1230   - PROJECTLIBFLAGS_ProjectLibFlags = struct.unpack("<L", dir_stream.read(4))[0]
1231   - check_value('PROJECTLIBFLAGS_ProjectLibFlags', 0x0000, PROJECTLIBFLAGS_ProjectLibFlags)
  1254 + projectlibflags_id = struct.unpack("<H", dir_stream.read(2))[0]
  1255 + check_value('PROJECTLIBFLAGS_Id', 0x0008, projectlibflags_id)
  1256 + projectlibflags_size = struct.unpack("<L", dir_stream.read(4))[0]
  1257 + check_value('PROJECTLIBFLAGS_Size', 0x0004, projectlibflags_size)
  1258 + projectlibflags_projectlibflags = struct.unpack("<L", dir_stream.read(4))[0]
  1259 + check_value('PROJECTLIBFLAGS_ProjectLibFlags', 0x0000, projectlibflags_projectlibflags)
1232 1260  
1233 1261 # PROJECTVERSION Record
1234   - PROJECTVERSION_Id = struct.unpack("<H", dir_stream.read(2))[0]
1235   - check_value('PROJECTVERSION_Id', 0x0009, PROJECTVERSION_Id)
1236   - PROJECTVERSION_Reserved = struct.unpack("<L", dir_stream.read(4))[0]
1237   - check_value('PROJECTVERSION_Reserved', 0x0004, PROJECTVERSION_Reserved)
1238   - PROJECTVERSION_VersionMajor = struct.unpack("<L", dir_stream.read(4))[0]
1239   - PROJECTVERSION_VersionMinor = struct.unpack("<H", dir_stream.read(2))[0]
  1262 + projectversion_id = struct.unpack("<H", dir_stream.read(2))[0]
  1263 + check_value('PROJECTVERSION_Id', 0x0009, projectversion_id)
  1264 + projectversion_reserved = struct.unpack("<L", dir_stream.read(4))[0]
  1265 + check_value('PROJECTVERSION_Reserved', 0x0004, projectversion_reserved)
  1266 + projectversion_versionmajor = struct.unpack("<L", dir_stream.read(4))[0]
  1267 + projectversion_versionminor = struct.unpack("<H", dir_stream.read(2))[0]
  1268 + unused = projectversion_versionmajor
  1269 + unused = projectversion_versionminor
1240 1270  
1241 1271 # PROJECTCONSTANTS Record
1242   - PROJECTCONSTANTS_Id = struct.unpack("<H", dir_stream.read(2))[0]
1243   - check_value('PROJECTCONSTANTS_Id', 0x000C, PROJECTCONSTANTS_Id)
1244   - PROJECTCONSTANTS_SizeOfConstants = struct.unpack("<L", dir_stream.read(4))[0]
1245   - if PROJECTCONSTANTS_SizeOfConstants > 1015:
  1272 + projectconstants_id = struct.unpack("<H", dir_stream.read(2))[0]
  1273 + check_value('PROJECTCONSTANTS_Id', 0x000C, projectconstants_id)
  1274 + projectconstants_sizeof_constants = struct.unpack("<L", dir_stream.read(4))[0]
  1275 + if projectconstants_sizeof_constants > 1015:
1246 1276 log.error(
1247   - "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(PROJECTCONSTANTS_SizeOfConstants))
1248   - PROJECTCONSTANTS_Constants = dir_stream.read(PROJECTCONSTANTS_SizeOfConstants)
1249   - PROJECTCONSTANTS_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
1250   - check_value('PROJECTCONSTANTS_Reserved', 0x003C, PROJECTCONSTANTS_Reserved)
1251   - PROJECTCONSTANTS_SizeOfConstantsUnicode = struct.unpack("<L", dir_stream.read(4))[0]
1252   - if PROJECTCONSTANTS_SizeOfConstantsUnicode % 2 != 0:
  1277 + "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(projectconstants_sizeof_constants))
  1278 + projectconstants_constants = dir_stream.read(projectconstants_sizeof_constants)
  1279 + projectconstants_reserved = struct.unpack("<H", dir_stream.read(2))[0]
  1280 + check_value('PROJECTCONSTANTS_Reserved', 0x003C, projectconstants_reserved)
  1281 + projectconstants_sizeof_constants_unicode = struct.unpack("<L", dir_stream.read(4))[0]
  1282 + if projectconstants_sizeof_constants_unicode % 2 != 0:
1253 1283 log.error("PROJECTCONSTANTS_SizeOfConstantsUnicode is not even")
1254   - PROJECTCONSTANTS_ConstantsUnicode = dir_stream.read(PROJECTCONSTANTS_SizeOfConstantsUnicode)
  1284 + projectconstants_constants_unicode = dir_stream.read(projectconstants_sizeof_constants_unicode)
  1285 + unused = projectconstants_constants
  1286 + unused = projectconstants_constants_unicode
1255 1287  
1256 1288 # array of REFERENCE records
1257 1289 check = None
... ... @@ -1263,194 +1295,230 @@ def _extract_vba(ole, vba_root, project_path, dir_path):
1263 1295  
1264 1296 if check == 0x0016:
1265 1297 # REFERENCENAME
1266   - REFERENCE_Id = check
1267   - REFERENCE_SizeOfName = struct.unpack("<L", dir_stream.read(4))[0]
1268   - REFERENCE_Name = dir_stream.read(REFERENCE_SizeOfName)
1269   - REFERENCE_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
1270   - check_value('REFERENCE_Reserved', 0x003E, REFERENCE_Reserved)
1271   - REFERENCE_SizeOfNameUnicode = struct.unpack("<L", dir_stream.read(4))[0]
1272   - REFERENCE_NameUnicode = dir_stream.read(REFERENCE_SizeOfNameUnicode)
  1298 + reference_id = check
  1299 + reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0]
  1300 + reference_name = dir_stream.read(reference_sizeof_name)
  1301 + reference_reserved = struct.unpack("<H", dir_stream.read(2))[0]
  1302 + check_value('REFERENCE_Reserved', 0x003E, reference_reserved)
  1303 + reference_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0]
  1304 + reference_name_unicode = dir_stream.read(reference_sizeof_name_unicode)
  1305 + unused = reference_id
  1306 + unused = reference_name
  1307 + unused = reference_name_unicode
1273 1308 continue
1274 1309  
1275 1310 if check == 0x0033:
1276 1311 # REFERENCEORIGINAL (followed by REFERENCECONTROL)
1277   - REFERENCEORIGINAL_Id = check
1278   - REFERENCEORIGINAL_SizeOfLibidOriginal = struct.unpack("<L", dir_stream.read(4))[0]
1279   - REFERENCEORIGINAL_LibidOriginal = dir_stream.read(REFERENCEORIGINAL_SizeOfLibidOriginal)
  1312 + referenceoriginal_id = check
  1313 + referenceoriginal_sizeof_libidoriginal = struct.unpack("<L", dir_stream.read(4))[0]
  1314 + referenceoriginal_libidoriginal = dir_stream.read(referenceoriginal_sizeof_libidoriginal)
  1315 + unused = referenceoriginal_id
  1316 + unused = referenceoriginal_libidoriginal
1280 1317 continue
1281 1318  
1282 1319 if check == 0x002F:
1283 1320 # REFERENCECONTROL
1284   - REFERENCECONTROL_Id = check
1285   - REFERENCECONTROL_SizeTwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore
1286   - REFERENCECONTROL_SizeOfLibidTwiddled = struct.unpack("<L", dir_stream.read(4))[0]
1287   - REFERENCECONTROL_LibidTwiddled = dir_stream.read(REFERENCECONTROL_SizeOfLibidTwiddled)
1288   - REFERENCECONTROL_Reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore
1289   - check_value('REFERENCECONTROL_Reserved1', 0x0000, REFERENCECONTROL_Reserved1)
1290   - REFERENCECONTROL_Reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore
1291   - check_value('REFERENCECONTROL_Reserved2', 0x0000, REFERENCECONTROL_Reserved2)
  1321 + referencecontrol_id = check
  1322 + referencecontrol_sizetwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore
  1323 + referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0]
  1324 + referencecontrol_libidtwiddled = dir_stream.read(referencecontrol_sizeof_libidtwiddled)
  1325 + referencecontrol_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore
  1326 + check_value('REFERENCECONTROL_Reserved1', 0x0000, referencecontrol_reserved1)
  1327 + referencecontrol_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore
  1328 + check_value('REFERENCECONTROL_Reserved2', 0x0000, referencecontrol_reserved2)
  1329 + unused = referencecontrol_id
  1330 + unused = referencecontrol_sizetwiddled
  1331 + unused = referencecontrol_libidtwiddled
1292 1332 # optional field
1293 1333 check2 = struct.unpack("<H", dir_stream.read(2))[0]
1294 1334 if check2 == 0x0016:
1295   - REFERENCECONTROL_NameRecordExtended_Id = check
1296   - REFERENCECONTROL_NameRecordExtended_SizeofName = struct.unpack("<L", dir_stream.read(4))[0]
1297   - REFERENCECONTROL_NameRecordExtended_Name = dir_stream.read(
1298   - REFERENCECONTROL_NameRecordExtended_SizeofName)
1299   - REFERENCECONTROL_NameRecordExtended_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
  1335 + referencecontrol_namerecordextended_id = check
  1336 + referencecontrol_namerecordextended_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0]
  1337 + referencecontrol_namerecordextended_name = dir_stream.read(
  1338 + referencecontrol_namerecordextended_sizeof_name)
  1339 + referencecontrol_namerecordextended_reserved = struct.unpack("<H", dir_stream.read(2))[0]
1300 1340 check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E,
1301   - REFERENCECONTROL_NameRecordExtended_Reserved)
1302   - REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode = struct.unpack("<L", dir_stream.read(4))[0]
1303   - REFERENCECONTROL_NameRecordExtended_NameUnicode = dir_stream.read(
1304   - REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode)
1305   - REFERENCECONTROL_Reserved3 = struct.unpack("<H", dir_stream.read(2))[0]
  1341 + referencecontrol_namerecordextended_reserved)
  1342 + referencecontrol_namerecordextended_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0]
  1343 + referencecontrol_namerecordextended_name_unicode = dir_stream.read(
  1344 + referencecontrol_namerecordextended_sizeof_name_unicode)
  1345 + referencecontrol_reserved3 = struct.unpack("<H", dir_stream.read(2))[0]
  1346 + unused = referencecontrol_namerecordextended_id
  1347 + unused = referencecontrol_namerecordextended_name
  1348 + unused = referencecontrol_namerecordextended_name_unicode
1306 1349 else:
1307   - REFERENCECONTROL_Reserved3 = check2
1308   -
1309   - check_value('REFERENCECONTROL_Reserved3', 0x0030, REFERENCECONTROL_Reserved3)
1310   - REFERENCECONTROL_SizeExtended = struct.unpack("<L", dir_stream.read(4))[0]
1311   - REFERENCECONTROL_SizeOfLibidExtended = struct.unpack("<L", dir_stream.read(4))[0]
1312   - REFERENCECONTROL_LibidExtended = dir_stream.read(REFERENCECONTROL_SizeOfLibidExtended)
1313   - REFERENCECONTROL_Reserved4 = struct.unpack("<L", dir_stream.read(4))[0]
1314   - REFERENCECONTROL_Reserved5 = struct.unpack("<H", dir_stream.read(2))[0]
1315   - REFERENCECONTROL_OriginalTypeLib = dir_stream.read(16)
1316   - REFERENCECONTROL_Cookie = struct.unpack("<L", dir_stream.read(4))[0]
  1350 + referencecontrol_reserved3 = check2
  1351 +
  1352 + check_value('REFERENCECONTROL_Reserved3', 0x0030, referencecontrol_reserved3)
  1353 + referencecontrol_sizeextended = struct.unpack("<L", dir_stream.read(4))[0]
  1354 + referencecontrol_sizeof_libidextended = struct.unpack("<L", dir_stream.read(4))[0]
  1355 + referencecontrol_libidextended = dir_stream.read(referencecontrol_sizeof_libidextended)
  1356 + referencecontrol_reserved4 = struct.unpack("<L", dir_stream.read(4))[0]
  1357 + referencecontrol_reserved5 = struct.unpack("<H", dir_stream.read(2))[0]
  1358 + referencecontrol_originaltypelib = dir_stream.read(16)
  1359 + referencecontrol_cookie = struct.unpack("<L", dir_stream.read(4))[0]
  1360 + unused = referencecontrol_sizeextended
  1361 + unused = referencecontrol_libidextended
  1362 + unused = referencecontrol_reserved4
  1363 + unused = referencecontrol_reserved5
  1364 + unused = referencecontrol_originaltypelib
  1365 + unused = referencecontrol_cookie
1317 1366 continue
1318 1367  
1319 1368 if check == 0x000D:
1320 1369 # REFERENCEREGISTERED
1321   - REFERENCEREGISTERED_Id = check
1322   - REFERENCEREGISTERED_Size = struct.unpack("<L", dir_stream.read(4))[0]
1323   - REFERENCEREGISTERED_SizeOfLibid = struct.unpack("<L", dir_stream.read(4))[0]
1324   - REFERENCEREGISTERED_Libid = dir_stream.read(REFERENCEREGISTERED_SizeOfLibid)
1325   - REFERENCEREGISTERED_Reserved1 = struct.unpack("<L", dir_stream.read(4))[0]
1326   - check_value('REFERENCEREGISTERED_Reserved1', 0x0000, REFERENCEREGISTERED_Reserved1)
1327   - REFERENCEREGISTERED_Reserved2 = struct.unpack("<H", dir_stream.read(2))[0]
1328   - check_value('REFERENCEREGISTERED_Reserved2', 0x0000, REFERENCEREGISTERED_Reserved2)
  1370 + referenceregistered_id = check
  1371 + referenceregistered_size = struct.unpack("<L", dir_stream.read(4))[0]
  1372 + referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0]
  1373 + referenceregistered_libid = dir_stream.read(referenceregistered_sizeof_libid)
  1374 + referenceregistered_reserved1 = struct.unpack("<L", dir_stream.read(4))[0]
  1375 + check_value('REFERENCEREGISTERED_Reserved1', 0x0000, referenceregistered_reserved1)
  1376 + referenceregistered_reserved2 = struct.unpack("<H", dir_stream.read(2))[0]
  1377 + check_value('REFERENCEREGISTERED_Reserved2', 0x0000, referenceregistered_reserved2)
  1378 + unused = referenceregistered_id
  1379 + unused = referenceregistered_size
  1380 + unused = referenceregistered_libid
1329 1381 continue
1330 1382  
1331 1383 if check == 0x000E:
1332 1384 # REFERENCEPROJECT
1333   - REFERENCEPROJECT_Id = check
1334   - REFERENCEPROJECT_Size = struct.unpack("<L", dir_stream.read(4))[0]
1335   - REFERENCEPROJECT_SizeOfLibidAbsolute = struct.unpack("<L", dir_stream.read(4))[0]
1336   - REFERENCEPROJECT_LibidAbsolute = dir_stream.read(REFERENCEPROJECT_SizeOfLibidAbsolute)
1337   - REFERENCEPROJECT_SizeOfLibidRelative = struct.unpack("<L", dir_stream.read(4))[0]
1338   - REFERENCEPROJECT_LibidRelative = dir_stream.read(REFERENCEPROJECT_SizeOfLibidRelative)
1339   - REFERENCEPROJECT_MajorVersion = struct.unpack("<L", dir_stream.read(4))[0]
1340   - REFERENCEPROJECT_MinorVersion = struct.unpack("<H", dir_stream.read(2))[0]
  1385 + referenceproject_id = check
  1386 + referenceproject_size = struct.unpack("<L", dir_stream.read(4))[0]
  1387 + referenceproject_sizeof_libidabsolute = struct.unpack("<L", dir_stream.read(4))[0]
  1388 + referenceproject_libidabsolute = dir_stream.read(referenceproject_sizeof_libidabsolute)
  1389 + referenceproject_sizeof_libidrelative = struct.unpack("<L", dir_stream.read(4))[0]
  1390 + referenceproject_libidrelative = dir_stream.read(referenceproject_sizeof_libidrelative)
  1391 + referenceproject_majorversion = struct.unpack("<L", dir_stream.read(4))[0]
  1392 + referenceproject_minorversion = struct.unpack("<H", dir_stream.read(2))[0]
  1393 + unused = referenceproject_id
  1394 + unused = referenceproject_size
  1395 + unused = referenceproject_libidabsolute
  1396 + unused = referenceproject_libidrelative
  1397 + unused = referenceproject_majorversion
  1398 + unused = referenceproject_minorversion
1341 1399 continue
1342 1400  
1343 1401 log.error('invalid or unknown check Id {0:04X}'.format(check))
1344 1402 sys.exit(0)
1345 1403  
1346   - PROJECTMODULES_Id = check #struct.unpack("<H", dir_stream.read(2))[0]
1347   - check_value('PROJECTMODULES_Id', 0x000F, PROJECTMODULES_Id)
1348   - PROJECTMODULES_Size = struct.unpack("<L", dir_stream.read(4))[0]
1349   - check_value('PROJECTMODULES_Size', 0x0002, PROJECTMODULES_Size)
1350   - PROJECTMODULES_Count = struct.unpack("<H", dir_stream.read(2))[0]
1351   - PROJECTMODULES_ProjectCookieRecord_Id = struct.unpack("<H", dir_stream.read(2))[0]
1352   - check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, PROJECTMODULES_ProjectCookieRecord_Id)
1353   - PROJECTMODULES_ProjectCookieRecord_Size = struct.unpack("<L", dir_stream.read(4))[0]
1354   - check_value('PROJECTMODULES_ProjectCookieRecord_Size', 0x0002, PROJECTMODULES_ProjectCookieRecord_Size)
1355   - PROJECTMODULES_ProjectCookieRecord_Cookie = struct.unpack("<H", dir_stream.read(2))[0]
1356   -
1357   - log.debug("parsing {0} modules".format(PROJECTMODULES_Count))
1358   - for x in xrange(0, PROJECTMODULES_Count):
1359   - MODULENAME_Id = struct.unpack("<H", dir_stream.read(2))[0]
1360   - check_value('MODULENAME_Id', 0x0019, MODULENAME_Id)
1361   - MODULENAME_SizeOfModuleName = struct.unpack("<L", dir_stream.read(4))[0]
1362   - MODULENAME_ModuleName = dir_stream.read(MODULENAME_SizeOfModuleName)
  1404 + projectmodules_id = check #struct.unpack("<H", dir_stream.read(2))[0]
  1405 + check_value('PROJECTMODULES_Id', 0x000F, projectmodules_id)
  1406 + projectmodules_size = struct.unpack("<L", dir_stream.read(4))[0]
  1407 + check_value('PROJECTMODULES_Size', 0x0002, projectmodules_size)
  1408 + projectmodules_count = struct.unpack("<H", dir_stream.read(2))[0]
  1409 + projectmodules_projectcookierecord_id = struct.unpack("<H", dir_stream.read(2))[0]
  1410 + check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, projectmodules_projectcookierecord_id)
  1411 + projectmodules_projectcookierecord_size = struct.unpack("<L", dir_stream.read(4))[0]
  1412 + check_value('PROJECTMODULES_ProjectCookieRecord_Size', 0x0002, projectmodules_projectcookierecord_size)
  1413 + projectmodules_projectcookierecord_cookie = struct.unpack("<H", dir_stream.read(2))[0]
  1414 + unused = projectmodules_projectcookierecord_cookie
  1415 +
  1416 + log.debug("parsing {0} modules".format(projectmodules_count))
  1417 + for _ in xrange(0, projectmodules_count):
  1418 + modulename_id = struct.unpack("<H", dir_stream.read(2))[0]
  1419 + check_value('MODULENAME_Id', 0x0019, modulename_id)
  1420 + modulename_sizeof_modulename = struct.unpack("<L", dir_stream.read(4))[0]
  1421 + modulename_modulename = dir_stream.read(modulename_sizeof_modulename)
1363 1422 # account for optional sections
1364 1423 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1365 1424 if section_id == 0x0047:
1366   - MODULENAMEUNICODE_Id = section_id
1367   - MODULENAMEUNICODE_SizeOfModuleNameUnicode = struct.unpack("<L", dir_stream.read(4))[0]
1368   - MODULENAMEUNICODE_ModuleNameUnicode = dir_stream.read(MODULENAMEUNICODE_SizeOfModuleNameUnicode)
  1425 + modulename_unicode_id = section_id
  1426 + modulename_unicode_sizeof_modulename_unicode = struct.unpack("<L", dir_stream.read(4))[0]
  1427 + modulename_unicode_modulename_unicode = dir_stream.read(modulename_unicode_sizeof_modulename_unicode)
  1428 + unused = modulename_unicode_id
  1429 + unused = modulename_unicode_modulename_unicode
1369 1430 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1370 1431 if section_id == 0x001A:
1371   - MODULESTREAMNAME_id = section_id
1372   - MODULESTREAMNAME_SizeOfStreamName = struct.unpack("<L", dir_stream.read(4))[0]
1373   - MODULESTREAMNAME_StreamName = dir_stream.read(MODULESTREAMNAME_SizeOfStreamName)
1374   - MODULESTREAMNAME_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
1375   - check_value('MODULESTREAMNAME_Reserved', 0x0032, MODULESTREAMNAME_Reserved)
1376   - MODULESTREAMNAME_SizeOfStreamNameUnicode = struct.unpack("<L", dir_stream.read(4))[0]
1377   - MODULESTREAMNAME_StreamNameUnicode = dir_stream.read(MODULESTREAMNAME_SizeOfStreamNameUnicode)
  1432 + modulestreamname_id = section_id
  1433 + modulestreamname_sizeof_streamname = struct.unpack("<L", dir_stream.read(4))[0]
  1434 + modulestreamname_streamname = dir_stream.read(modulestreamname_sizeof_streamname)
  1435 + modulestreamname_reserved = struct.unpack("<H", dir_stream.read(2))[0]
  1436 + check_value('MODULESTREAMNAME_Reserved', 0x0032, modulestreamname_reserved)
  1437 + modulestreamname_sizeof_streamname_unicode = struct.unpack("<L", dir_stream.read(4))[0]
  1438 + modulestreamname_streamname_unicode = dir_stream.read(modulestreamname_sizeof_streamname_unicode)
  1439 + unused = modulestreamname_id
1378 1440 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1379 1441 if section_id == 0x001C:
1380   - MODULEDOCSTRING_Id = section_id
1381   - check_value('MODULEDOCSTRING_Id', 0x001C, MODULEDOCSTRING_Id)
1382   - MODULEDOCSTRING_SizeOfDocString = struct.unpack("<L", dir_stream.read(4))[0]
1383   - MODULEDOCSTRING_DocString = dir_stream.read(MODULEDOCSTRING_SizeOfDocString)
1384   - MODULEDOCSTRING_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
1385   - check_value('MODULEDOCSTRING_Reserved', 0x0048, MODULEDOCSTRING_Reserved)
1386   - MODULEDOCSTRING_SizeOfDocStringUnicode = struct.unpack("<L", dir_stream.read(4))[0]
1387   - MODULEDOCSTRING_DocStringUnicode = dir_stream.read(MODULEDOCSTRING_SizeOfDocStringUnicode)
  1442 + moduledocstring_id = section_id
  1443 + check_value('MODULEDOCSTRING_Id', 0x001C, moduledocstring_id)
  1444 + moduledocstring_sizeof_docstring = struct.unpack("<L", dir_stream.read(4))[0]
  1445 + moduledocstring_docstring = dir_stream.read(moduledocstring_sizeof_docstring)
  1446 + moduledocstring_reserved = struct.unpack("<H", dir_stream.read(2))[0]
  1447 + check_value('MODULEDOCSTRING_Reserved', 0x0048, moduledocstring_reserved)
  1448 + moduledocstring_sizeof_docstring_unicode = struct.unpack("<L", dir_stream.read(4))[0]
  1449 + moduledocstring_docstring_unicode = dir_stream.read(moduledocstring_sizeof_docstring_unicode)
  1450 + unused = moduledocstring_docstring
  1451 + unused = moduledocstring_docstring_unicode
1388 1452 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1389 1453 if section_id == 0x0031:
1390   - MODULEOFFSET_Id = section_id
1391   - check_value('MODULEOFFSET_Id', 0x0031, MODULEOFFSET_Id)
1392   - MODULEOFFSET_Size = struct.unpack("<L", dir_stream.read(4))[0]
1393   - check_value('MODULEOFFSET_Size', 0x0004, MODULEOFFSET_Size)
1394   - MODULEOFFSET_TextOffset = struct.unpack("<L", dir_stream.read(4))[0]
  1454 + moduleoffset_id = section_id
  1455 + check_value('MODULEOFFSET_Id', 0x0031, moduleoffset_id)
  1456 + moduleoffset_size = struct.unpack("<L", dir_stream.read(4))[0]
  1457 + check_value('MODULEOFFSET_Size', 0x0004, moduleoffset_size)
  1458 + moduleoffset_textoffset = struct.unpack("<L", dir_stream.read(4))[0]
1395 1459 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1396 1460 if section_id == 0x001E:
1397   - MODULEHELPCONTEXT_Id = section_id
1398   - check_value('MODULEHELPCONTEXT_Id', 0x001E, MODULEHELPCONTEXT_Id)
1399   - MODULEHELPCONTEXT_Size = struct.unpack("<L", dir_stream.read(4))[0]
1400   - check_value('MODULEHELPCONTEXT_Size', 0x0004, MODULEHELPCONTEXT_Size)
1401   - MODULEHELPCONTEXT_HelpContext = struct.unpack("<L", dir_stream.read(4))[0]
  1461 + modulehelpcontext_id = section_id
  1462 + check_value('MODULEHELPCONTEXT_Id', 0x001E, modulehelpcontext_id)
  1463 + modulehelpcontext_size = struct.unpack("<L", dir_stream.read(4))[0]
  1464 + check_value('MODULEHELPCONTEXT_Size', 0x0004, modulehelpcontext_size)
  1465 + modulehelpcontext_helpcontext = struct.unpack("<L", dir_stream.read(4))[0]
  1466 + unused = modulehelpcontext_helpcontext
1402 1467 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1403 1468 if section_id == 0x002C:
1404   - MODULECOOKIE_Id = section_id
1405   - check_value('MODULECOOKIE_Id', 0x002C, MODULECOOKIE_Id)
1406   - MODULECOOKIE_Size = struct.unpack("<L", dir_stream.read(4))[0]
1407   - check_value('MODULECOOKIE_Size', 0x0002, MODULECOOKIE_Size)
1408   - MODULECOOKIE_Cookie = struct.unpack("<H", dir_stream.read(2))[0]
  1469 + modulecookie_id = section_id
  1470 + check_value('MODULECOOKIE_Id', 0x002C, modulecookie_id)
  1471 + modulecookie_size = struct.unpack("<L", dir_stream.read(4))[0]
  1472 + check_value('MODULECOOKIE_Size', 0x0002, modulecookie_size)
  1473 + modulecookie_cookie = struct.unpack("<H", dir_stream.read(2))[0]
  1474 + unused = modulecookie_cookie
1409 1475 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1410 1476 if section_id == 0x0021 or section_id == 0x0022:
1411   - MODULETYPE_Id = section_id
1412   - MODULETYPE_Reserved = struct.unpack("<L", dir_stream.read(4))[0]
  1477 + moduletype_id = section_id
  1478 + moduletype_reserved = struct.unpack("<L", dir_stream.read(4))[0]
  1479 + unused = moduletype_id
  1480 + unused = moduletype_reserved
1413 1481 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1414 1482 if section_id == 0x0025:
1415   - MODULEREADONLY_Id = section_id
1416   - check_value('MODULEREADONLY_Id', 0x0025, MODULEREADONLY_Id)
1417   - MODULEREADONLY_Reserved = struct.unpack("<L", dir_stream.read(4))[0]
1418   - check_value('MODULEREADONLY_Reserved', 0x0000, MODULEREADONLY_Reserved)
  1483 + modulereadonly_id = section_id
  1484 + check_value('MODULEREADONLY_Id', 0x0025, modulereadonly_id)
  1485 + modulereadonly_reserved = struct.unpack("<L", dir_stream.read(4))[0]
  1486 + check_value('MODULEREADONLY_Reserved', 0x0000, modulereadonly_reserved)
1419 1487 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1420 1488 if section_id == 0x0028:
1421   - MODULEPRIVATE_Id = section_id
1422   - check_value('MODULEPRIVATE_Id', 0x0028, MODULEPRIVATE_Id)
1423   - MODULEPRIVATE_Reserved = struct.unpack("<L", dir_stream.read(4))[0]
1424   - check_value('MODULEPRIVATE_Reserved', 0x0000, MODULEPRIVATE_Reserved)
  1489 + moduleprivate_id = section_id
  1490 + check_value('MODULEPRIVATE_Id', 0x0028, moduleprivate_id)
  1491 + moduleprivate_reserved = struct.unpack("<L", dir_stream.read(4))[0]
  1492 + check_value('MODULEPRIVATE_Reserved', 0x0000, moduleprivate_reserved)
1425 1493 section_id = struct.unpack("<H", dir_stream.read(2))[0]
1426 1494 if section_id == 0x002B: # TERMINATOR
1427   - MODULE_Reserved = struct.unpack("<L", dir_stream.read(4))[0]
1428   - check_value('MODULE_Reserved', 0x0000, MODULE_Reserved)
  1495 + module_reserved = struct.unpack("<L", dir_stream.read(4))[0]
  1496 + check_value('MODULE_Reserved', 0x0000, module_reserved)
1429 1497 section_id = None
1430 1498 if section_id != None:
1431 1499 log.warning('unknown or invalid module section id {0:04X}'.format(section_id))
1432 1500  
1433   - log.debug('Project CodePage = %d' % PROJECTCODEPAGE_CodePage)
1434   - vba_codec = 'cp%d' % PROJECTCODEPAGE_CodePage
1435   - log.debug("ModuleName = {0}".format(MODULENAME_ModuleName))
1436   - log.debug("StreamName = {0}".format(repr(MODULESTREAMNAME_StreamName)))
1437   - streamname_unicode = MODULESTREAMNAME_StreamName.decode(vba_codec)
  1501 + log.debug('Project CodePage = %d' % projectcodepage_codepage)
  1502 + vba_codec = 'cp%d' % projectcodepage_codepage
  1503 + log.debug("ModuleName = {0}".format(modulename_modulename))
  1504 + log.debug("StreamName = {0}".format(repr(modulestreamname_streamname)))
  1505 + streamname_unicode = modulestreamname_streamname.decode(vba_codec)
1438 1506 log.debug("StreamName.decode('%s') = %s" % (vba_codec, repr(streamname_unicode)))
1439   - log.debug("StreamNameUnicode = {0}".format(repr(MODULESTREAMNAME_StreamNameUnicode)))
1440   - log.debug("TextOffset = {0}".format(MODULEOFFSET_TextOffset))
  1507 + log.debug("StreamNameUnicode = {0}".format(repr(modulestreamname_streamname_unicode)))
  1508 + log.debug("TextOffset = {0}".format(moduleoffset_textoffset))
1441 1509  
1442 1510 code_path = vba_root + u'VBA/' + streamname_unicode
1443 1511 #TODO: test if stream exists
1444 1512 log.debug('opening VBA code stream %s' % repr(code_path))
1445 1513 code_data = ole.openstream(code_path).read()
1446 1514 log.debug("length of code_data = {0}".format(len(code_data)))
1447   - log.debug("offset of code_data = {0}".format(MODULEOFFSET_TextOffset))
1448   - code_data = code_data[MODULEOFFSET_TextOffset:]
  1515 + log.debug("offset of code_data = {0}".format(moduleoffset_textoffset))
  1516 + code_data = code_data[moduleoffset_textoffset:]
1449 1517 if len(code_data) > 0:
1450 1518 code_data = decompress_stream(code_data)
1451 1519 # case-insensitive search in the code_modules dict to find the file extension:
1452   - filext = code_modules.get(MODULENAME_ModuleName.lower(), 'bin')
1453   - filename = '{0}.{1}'.format(MODULENAME_ModuleName, filext)
  1520 + filext = code_modules.get(modulename_modulename.lower(), 'bin')
  1521 + filename = '{0}.{1}'.format(modulename_modulename, filext)
1454 1522 #TODO: also yield the codepage so that callers can decode it properly
1455 1523 yield (code_path, filename, code_data)
1456 1524 # print '-'*79
... ... @@ -1460,7 +1528,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path):
1460 1528 # print ''
1461 1529 log.debug('extracted file {0}'.format(filename))
1462 1530 else:
1463   - log.warning("module stream {0} has code data length 0".format(MODULESTREAMNAME_StreamName))
  1531 + log.warning("module stream {0} has code data length 0".format(modulestreamname_streamname))
  1532 + _ = unused
1464 1533 return
1465 1534  
1466 1535  
... ... @@ -1616,12 +1685,9 @@ def detect_base64_strings(vba_code):
1616 1685 decoded = base64.b64decode(value)
1617 1686 results.append((value, decoded))
1618 1687 found.add(value)
1619   - except KeyboardInterrupt:
1620   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
1621   - raise
1622   - except:
  1688 + except (TypeError, ValueError) as exc:
  1689 + log.debug('Failed to base64-decode (%s)' % exc)
1623 1690 # if an exception occurs, it is likely not a base64-encoded string
1624   - pass
1625 1691 return results
1626 1692  
1627 1693  
... ... @@ -1646,12 +1712,9 @@ def detect_dridex_strings(vba_code):
1646 1712 decoded = DridexUrlDecode(value)
1647 1713 results.append((value, decoded))
1648 1714 found.add(value)
1649   - except KeyboardInterrupt:
1650   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
1651   - raise
1652   - except:
  1715 + except Exception as exc:
  1716 + log.debug('Failed to Dridex-decode (%s)' % exc)
1653 1717 # if an exception occurs, it is likely not a dridex-encoded string
1654   - pass
1655 1718 return results
1656 1719  
1657 1720  
... ... @@ -1701,16 +1764,17 @@ def json2ascii(json_obj, encoding=&#39;utf8&#39;, errors=&#39;replace&#39;):
1701 1764 elif isinstance(json_obj, (bool, int, float)):
1702 1765 pass
1703 1766 elif isinstance(json_obj, str):
  1767 + # de-code and re-encode
1704 1768 dencoded = json_obj.decode(encoding, errors).encode(encoding, errors)
1705   - if dencoded != str:
1706   - logging.info('json2ascii: replaced: {0} (len {1})'
1707   - .format(json_obj, len(json_obj)))
1708   - logging.info('json2ascii: with: {0} (len {1})'
1709   - .format(dencoded, len(dencoded)))
  1769 + if dencoded != json_obj:
  1770 + log.info('json2ascii: replaced: {0} (len {1})'
  1771 + .format(json_obj, len(json_obj)))
  1772 + log.info('json2ascii: with: {0} (len {1})'
  1773 + .format(dencoded, len(dencoded)))
1710 1774 return dencoded
1711 1775 elif isinstance(json_obj, unicode):
1712   - logging.info('json2ascii: replaced: {0}'
1713   - .format(json_obj.encode(encoding, errors)))
  1776 + log.info('json2ascii: replaced: {0}'
  1777 + .format(json_obj.encode(encoding, errors)))
1714 1778 # cannot put original into logger
1715 1779 # print 'original: ' json_obj
1716 1780 return json_obj.encode(encoding, errors)
... ... @@ -1721,11 +1785,50 @@ def json2ascii(json_obj, encoding=&#39;utf8&#39;, errors=&#39;replace&#39;):
1721 1785 for item in json_obj:
1722 1786 item = json2ascii(item)
1723 1787 else:
1724   - logging.debug('unexpected type in json2ascii: {0} -- leave as is'
1725   - .format(type(json_obj)))
  1788 + log.debug('unexpected type in json2ascii: {0} -- leave as is'
  1789 + .format(type(json_obj)))
1726 1790 return json_obj
1727 1791  
1728 1792  
  1793 +_have_printed_json_start = False
  1794 +
  1795 +def print_json(json_dict=None, _json_is_last=False, **json_parts):
  1796 + """ line-wise print of json.dumps(json2ascii(..)) with options and indent+1
  1797 +
  1798 + can use in two ways:
  1799 + (1) print_json(some_dict)
  1800 + (2) print_json(key1=value1, key2=value2, ...)
  1801 +
  1802 + :param bool _json_is_last: set to True only for very last entry to complete
  1803 + the top-level json-list
  1804 + """
  1805 + global _have_printed_json_start
  1806 +
  1807 + if json_dict and json_parts:
  1808 + raise ValueError('Invalid json argument: want either single dict or '
  1809 + 'key=value parts but got both)')
  1810 + elif (json_dict is not None) and (not isinstance(json_dict, dict)):
  1811 + raise ValueError('Invalid json argument: want either single dict or '
  1812 + 'key=value parts but got {} instead of dict)'
  1813 + .format(type(json_dict)))
  1814 + if json_parts:
  1815 + json_dict = json_parts
  1816 +
  1817 + if not _have_printed_json_start:
  1818 + print '['
  1819 + _have_printed_json_start = True
  1820 +
  1821 + lines = json.dumps(json2ascii(json_dict), check_circular=False,
  1822 + indent=4, ensure_ascii=False).splitlines()
  1823 + for line in lines[:-1]:
  1824 + print ' {}'.format(line)
  1825 + if _json_is_last:
  1826 + print ' {}'.format(lines[-1]) # print last line without comma
  1827 + print ']'
  1828 + else:
  1829 + print ' {},'.format(lines[-1]) # print last line with comma
  1830 +
  1831 +
1729 1832 class VBA_Scanner(object):
1730 1833 """
1731 1834 Class to scan the source code of a VBA module to find obfuscated strings,
... ... @@ -1924,6 +2027,8 @@ class VBA_Parser(object):
1924 2027  
1925 2028 :param container: str, path and filename of container if the file is within
1926 2029 a zip archive, None otherwise.
  2030 +
  2031 + raises a FileOpenError if all attempts to interpret the data header failed
1927 2032 """
1928 2033 #TODO: filename should only be a string, data should be used for the file-like object
1929 2034 #TODO: filename should be mandatory, optional data is a string or file-like object
... ... @@ -2000,8 +2105,8 @@ class VBA_Parser(object):
2000 2105 if self.type is None:
2001 2106 # At this stage, could not match a known format:
2002 2107 msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename
2003   - log.error(msg)
2004   - raise TypeError(msg)
  2108 + log.info(msg)
  2109 + raise FileOpenError(msg)
2005 2110  
2006 2111 def open_ole(self, _file):
2007 2112 """
... ... @@ -2016,13 +2121,10 @@ class VBA_Parser(object):
2016 2121 # TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet
2017 2122 # set type only if parsing succeeds
2018 2123 self.type = TYPE_OLE
2019   - except KeyboardInterrupt:
2020   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
2021   - raise
2022   - except:
  2124 + except (IOError, TypeError, ValueError) as exc:
2023 2125 # TODO: handle OLE parsing exceptions
2024   - log.exception('Failed OLE parsing for file %r' % self.filename)
2025   - pass
  2126 + log.info('Failed OLE parsing for file %r (%s)' % (self.filename, exc))
  2127 + log.debug('Trace:', exc_info=True)
2026 2128  
2027 2129  
2028 2130 def open_openxml(self, _file):
... ... @@ -2048,22 +2150,17 @@ class VBA_Parser(object):
2048 2150 ole_data = z.open(subfile).read()
2049 2151 try:
2050 2152 self.ole_subfiles.append(VBA_Parser(filename=subfile, data=ole_data))
2051   - except KeyboardInterrupt:
2052   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
2053   - raise
2054   - except:
2055   - log.debug('%s is not a valid OLE file' % subfile)
  2153 + except FileOpenError as exc:
  2154 + log.info('%s is not a valid OLE file (%s)' % (subfile, exc))
2056 2155 continue
2057 2156 z.close()
2058 2157 # set type only if parsing succeeds
2059 2158 self.type = TYPE_OpenXML
2060   - except KeyboardInterrupt:
2061   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
2062   - raise
2063   - except:
  2159 + except (RuntimeError, zipfile.BadZipfile, zipfile.LargeZipFile, IOError) as exc:
2064 2160 # TODO: handle parsing exceptions
2065   - log.exception('Failed Zip/OpenXML parsing for file %r' % self.filename)
2066   - pass
  2161 + log.info('Failed Zip/OpenXML parsing for file %r (%s)'
  2162 + % (self.filename, exc))
  2163 + log.debug('Trace:', exc_info=True)
2067 2164  
2068 2165 def open_word2003xml(self, data):
2069 2166 """
... ... @@ -2087,25 +2184,25 @@ class VBA_Parser(object):
2087 2184 if is_mso_file(mso_data):
2088 2185 # decompress the zlib data stored in the MSO file, which is the OLE container:
2089 2186 # TODO: handle different offsets => separate function
2090   - ole_data = mso_file_extract(mso_data)
2091 2187 try:
  2188 + ole_data = mso_file_extract(mso_data)
2092 2189 self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data))
2093   - except KeyboardInterrupt:
2094   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
2095   - raise
2096   - except:
2097   - log.error('%s does not contain a valid OLE file' % fname)
  2190 + except MsoExtractionError:
  2191 + log.info('Failed decompressing an MSO container in %r - %s'
  2192 + % (fname, MSG_OLEVBA_ISSUES))
  2193 + log.debug('Trace:', exc_info=True)
  2194 + except FileOpenError as exc:
  2195 + log.debug('%s is not a valid OLE sub file (%s)' % (fname, exc))
2098 2196 else:
2099   - log.error('%s is not a valid MSO file' % fname)
  2197 + log.info('%s is not a valid MSO file' % fname)
2100 2198 # set type only if parsing succeeds
2101 2199 self.type = TYPE_Word2003_XML
2102   - except KeyboardInterrupt:
2103   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
2104   - raise
2105   - except:
  2200 + except Exception as exc:
2106 2201 # TODO: differentiate exceptions for each parsing stage
2107   - log.exception('Failed XML parsing for file %r' % self.filename)
2108   - pass
  2202 + # (but ET is different libs, no good exception description in API)
  2203 + # found: XMLSyntaxError
  2204 + log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc))
  2205 + log.debug('Trace:', exc_info=True)
2109 2206  
2110 2207 def open_mht(self, data):
2111 2208 """
... ... @@ -2148,40 +2245,30 @@ class VBA_Parser(object):
2148 2245 log.debug('Found ActiveMime header, decompressing MSO container')
2149 2246 try:
2150 2247 ole_data = mso_file_extract(part_data)
2151   - try:
2152   - # TODO: check if it is actually an OLE file
2153   - # TODO: get the MSO filename from content_location?
2154   - self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data))
2155   - except KeyboardInterrupt:
2156   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
2157   - raise
2158   - except:
2159   - log.debug('%s does not contain a valid OLE file' % fname)
2160   - except KeyboardInterrupt:
2161   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
2162   - raise
2163   - except:
2164   - log.exception('Failed decompressing an MSO container in %r - %s'
  2248 +
  2249 + # TODO: check if it is actually an OLE file
  2250 + # TODO: get the MSO filename from content_location?
  2251 + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data))
  2252 + except MsoExtractionError:
  2253 + log.info('Failed decompressing an MSO container in %r - %s'
2165 2254 % (fname, MSG_OLEVBA_ISSUES))
  2255 + log.debug('Trace:', exc_info=True)
2166 2256 # TODO: bug here - need to split in smaller functions/classes?
  2257 + except FileOpenError as exc:
  2258 + log.debug('%s does not contain a valid OLE file (%s)'
  2259 + % (fname, exc))
2167 2260 else:
  2261 + log.debug('type(part_data) = %s' % type(part_data))
2168 2262 try:
2169   - log.debug('type(part_data) = %s' % type(part_data))
2170 2263 log.debug('part_data[0:20] = %r' % part_data[0:20])
2171   - except KeyboardInterrupt:
2172   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
2173   - raise
2174   - except:
2175   - pass
  2264 + except TypeError as err:
  2265 + log.debug('part_data has no __getitem__')
2176 2266 # set type only if parsing succeeds
2177 2267 self.type = TYPE_MHTML
2178   - except KeyboardInterrupt:
2179   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
2180   - raise
2181   - except:
2182   - log.exception('Failed MIME parsing for file %r - %s'
2183   - % (self.filename, MSG_OLEVBA_ISSUES))
2184   - pass
  2268 + except Exception:
  2269 + log.info('Failed MIME parsing for file %r - %s'
  2270 + % (self.filename, MSG_OLEVBA_ISSUES))
  2271 + log.debug('Trace:', exc_info=True)
2185 2272  
2186 2273  
2187 2274 def open_text(self, data):
... ... @@ -2191,19 +2278,11 @@ class VBA_Parser(object):
2191 2278 :return: nothing
2192 2279 """
2193 2280 log.info('Opening text file %s' % self.filename)
2194   - try:
2195   - # directly store the source code:
2196   - self.vba_code_all_modules = data
2197   - self.contains_macros = True
2198   - # set type only if parsing succeeds
2199   - self.type = TYPE_TEXT
2200   - except KeyboardInterrupt:
2201   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
2202   - raise
2203   - except:
2204   - log.exception('Failed text parsing for file %r - %s'
2205   - % (self.filename, MSG_OLEVBA_ISSUES))
2206   - pass
  2281 + # directly store the source code:
  2282 + self.vba_code_all_modules = data
  2283 + self.contains_macros = True
  2284 + # set type only if parsing succeeds
  2285 + self.type = TYPE_TEXT
2207 2286  
2208 2287  
2209 2288 def find_vba_projects(self):
... ... @@ -2247,6 +2326,15 @@ class VBA_Parser(object):
2247 2326 # - The root/VBA storage MUST contain a _VBA_PROJECT stream and a dir stream
2248 2327 # - all names are case-insensitive
2249 2328  
  2329 + def check_vba_stream(ole, vba_root, stream_path):
  2330 + full_path = vba_root + stream_path
  2331 + if ole.exists(full_path) and ole.get_type(full_path) == olefile.STGTY_STREAM:
  2332 + log.debug('Found %s stream: %s' % (stream_path, full_path))
  2333 + return full_path
  2334 + else:
  2335 + log.debug('Missing %s stream, this is not a valid VBA project structure' % stream_path)
  2336 + return False
  2337 +
2250 2338 # start with an empty list:
2251 2339 self.vba_projects = []
2252 2340 # Look for any storage containing those storage/streams:
... ... @@ -2263,15 +2351,6 @@ class VBA_Parser(object):
2263 2351 vba_root += '/'
2264 2352 log.debug('Checking vba_root="%s"' % vba_root)
2265 2353  
2266   - def check_vba_stream(ole, vba_root, stream_path):
2267   - full_path = vba_root + stream_path
2268   - if ole.exists(full_path) and ole.get_type(full_path) == olefile.STGTY_STREAM:
2269   - log.debug('Found %s stream: %s' % (stream_path, full_path))
2270   - return full_path
2271   - else:
2272   - log.debug('Missing %s stream, this is not a valid VBA project structure' % stream_path)
2273   - return False
2274   -
2275 2354 # Check if the VBA root storage also contains a PROJECT stream:
2276 2355 project_path = check_vba_stream(ole, vba_root, 'PROJECT')
2277 2356 if not project_path: continue
... ... @@ -2436,10 +2515,10 @@ class VBA_Parser(object):
2436 2515 # variable to merge source code from all modules:
2437 2516 if self.vba_code_all_modules is None:
2438 2517 self.vba_code_all_modules = ''
2439   - for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros():
  2518 + for (_, _, _, vba_code) in self.extract_all_macros():
2440 2519 #TODO: filter code? (each module)
2441 2520 self.vba_code_all_modules += vba_code + '\n'
2442   - for (subfilename, form_path, form_string) in self.extract_form_strings():
  2521 + for (_, _, form_string) in self.extract_form_strings():
2443 2522 self.vba_code_all_modules += form_string + '\n'
2444 2523 # Analyze the whole code at once:
2445 2524 scanner = VBA_Scanner(self.vba_code_all_modules)
... ... @@ -2587,8 +2666,7 @@ class VBA_Parser_CLI(VBA_Parser):
2587 2666 def __init__(self, filename, data=None, container=None):
2588 2667 """
2589 2668 Constructor for VBA_Parser_CLI.
2590   - Calls __init__ from VBA_Parser, but handles the TypeError exception
2591   - when the file type is not supported.
  2669 + Calls __init__ from VBA_Parser
2592 2670  
2593 2671 :param filename: filename or path of file to parse, or file-like object
2594 2672  
... ... @@ -2599,11 +2677,7 @@ class VBA_Parser_CLI(VBA_Parser):
2599 2677 :param container: str, path and filename of container if the file is within
2600 2678 a zip archive, None otherwise.
2601 2679 """
2602   - try:
2603   - VBA_Parser.__init__(self, filename, data=data, container=container)
2604   - except TypeError:
2605   - # in that case, self.type=None
2606   - pass
  2680 + super(VBA_Parser_CLI, self).__init__(filename, data=data, container=container)
2607 2681  
2608 2682  
2609 2683 def print_analysis(self, show_decoded_strings=False, deobfuscate=False):
... ... @@ -2653,7 +2727,7 @@ class VBA_Parser_CLI(VBA_Parser):
2653 2727 for kw_type, keyword, description in self.analyze_macros(show_decoded_strings)]
2654 2728  
2655 2729 def process_file(self, show_decoded_strings=False,
2656   - display_code=True, global_analysis=True, hide_attributes=True,
  2730 + display_code=True, hide_attributes=True,
2657 2731 vba_code_only=False, show_deobfuscated_code=False,
2658 2732 deobfuscate=False):
2659 2733 """
... ... @@ -2699,19 +2773,12 @@ class VBA_Parser_CLI(VBA_Parser):
2699 2773 print '(empty macro)'
2700 2774 else:
2701 2775 print vba_code_filtered
2702   - if not global_analysis and not vba_code_only:
2703   - #TODO: remove this option
2704   - raise NotImplementedError
2705   - print '- ' * 39
2706   - print 'ANALYSIS:'
2707   - # analyse each module's code, filtered to avoid false positives:
2708   - self.print_analysis(show_decoded_strings, deobfuscate)
2709 2776 for (subfilename, stream_path, form_string) in self.extract_form_strings():
2710 2777 print '-' * 79
2711 2778 print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path)
2712 2779 print '- ' * 39
2713 2780 print form_string
2714   - if global_analysis and not vba_code_only:
  2781 + if not vba_code_only:
2715 2782 # analyse the code from all modules at once:
2716 2783 self.print_analysis(show_decoded_strings, deobfuscate)
2717 2784 if show_deobfuscated_code:
... ... @@ -2719,20 +2786,16 @@ class VBA_Parser_CLI(VBA_Parser):
2719 2786 print self.reveal()
2720 2787 else:
2721 2788 print 'No VBA macros found.'
2722   - except KeyboardInterrupt:
2723   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
2724   - raise
2725   - except: #TypeError:
2726   - #raise
2727   - #TODO: print more info if debug mode
2728   - #print sys.exc_value
2729   - # display the exception with full stack trace for debugging, but do not stop:
2730   - traceback.print_exc()
  2789 + except Exception as exc:
  2790 + # display the exception with full stack trace for debugging
  2791 + log.info('Error processing file %s (%s)' % (self.filename, exc))
  2792 + log.debug('Traceback:', exc_info=True)
  2793 + raise ProcessingError(self.filename, exc)
2731 2794 print ''
2732 2795  
2733 2796  
2734 2797 def process_file_json(self, show_decoded_strings=False,
2735   - display_code=True, global_analysis=True, hide_attributes=True,
  2798 + display_code=True, hide_attributes=True,
2736 2799 vba_code_only=False, show_deobfuscated_code=False):
2737 2800 """
2738 2801 Process a single file
... ... @@ -2781,27 +2844,19 @@ class VBA_Parser_CLI(VBA_Parser):
2781 2844 curr_macro['ole_stream'] = stream_path
2782 2845 if display_code:
2783 2846 curr_macro['code'] = vba_code_filtered.strip()
2784   - if not global_analysis and not vba_code_only:
2785   - # analyse each module's code, filtered to avoid false positives:
2786   - #TODO: remove this option
2787   - curr_macro['analysis'] = self.print_analysis_json(show_decoded_strings)
2788 2847 macros.append(curr_macro)
2789   - if global_analysis and not vba_code_only:
  2848 + if not vba_code_only:
2790 2849 # analyse the code from all modules at once:
2791 2850 result['analysis'] = self.print_analysis_json(show_decoded_strings)
2792 2851 if show_deobfuscated_code:
2793 2852 result['code_deobfuscated'] = self.reveal()
2794 2853 result['macros'] = macros
2795 2854 result['json_conversion_successful'] = True
2796   - except KeyboardInterrupt:
2797   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
2798   - raise
2799   - except: #TypeError:
2800   - #raise
2801   - #TODO: print more info if debug mode
2802   - #print sys.exc_value
2803   - # display the exception with full stack trace for debugging, but do not stop:
2804   - traceback.print_exc()
  2855 + except Exception as exc:
  2856 + # display the exception with full stack trace for debugging
  2857 + log.info('Error processing file %s (%s)' % (self.filename, exc))
  2858 + log.debug('Traceback:', exc_info=True)
  2859 + raise ProcessingError(self.filename, exc)
2805 2860  
2806 2861 return result
2807 2862  
... ... @@ -2811,57 +2866,46 @@ class VBA_Parser_CLI(VBA_Parser):
2811 2866 Process a file in triage mode, showing only summary results on one line.
2812 2867 """
2813 2868 #TODO: replace print by writing to a provided output file (sys.stdout by default)
2814   - message = ''
2815 2869 try:
2816   - if self.type is not None:
2817   - #TODO: handle olefile errors, when an OLE file is malformed
2818   - if self.detect_vba_macros():
2819   - # print a waiting message only if the output is not redirected to a file:
2820   - if sys.stdout.isatty():
2821   - print 'Analysis...\r',
2822   - sys.stdout.flush()
2823   - self.analyze_macros(show_decoded_strings=show_decoded_strings,
2824   - deobfuscate=deobfuscate)
2825   - flags = TYPE2TAG[self.type]
2826   - macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-'
2827   - if self.contains_macros: macros = 'M'
2828   - if self.nb_autoexec: autoexec = 'A'
2829   - if self.nb_suspicious: suspicious = 'S'
2830   - if self.nb_iocs: iocs = 'I'
2831   - if self.nb_hexstrings: hexstrings = 'H'
2832   - if self.nb_base64strings: base64obf = 'B'
2833   - if self.nb_dridexstrings: dridex = 'D'
2834   - if self.nb_vbastrings: vba_obf = 'V'
2835   - flags += '%s%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings,
2836   - base64obf, dridex, vba_obf)
2837   - # old table display:
2838   - # macros = autoexec = suspicious = iocs = hexstrings = 'no'
2839   - # if nb_macros: macros = 'YES:%d' % nb_macros
2840   - # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec
2841   - # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious
2842   - # if nb_iocs: iocs = 'YES:%d' % nb_iocs
2843   - # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings
2844   - # # 2nd line = info
2845   - # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (self.type, macros, autoexec, suspicious, iocs, hexstrings)
2846   - else:
2847   - # self.type==None
2848   - # file type not OLE nor OpenXML
2849   - flags = '?'
2850   - message = 'File format not supported'
2851   - except KeyboardInterrupt:
2852   - # do not ignore exceptions when the user presses Ctrl+C/Pause:
2853   - raise
2854   - except:
2855   - # another error occurred
2856   - #raise
2857   - #TODO: print more info if debug mode
2858   - #TODO: distinguish real errors from incorrect file types
2859   - flags = '!ERROR'
2860   - message = sys.exc_value
2861   - line = '%-12s %s' % (flags, self.filename)
2862   - if message:
2863   - line += ' - %s' % message
2864   - print line
  2870 + #TODO: handle olefile errors, when an OLE file is malformed
  2871 + if self.detect_vba_macros():
  2872 + # print a waiting message only if the output is not redirected to a file:
  2873 + if sys.stdout.isatty():
  2874 + print 'Analysis...\r',
  2875 + sys.stdout.flush()
  2876 + self.analyze_macros(show_decoded_strings=show_decoded_strings,
  2877 + deobfuscate=deobfuscate)
  2878 + flags = TYPE2TAG[self.type]
  2879 + macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-'
  2880 + if self.contains_macros: macros = 'M'
  2881 + if self.nb_autoexec: autoexec = 'A'
  2882 + if self.nb_suspicious: suspicious = 'S'
  2883 + if self.nb_iocs: iocs = 'I'
  2884 + if self.nb_hexstrings: hexstrings = 'H'
  2885 + if self.nb_base64strings: base64obf = 'B'
  2886 + if self.nb_dridexstrings: dridex = 'D'
  2887 + if self.nb_vbastrings: vba_obf = 'V'
  2888 + flags += '%s%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings,
  2889 + base64obf, dridex, vba_obf)
  2890 +
  2891 + line = '%-12s %s' % (flags, self.filename)
  2892 + print line
  2893 +
  2894 + # old table display:
  2895 + # macros = autoexec = suspicious = iocs = hexstrings = 'no'
  2896 + # if nb_macros: macros = 'YES:%d' % nb_macros
  2897 + # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec
  2898 + # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious
  2899 + # if nb_iocs: iocs = 'YES:%d' % nb_iocs
  2900 + # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings
  2901 + # # 2nd line = info
  2902 + # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (self.type, macros, autoexec, suspicious, iocs, hexstrings)
  2903 + except Exception as exc:
  2904 + # display the exception with full stack trace for debugging only
  2905 + log.debug('Error processing file %s (%s)' % (self.filename, exc),
  2906 + exc_info=True)
  2907 + raise ProcessingError(self.filename, exc)
  2908 +
2865 2909  
2866 2910 # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'),
2867 2911 # header=False, border=False)
... ... @@ -2883,7 +2927,6 @@ def main():
2883 2927 """
2884 2928 Main function, called when olevba is run from the command line
2885 2929 """
2886   - global log
2887 2930 DEFAULT_LOG_LEVEL = "warning" # Default log level
2888 2931 LOG_LEVELS = {
2889 2932 'debug': logging.DEBUG,
... ... @@ -2939,13 +2982,14 @@ def main():
2939 2982 if len(args) == 0:
2940 2983 print __doc__
2941 2984 parser.print_help()
2942   - sys.exit()
  2985 + sys.exit(RETURN_WRONG_ARGS)
2943 2986  
2944 2987 # provide info about tool and its version
2945 2988 if options.output_mode == 'json':
2946   - json_results = [dict(script_name='olevba', version=__version__,
2947   - url='http://decalage.info/python/oletools',
2948   - type='MetaInformation'), ]
  2989 + # prints opening [
  2990 + print_json(script_name='olevba', version=__version__,
  2991 + url='http://decalage.info/python/oletools',
  2992 + type='MetaInformation')
2949 2993 else:
2950 2994 print 'olevba %s - http://decalage.info/python/oletools' % __version__
2951 2995  
... ... @@ -2971,65 +3015,120 @@ def main():
2971 3015 count = 0
2972 3016 container = filename = data = None
2973 3017 vba_parser = None
2974   - for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
2975   - zip_password=options.zip_password, zip_fname=options.zip_fname):
2976   - # ignore directory names stored in zip files:
2977   - if container and filename.endswith('/'):
2978   - continue
2979   - # Open the file
2980   - vba_parser = VBA_Parser_CLI(filename, data=data, container=container)
2981   - if options.output_mode == 'detailed':
2982   - # fully detailed output
2983   - vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
2984   - display_code=options.display_code, global_analysis=True, #options.global_analysis,
2985   - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
2986   - show_deobfuscated_code=options.show_deobfuscated_code,
2987   - deobfuscate=options.deobfuscate)
2988   - elif options.output_mode in ('triage', 'unspecified'):
2989   - # print container name when it changes:
2990   - if container != previous_container:
2991   - if container is not None:
2992   - print '\nFiles in %s:' % container
2993   - previous_container = container
2994   - # summarized output for triage:
2995   - vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
2996   - deobfuscate=options.deobfuscate)
2997   - elif options.output_mode == 'json':
2998   - json_results.append(
2999   - vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
3000   - display_code=options.display_code, global_analysis=True, #options.global_analysis,
3001   - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
3002   - show_deobfuscated_code=options.show_deobfuscated_code))
3003   - else: # (should be impossible)
3004   - raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
3005   - count += 1
3006   - if options.output_mode == 'triage':
3007   - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \
3008   - 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \
3009   - 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n'
3010   -
3011   - if count == 1 and options.output_mode == 'unspecified':
3012   - # if options -t, -d and -j were not specified and it's a single file, print details:
3013   - vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
3014   - display_code=options.display_code, global_analysis=True, #options.global_analysis,
3015   - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
3016   - show_deobfuscated_code=options.show_deobfuscated_code,
3017   - deobfuscate=options.deobfuscate)
3018   -
3019   - if options.output_mode == 'json':
3020   - json_options = dict(check_circular=False, indent=4, ensure_ascii=False)
3021   -
3022   - # json.dump[s] cannot deal with unicode objects that are not properly
3023   - # encoded --> encode in own function:
3024   - json_results = json2ascii(json_results)
3025   - #print_json(json_results)
3026   -
3027   - # if False: # options.outfile: # (option currently commented out)
3028   - # with open(outfile, 'w') as write_handle:
3029   - # json.dump(write_handle, **json_options)
3030   - # else:
3031   - print json.dumps(json_results, **json_options)
  3018 + return_code = RETURN_OK
  3019 + try:
  3020 + for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
  3021 + zip_password=options.zip_password, zip_fname=options.zip_fname):
  3022 + # ignore directory names stored in zip files:
  3023 + if container and filename.endswith('/'):
  3024 + continue
  3025 +
  3026 + # handle errors from xglob
  3027 + if isinstance(data, Exception):
  3028 + if isinstance(data, PathNotFoundException):
  3029 + if options.output_mode in ('triage', 'unspecified'):
  3030 + print '%-12s %s - File not found' % ('?', filename)
  3031 + elif options.output_mode != 'json':
  3032 + log.error('Given path %r does not exist!' % filename)
  3033 + return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \
  3034 + else RETURN_SEVERAL_ERRS
  3035 + else:
  3036 + if options.output_mode in ('triage', 'unspecified'):
  3037 + print '%-12s %s - Failed to read from zip file %s' % ('?', filename, container)
  3038 + elif options.output_mode != 'json':
  3039 + log.error('Exception opening/reading %r from zip file %r: %s'
  3040 + % (filename, container, data))
  3041 + return_code = RETURN_XGLOB_ERR if return_code == 0 \
  3042 + else RETURN_SEVERAL_ERRS
  3043 + if options.output_mode == 'json':
  3044 + print_json(file=filename, type='error',
  3045 + error=type(data).__name__, message=str(data))
  3046 + continue
3032 3047  
  3048 + try:
  3049 + # Open the file
  3050 + vba_parser = VBA_Parser_CLI(filename, data=data, container=container)
  3051 +
  3052 + if options.output_mode == 'detailed':
  3053 + # fully detailed output
  3054 + vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
  3055 + display_code=options.display_code,
  3056 + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
  3057 + show_deobfuscated_code=options.show_deobfuscated_code,
  3058 + deobfuscate=options.deobfuscate)
  3059 + elif options.output_mode in ('triage', 'unspecified'):
  3060 + # print container name when it changes:
  3061 + if container != previous_container:
  3062 + if container is not None:
  3063 + print '\nFiles in %s:' % container
  3064 + previous_container = container
  3065 + # summarized output for triage:
  3066 + vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
  3067 + deobfuscate=options.deobfuscate)
  3068 + elif options.output_mode == 'json':
  3069 + print_json(
  3070 + vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,
  3071 + display_code=options.display_code,
  3072 + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
  3073 + show_deobfuscated_code=options.show_deobfuscated_code))
  3074 + else: # (should be impossible)
  3075 + raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
  3076 + count += 1
  3077 +
  3078 + except FileOpenError as exc:
  3079 + if options.output_mode in ('triage', 'unspecified'):
  3080 + print '%-12s %s - File format not supported' % ('?', filename)
  3081 + elif options.output_mode == 'json':
  3082 + print_json(file=filename, type='error',
  3083 + error=type(exc).__name__, message=str(exc))
  3084 + else:
  3085 + log.exception('Failed to open %s -- probably not supported!' % filename)
  3086 + return_code = RETURN_OPEN_ERROR if return_code == 0 \
  3087 + else RETURN_SEVERAL_ERRS
  3088 + except ProcessingError as exc:
  3089 + if options.output_mode in ('triage', 'unspecified'):
  3090 + print '%-12s %s - %s' % ('!ERROR', filename, exc.orig_exception)
  3091 + elif options.output_mode == 'json':
  3092 + print_json(file=filename, type='error',
  3093 + error=type(exc).__name__,
  3094 + message=str(exc.orig_exception))
  3095 + else:
  3096 + log.exception('Error processing file %s (%s)!'
  3097 + % (filename, exc.orig_exception))
  3098 + return_code = RETURN_PARSE_ERROR if return_code == 0 \
  3099 + else RETURN_SEVERAL_ERRS
  3100 + finally:
  3101 + if vba_parser is not None:
  3102 + vba_parser.close()
  3103 +
  3104 + if options.output_mode == 'triage':
  3105 + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \
  3106 + 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \
  3107 + 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n'
  3108 +
  3109 + if count == 1 and options.output_mode == 'unspecified':
  3110 + # if options -t, -d and -j were not specified and it's a single file, print details:
  3111 + vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,
  3112 + display_code=options.display_code,
  3113 + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
  3114 + show_deobfuscated_code=options.show_deobfuscated_code,
  3115 + deobfuscate=options.deobfuscate)
  3116 +
  3117 + if options.output_mode == 'json':
  3118 + # print last json entry (a last one without a comma) and closing ]
  3119 + print_json(type='MetaInformation', return_code=return_code,
  3120 + n_processed=count, _json_is_last=True)
  3121 +
  3122 + except Exception as exc:
  3123 + # some unexpected error, maybe some of the types caught in except clauses
  3124 + # above were not sufficient. This is very bad, so log complete trace at exception level
  3125 + # and do not care about output mode
  3126 + log.exception('Unhandled exception in main: %s' % exc, exc_info=True)
  3127 + return_code = RETURN_UNEXPECTED # even if there were others before -- this is more important
  3128 +
  3129 + # done. exit
  3130 + log.debug('will exit now with code %s' % return_code)
  3131 + sys.exit(return_code)
3033 3132  
3034 3133 if __name__ == '__main__':
3035 3134 main()
... ...
oletools/thirdparty/xglob/xglob.py
... ... @@ -60,6 +60,15 @@ __version__ = '0.05'
60 60  
61 61 import os, fnmatch, glob, zipfile
62 62  
  63 +#=== EXCEPTIONS ==============================================================
  64 +
  65 +class PathNotFoundException(Exception):
  66 + """ raised if given a fixed file/dir (not a glob) that does not exist """
  67 + def __init__(self, path):
  68 + super(PathNotFoundException, self).__init__(
  69 + 'Given path does not exist: %r' % path)
  70 +
  71 +
63 72 #=== FUNCTIONS ===============================================================
64 73  
65 74 # recursive glob function to find files in any subfolder:
... ... @@ -118,8 +127,11 @@ def iter_files(files, recursive=False, zip_password=None, zip_fname='*'):
118 127 - then files matching zip_fname are opened from the zip archive
119 128  
120 129 Iterator: yields (container, filename, data) for each file. If zip_password is None, then
121   - only the filename is returned, container and data=None. Otherwise container si the
122   - filename of the container (zip file), and data is the file content.
  130 + only the filename is returned, container and data=None. Otherwise container is the
  131 + filename of the container (zip file), and data is the file content (or an exception).
  132 + If a given filename is not a glob and does not exist, the triplet
  133 + (None, filename, PathNotFoundException) is yielded. (Globs matching nothing
  134 + do not trigger exceptions)
123 135 """
124 136 #TODO: catch exceptions and yield them for the caller (no file found, file is not zip, wrong password, etc)
125 137 #TODO: use logging instead of printing
... ... @@ -131,6 +143,9 @@ def iter_files(files, recursive=False, zip_password=None, zip_fname='*'):
131 143 else:
132 144 iglob = glob.iglob
133 145 for filespec in files:
  146 + if not is_glob(filespec) and not os.path.exists(filespec):
  147 + yield None, filespec, PathNotFoundException(filespec)
  148 + continue
134 149 for filename in iglob(filespec):
135 150 if zip_password is not None:
136 151 # Each file is expected to be a zip archive:
... ... @@ -153,3 +168,39 @@ def iter_files(files, recursive=False, zip_password=None, zip_fname=&#39;*&#39;):
153 168 #data = open(filename, 'rb').read()
154 169 #yield None, filename, data
155 170  
  171 +
  172 +def is_glob(filespec):
  173 + """ determine if given file specification is a single file name or a glob
  174 +
  175 + python's glob and fnmatch can only interpret ?, *, [list], and [ra-nge],
  176 + (and combinations: hex_*_[A-Fabcdef0-9]).
  177 + The special chars *?[-] can only be escaped using []
  178 + --> file_name is not a glob
  179 + --> file?name is a glob
  180 + --> file* is a glob
  181 + --> file[-._]name is a glob
  182 + --> file[?]name is not a glob (matches literal "file?name")
  183 + --> file[*]name is not a glob (matches literal "file*name")
  184 + --> file[-]name is not a glob (matches literal "file-name")
  185 + --> file-name is not a glob
  186 +
  187 + Also, obviously incorrect globs are treated as non-globs
  188 + --> file[name is not a glob (matches literal "file[name")
  189 + --> file]-[name is treated as a glob
  190 + (it is not a valid glob but detecting errors like this requires
  191 + sophisticated regular expression matching)
  192 +
  193 + Python's glob also works with globs in directory-part of path
  194 + --> dir-part of path is analyzed just like filename-part
  195 + --> thirdparty/*/xglob.py is a (valid) glob
  196 +
  197 + TODO: create a correct regexp to test for validity of ranges
  198 + """
  199 +
  200 + # remove escaped special chars
  201 + cleaned = filespec.replace('[*]', '').replace('[?]', '') \
  202 + .replace('[[]', '').replace('[]]', '').replace('[-]', '')
  203 +
  204 + # check if special chars remain
  205 + return '*' in cleaned or '?' in cleaned or \
  206 + ('[' in cleaned and ']' in cleaned)
... ...