Commit ffa04426f10dfe4cd8a805fe35a85f6121694213
olevba: many improvements and fixes by Christian Herdtweck (exit code, exception handling, JSON output)
Showing 2 changed files with 671 additions and 521 deletions
oletools/olevba.py
| ... | ... | @@ -76,7 +76,7 @@ https://github.com/unixfreak0037/officeparser |
| 76 | 76 | # CHANGELOG: |
| 77 | 77 | # 2014-08-05 v0.01 PL: - first version based on officeparser code |
| 78 | 78 | # 2014-08-14 v0.02 PL: - fixed bugs in code, added license from officeparser |
| 79 | -# 2014-08-15 PL: - fixed incorrect value check in PROJECTHELPFILEPATH Record | |
| 79 | +# 2014-08-15 PL: - fixed incorrect value check in projecthelpfilepath Record | |
| 80 | 80 | # 2014-08-15 v0.03 PL: - refactored extract_macros to support OpenXML formats |
| 81 | 81 | # and to find the VBA project root anywhere in the file |
| 82 | 82 | # 2014-11-29 v0.04 PL: - use olefile instead of OleFileIO_PL |
| ... | ... | @@ -169,6 +169,9 @@ https://github.com/unixfreak0037/officeparser |
| 169 | 169 | # 2016-04-19 v0.46 PL: - new option --deobf instead of --no-deobfuscate |
| 170 | 170 | # - updated suspicious keywords |
| 171 | 171 | # 2016-05-04 v0.47 PL: - look for VBA code in any stream including orphans |
| 172 | +# 2016-04-28 CH: - return an exit code depending on the results | |
| 173 | +# - improved error and exception handling | |
| 174 | +# - improved JSON output | |
| 172 | 175 | |
| 173 | 176 | __version__ = '0.47' |
| 174 | 177 | |
| ... | ... | @@ -212,10 +215,8 @@ import math |
| 212 | 215 | import zipfile |
| 213 | 216 | import re |
| 214 | 217 | import optparse |
| 215 | -import os.path | |
| 216 | 218 | import binascii |
| 217 | 219 | import base64 |
| 218 | -import traceback | |
| 219 | 220 | import zlib |
| 220 | 221 | import email # for MHTML parsing |
| 221 | 222 | import string # for printable |
| ... | ... | @@ -240,8 +241,12 @@ except ImportError: |
| 240 | 241 | |
| 241 | 242 | import thirdparty.olefile as olefile |
| 242 | 243 | from thirdparty.prettytable import prettytable |
| 243 | -from thirdparty.xglob import xglob | |
| 244 | -from thirdparty.pyparsing.pyparsing import * | |
| 244 | +from thirdparty.xglob import xglob, PathNotFoundException | |
| 245 | +from thirdparty.pyparsing.pyparsing import \ | |
| 246 | + CaselessKeyword, CaselessLiteral, Combine, Forward, Literal, \ | |
| 247 | + Optional, QuotedString,Regex, Suppress, Word, WordStart, \ | |
| 248 | + alphanums, alphas, hexnums,nums, opAssoc, srange, \ | |
| 249 | + infixNotation | |
| 245 | 250 | |
| 246 | 251 | # monkeypatch email to fix issue #32: |
| 247 | 252 | # allow header lines without ":" |
| ... | ... | @@ -291,8 +296,51 @@ def get_logger(name, level=logging.CRITICAL+1): |
| 291 | 296 | log = get_logger('olevba') |
| 292 | 297 | |
| 293 | 298 | |
| 299 | +#=== EXCEPTIONS ============================================================== | |
| 300 | + | |
| 301 | +class FileOpenError(Exception): | |
| 302 | + """ raised by VBA_Parser constructor if all open_... attempts failed | |
| 303 | + | |
| 304 | + probably means the file type is not supported | |
| 305 | + """ | |
| 306 | + | |
| 307 | + def __init__(self, filename): | |
| 308 | + super(FileOpenError, self).__init__( | |
| 309 | + 'Failed to open file %s ... probably not supported' % filename) | |
| 310 | + self.filename = filename | |
| 311 | + | |
| 312 | + | |
| 313 | +class ProcessingError(Exception): | |
| 314 | + """ raised by VBA_Parser.process_file* functions """ | |
| 315 | + | |
| 316 | + def __init__(self, filename, orig_exception): | |
| 317 | + super(ProcessingError, self).__init__( | |
| 318 | + 'Error processing file %s (%s)' % (filename, orig_exception)) | |
| 319 | + self.filename = filename | |
| 320 | + self.orig_exception = orig_exception | |
| 321 | + | |
| 322 | + | |
| 323 | +class MsoExtractionError(RuntimeError): | |
| 324 | + """ raised by mso_file_extract if parsing MSO/ActiveMIME data failed """ | |
| 325 | + | |
| 326 | + def __init__(self, msg): | |
| 327 | + super(MsoExtractionError, self).__init__(msg) | |
| 328 | + self.msg = msg | |
| 329 | + | |
| 330 | + | |
| 294 | 331 | #--- CONSTANTS ---------------------------------------------------------------- |
| 295 | 332 | |
| 333 | +# return codes | |
| 334 | +RETURN_OK = 0 | |
| 335 | +RETURN_WARNINGS = 1 # (reserved, not used yet) | |
| 336 | +RETURN_WRONG_ARGS = 2 # (fixed, built into optparse) | |
| 337 | +RETURN_FILE_NOT_FOUND = 3 | |
| 338 | +RETURN_XGLOB_ERR = 4 | |
| 339 | +RETURN_OPEN_ERROR = 5 | |
| 340 | +RETURN_PARSE_ERROR = 6 | |
| 341 | +RETURN_SEVERAL_ERRS = 7 | |
| 342 | +RETURN_UNEXPECTED = 8 | |
| 343 | + | |
| 296 | 344 | # URL and message to report issues: |
| 297 | 345 | URL_OLEVBA_ISSUES = 'https://bitbucket.org/decalage/oletools/issues' |
| 298 | 346 | MSG_OLEVBA_ISSUES = 'Please report this issue on %s' % URL_OLEVBA_ISSUES |
| ... | ... | @@ -846,36 +894,37 @@ def mso_file_extract(data): |
| 846 | 894 | :param data: bytes string, MSO/ActiveMime file content |
| 847 | 895 | :return: bytes string, extracted data (uncompressed) |
| 848 | 896 | |
| 849 | - raise a RuntimeError if the data cannot be extracted | |
| 897 | + raise a MsoExtractionError if the data cannot be extracted | |
| 850 | 898 | """ |
| 851 | 899 | # check the magic: |
| 852 | 900 | assert is_mso_file(data) |
| 901 | + | |
| 902 | + # In all the samples seen so far, Word always uses an offset of 0x32, | |
| 903 | + # and Excel 0x22A. But we read the offset from the header to be more | |
| 904 | + # generic. | |
| 905 | + offsets = [0x32, 0x22A] | |
| 906 | + | |
| 853 | 907 | # First, attempt to get the compressed data offset from the header |
| 854 | 908 | # According to my tests, it should be an unsigned 16 bits integer, |
| 855 | 909 | # at offset 0x1E (little endian) + add 46: |
| 856 | 910 | try: |
| 857 | 911 | offset = struct.unpack_from('<H', data, offset=0x1E)[0] + 46 |
| 858 | 912 | log.debug('Parsing MSO file: data offset = 0x%X' % offset) |
| 859 | - except KeyboardInterrupt: | |
| 860 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 861 | - raise | |
| 862 | - except: | |
| 863 | - log.exception('Unable to parse MSO/ActiveMime file header') | |
| 864 | - raise RuntimeError('Unable to parse MSO/ActiveMime file header') | |
| 865 | - # In all the samples seen so far, Word always uses an offset of 0x32, | |
| 866 | - # and Excel 0x22A. But we read the offset from the header to be more | |
| 867 | - # generic. | |
| 868 | - # Let's try that offset, then 0x32 and 0x22A, just in case: | |
| 869 | - for start in (offset, 0x32, 0x22A): | |
| 913 | + offsets.insert(0, offset) # insert at beginning of offsets | |
| 914 | + except struct.error as exc: | |
| 915 | + log.info('Unable to parse MSO/ActiveMime file header (%s)' % exc) | |
| 916 | + log.debug('Trace:', exc_info=True) | |
| 917 | + raise MsoExtractionError('Unable to parse MSO/ActiveMime file header') | |
| 918 | + # now try offsets | |
| 919 | + for start in offsets: | |
| 870 | 920 | try: |
| 871 | 921 | log.debug('Attempting zlib decompression from MSO file offset 0x%X' % start) |
| 872 | 922 | extracted_data = zlib.decompress(data[start:]) |
| 873 | 923 | return extracted_data |
| 874 | - except KeyboardInterrupt: | |
| 875 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 876 | - raise | |
| 877 | - except: | |
| 878 | - log.exception('zlib decompression failed') | |
| 924 | + except zlib.error as exc: | |
| 925 | + log.info('zlib decompression failed for offset %s (%s)' | |
| 926 | + % (start, exc)) | |
| 927 | + log.debug('Trace:', exc_info=True) | |
| 879 | 928 | # None of the guessed offsets worked, let's try brute-forcing by looking |
| 880 | 929 | # for potential zlib-compressed blocks starting with 0x78: |
| 881 | 930 | log.debug('Looking for potential zlib-compressed blocks in MSO file') |
| ... | ... | @@ -885,12 +934,10 @@ def mso_file_extract(data): |
| 885 | 934 | log.debug('Attempting zlib decompression from MSO file offset 0x%X' % start) |
| 886 | 935 | extracted_data = zlib.decompress(data[start:]) |
| 887 | 936 | return extracted_data |
| 888 | - except KeyboardInterrupt: | |
| 889 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 890 | - raise | |
| 891 | - except: | |
| 892 | - log.exception('zlib decompression failed') | |
| 893 | - raise RuntimeError('Unable to decompress data from a MSO/ActiveMime file') | |
| 937 | + except zlib.error as exc: | |
| 938 | + log.info('zlib decompression failed (%s)' % exc) | |
| 939 | + log.debug('Trace:', exc_info=True) | |
| 940 | + raise MsoExtractionError('Unable to decompress data from a MSO/ActiveMime file') | |
| 894 | 941 | |
| 895 | 942 | |
| 896 | 943 | #--- FUNCTIONS ---------------------------------------------------------------- |
| ... | ... | @@ -911,29 +958,6 @@ def is_printable(s): |
| 911 | 958 | return set(s).issubset(_PRINTABLE_SET) |
| 912 | 959 | |
| 913 | 960 | |
| 914 | -def print_json(j): | |
| 915 | - """ | |
| 916 | - Print a dictionary, a list or any other object to stdout | |
| 917 | - :param j: object to be printed | |
| 918 | - :return: | |
| 919 | - """ | |
| 920 | - if isinstance(j, dict): | |
| 921 | - for key, val in j.items(): | |
| 922 | - print_json(key) | |
| 923 | - print_json(val) | |
| 924 | - elif isinstance(j, list): | |
| 925 | - for elem in j: | |
| 926 | - print_json(elem) | |
| 927 | - else: | |
| 928 | - try: | |
| 929 | - if len(j) > 20: | |
| 930 | - print type(j), repr(j[:20]), '...(len {0})'.format(len(j)) | |
| 931 | - else: | |
| 932 | - print type(j), repr(j) | |
| 933 | - except TypeError: | |
| 934 | - print type(j), repr(j) | |
| 935 | - | |
| 936 | - | |
| 937 | 961 | def copytoken_help(decompressed_current, decompressed_chunk_start): |
| 938 | 962 | """ |
| 939 | 963 | compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help |
| ... | ... | @@ -1057,7 +1081,7 @@ def decompress_stream(compressed_container): |
| 1057 | 1081 | copy_token = \ |
| 1058 | 1082 | struct.unpack("<H", compressed_container[compressed_current:compressed_current + 2])[0] |
| 1059 | 1083 | #TODO: check this |
| 1060 | - length_mask, offset_mask, bit_count, maximum_length = copytoken_help( | |
| 1084 | + length_mask, offset_mask, bit_count, _ = copytoken_help( | |
| 1061 | 1085 | len(decompressed_container), decompressed_chunk_start) |
| 1062 | 1086 | length = (copy_token & length_mask) + 3 |
| 1063 | 1087 | temp1 = copy_token & offset_mask |
| ... | ... | @@ -1136,122 +1160,130 @@ def _extract_vba(ole, vba_root, project_path, dir_path): |
| 1136 | 1160 | dir_stream = cStringIO.StringIO(decompress_stream(dir_compressed)) |
| 1137 | 1161 | |
| 1138 | 1162 | # PROJECTSYSKIND Record |
| 1139 | - PROJECTSYSKIND_Id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1140 | - check_value('PROJECTSYSKIND_Id', 0x0001, PROJECTSYSKIND_Id) | |
| 1141 | - PROJECTSYSKIND_Size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1142 | - check_value('PROJECTSYSKIND_Size', 0x0004, PROJECTSYSKIND_Size) | |
| 1143 | - PROJECTSYSKIND_SysKind = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1144 | - if PROJECTSYSKIND_SysKind == 0x00: | |
| 1163 | + projectsyskind_id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1164 | + check_value('PROJECTSYSKIND_Id', 0x0001, projectsyskind_id) | |
| 1165 | + projectsyskind_size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1166 | + check_value('PROJECTSYSKIND_Size', 0x0004, projectsyskind_size) | |
| 1167 | + projectsyskind_syskind = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1168 | + if projectsyskind_syskind == 0x00: | |
| 1145 | 1169 | log.debug("16-bit Windows") |
| 1146 | - elif PROJECTSYSKIND_SysKind == 0x01: | |
| 1170 | + elif projectsyskind_syskind == 0x01: | |
| 1147 | 1171 | log.debug("32-bit Windows") |
| 1148 | - elif PROJECTSYSKIND_SysKind == 0x02: | |
| 1172 | + elif projectsyskind_syskind == 0x02: | |
| 1149 | 1173 | log.debug("Macintosh") |
| 1150 | - elif PROJECTSYSKIND_SysKind == 0x03: | |
| 1174 | + elif projectsyskind_syskind == 0x03: | |
| 1151 | 1175 | log.debug("64-bit Windows") |
| 1152 | 1176 | else: |
| 1153 | - log.error("invalid PROJECTSYSKIND_SysKind {0:04X}".format(PROJECTSYSKIND_SysKind)) | |
| 1177 | + log.error("invalid PROJECTSYSKIND_SysKind {0:04X}".format(projectsyskind_syskind)) | |
| 1154 | 1178 | |
| 1155 | 1179 | # PROJECTLCID Record |
| 1156 | - PROJECTLCID_Id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1157 | - check_value('PROJECTLCID_Id', 0x0002, PROJECTLCID_Id) | |
| 1158 | - PROJECTLCID_Size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1159 | - check_value('PROJECTLCID_Size', 0x0004, PROJECTLCID_Size) | |
| 1160 | - PROJECTLCID_Lcid = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1161 | - check_value('PROJECTLCID_Lcid', 0x409, PROJECTLCID_Lcid) | |
| 1180 | + projectlcid_id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1181 | + check_value('PROJECTLCID_Id', 0x0002, projectlcid_id) | |
| 1182 | + projectlcid_size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1183 | + check_value('PROJECTLCID_Size', 0x0004, projectlcid_size) | |
| 1184 | + projectlcid_lcid = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1185 | + check_value('PROJECTLCID_Lcid', 0x409, projectlcid_lcid) | |
| 1162 | 1186 | |
| 1163 | 1187 | # PROJECTLCIDINVOKE Record |
| 1164 | - PROJECTLCIDINVOKE_Id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1165 | - check_value('PROJECTLCIDINVOKE_Id', 0x0014, PROJECTLCIDINVOKE_Id) | |
| 1166 | - PROJECTLCIDINVOKE_Size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1167 | - check_value('PROJECTLCIDINVOKE_Size', 0x0004, PROJECTLCIDINVOKE_Size) | |
| 1168 | - PROJECTLCIDINVOKE_LcidInvoke = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1169 | - check_value('PROJECTLCIDINVOKE_LcidInvoke', 0x409, PROJECTLCIDINVOKE_LcidInvoke) | |
| 1188 | + projectlcidinvoke_id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1189 | + check_value('PROJECTLCIDINVOKE_Id', 0x0014, projectlcidinvoke_id) | |
| 1190 | + projectlcidinvoke_size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1191 | + check_value('PROJECTLCIDINVOKE_Size', 0x0004, projectlcidinvoke_size) | |
| 1192 | + projectlcidinvoke_lcidinvoke = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1193 | + check_value('PROJECTLCIDINVOKE_LcidInvoke', 0x409, projectlcidinvoke_lcidinvoke) | |
| 1170 | 1194 | |
| 1171 | 1195 | # PROJECTCODEPAGE Record |
| 1172 | - PROJECTCODEPAGE_Id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1173 | - check_value('PROJECTCODEPAGE_Id', 0x0003, PROJECTCODEPAGE_Id) | |
| 1174 | - PROJECTCODEPAGE_Size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1175 | - check_value('PROJECTCODEPAGE_Size', 0x0002, PROJECTCODEPAGE_Size) | |
| 1176 | - PROJECTCODEPAGE_CodePage = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1196 | + projectcodepage_id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1197 | + check_value('PROJECTCODEPAGE_Id', 0x0003, projectcodepage_id) | |
| 1198 | + projectcodepage_size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1199 | + check_value('PROJECTCODEPAGE_Size', 0x0002, projectcodepage_size) | |
| 1200 | + projectcodepage_codepage = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1177 | 1201 | |
| 1178 | 1202 | # PROJECTNAME Record |
| 1179 | - PROJECTNAME_Id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1180 | - check_value('PROJECTNAME_Id', 0x0004, PROJECTNAME_Id) | |
| 1181 | - PROJECTNAME_SizeOfProjectName = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1182 | - if PROJECTNAME_SizeOfProjectName < 1 or PROJECTNAME_SizeOfProjectName > 128: | |
| 1183 | - log.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(PROJECTNAME_SizeOfProjectName)) | |
| 1184 | - PROJECTNAME_ProjectName = dir_stream.read(PROJECTNAME_SizeOfProjectName) | |
| 1203 | + projectname_id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1204 | + check_value('PROJECTNAME_Id', 0x0004, projectname_id) | |
| 1205 | + projectname_sizeof_projectname = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1206 | + if projectname_sizeof_projectname < 1 or projectname_sizeof_projectname > 128: | |
| 1207 | + log.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(projectname_sizeof_projectname)) | |
| 1208 | + projectname_projectname = dir_stream.read(projectname_sizeof_projectname) | |
| 1209 | + unused = projectname_projectname | |
| 1185 | 1210 | |
| 1186 | 1211 | # PROJECTDOCSTRING Record |
| 1187 | - PROJECTDOCSTRING_Id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1188 | - check_value('PROJECTDOCSTRING_Id', 0x0005, PROJECTDOCSTRING_Id) | |
| 1189 | - PROJECTDOCSTRING_SizeOfDocString = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1190 | - if PROJECTNAME_SizeOfProjectName > 2000: | |
| 1212 | + projectdocstring_id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1213 | + check_value('PROJECTDOCSTRING_Id', 0x0005, projectdocstring_id) | |
| 1214 | + projectdocstring_sizeof_docstring = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1215 | + if projectdocstring_sizeof_docstring > 2000: | |
| 1191 | 1216 | log.error( |
| 1192 | - "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(PROJECTDOCSTRING_SizeOfDocString)) | |
| 1193 | - PROJECTDOCSTRING_DocString = dir_stream.read(PROJECTDOCSTRING_SizeOfDocString) | |
| 1194 | - PROJECTDOCSTRING_Reserved = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1195 | - check_value('PROJECTDOCSTRING_Reserved', 0x0040, PROJECTDOCSTRING_Reserved) | |
| 1196 | - PROJECTDOCSTRING_SizeOfDocStringUnicode = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1197 | - if PROJECTDOCSTRING_SizeOfDocStringUnicode % 2 != 0: | |
| 1217 | + "PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(projectdocstring_sizeof_docstring)) | |
| 1218 | + projectdocstring_docstring = dir_stream.read(projectdocstring_sizeof_docstring) | |
| 1219 | + projectdocstring_reserved = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1220 | + check_value('PROJECTDOCSTRING_Reserved', 0x0040, projectdocstring_reserved) | |
| 1221 | + projectdocstring_sizeof_docstring_unicode = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1222 | + if projectdocstring_sizeof_docstring_unicode % 2 != 0: | |
| 1198 | 1223 | log.error("PROJECTDOCSTRING_SizeOfDocStringUnicode is not even") |
| 1199 | - PROJECTDOCSTRING_DocStringUnicode = dir_stream.read(PROJECTDOCSTRING_SizeOfDocStringUnicode) | |
| 1224 | + projectdocstring_docstring_unicode = dir_stream.read(projectdocstring_sizeof_docstring_unicode) | |
| 1225 | + unused = projectdocstring_docstring | |
| 1226 | + unused = projectdocstring_docstring_unicode | |
| 1200 | 1227 | |
| 1201 | 1228 | # PROJECTHELPFILEPATH Record - MS-OVBA 2.3.4.2.1.7 |
| 1202 | - PROJECTHELPFILEPATH_Id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1203 | - check_value('PROJECTHELPFILEPATH_Id', 0x0006, PROJECTHELPFILEPATH_Id) | |
| 1204 | - PROJECTHELPFILEPATH_SizeOfHelpFile1 = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1205 | - if PROJECTHELPFILEPATH_SizeOfHelpFile1 > 260: | |
| 1229 | + projecthelpfilepath_id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1230 | + check_value('PROJECTHELPFILEPATH_Id', 0x0006, projecthelpfilepath_id) | |
| 1231 | + projecthelpfilepath_sizeof_helpfile1 = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1232 | + if projecthelpfilepath_sizeof_helpfile1 > 260: | |
| 1206 | 1233 | log.error( |
| 1207 | - "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(PROJECTHELPFILEPATH_SizeOfHelpFile1)) | |
| 1208 | - PROJECTHELPFILEPATH_HelpFile1 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile1) | |
| 1209 | - PROJECTHELPFILEPATH_Reserved = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1210 | - check_value('PROJECTHELPFILEPATH_Reserved', 0x003D, PROJECTHELPFILEPATH_Reserved) | |
| 1211 | - PROJECTHELPFILEPATH_SizeOfHelpFile2 = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1212 | - if PROJECTHELPFILEPATH_SizeOfHelpFile2 != PROJECTHELPFILEPATH_SizeOfHelpFile1: | |
| 1234 | + "PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(projecthelpfilepath_sizeof_helpfile1)) | |
| 1235 | + projecthelpfilepath_helpfile1 = dir_stream.read(projecthelpfilepath_sizeof_helpfile1) | |
| 1236 | + projecthelpfilepath_reserved = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1237 | + check_value('PROJECTHELPFILEPATH_Reserved', 0x003D, projecthelpfilepath_reserved) | |
| 1238 | + projecthelpfilepath_sizeof_helpfile2 = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1239 | + if projecthelpfilepath_sizeof_helpfile2 != projecthelpfilepath_sizeof_helpfile1: | |
| 1213 | 1240 | log.error("PROJECTHELPFILEPATH_SizeOfHelpFile1 does not equal PROJECTHELPFILEPATH_SizeOfHelpFile2") |
| 1214 | - PROJECTHELPFILEPATH_HelpFile2 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile2) | |
| 1215 | - if PROJECTHELPFILEPATH_HelpFile2 != PROJECTHELPFILEPATH_HelpFile1: | |
| 1241 | + projecthelpfilepath_helpfile2 = dir_stream.read(projecthelpfilepath_sizeof_helpfile2) | |
| 1242 | + if projecthelpfilepath_helpfile2 != projecthelpfilepath_helpfile1: | |
| 1216 | 1243 | log.error("PROJECTHELPFILEPATH_HelpFile1 does not equal PROJECTHELPFILEPATH_HelpFile2") |
| 1217 | 1244 | |
| 1218 | 1245 | # PROJECTHELPCONTEXT Record |
| 1219 | - PROJECTHELPCONTEXT_Id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1220 | - check_value('PROJECTHELPCONTEXT_Id', 0x0007, PROJECTHELPCONTEXT_Id) | |
| 1221 | - PROJECTHELPCONTEXT_Size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1222 | - check_value('PROJECTHELPCONTEXT_Size', 0x0004, PROJECTHELPCONTEXT_Size) | |
| 1223 | - PROJECTHELPCONTEXT_HelpContext = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1246 | + projecthelpcontext_id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1247 | + check_value('PROJECTHELPCONTEXT_Id', 0x0007, projecthelpcontext_id) | |
| 1248 | + projecthelpcontext_size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1249 | + check_value('PROJECTHELPCONTEXT_Size', 0x0004, projecthelpcontext_size) | |
| 1250 | + projecthelpcontext_helpcontext = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1251 | + unused = projecthelpcontext_helpcontext | |
| 1224 | 1252 | |
| 1225 | 1253 | # PROJECTLIBFLAGS Record |
| 1226 | - PROJECTLIBFLAGS_Id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1227 | - check_value('PROJECTLIBFLAGS_Id', 0x0008, PROJECTLIBFLAGS_Id) | |
| 1228 | - PROJECTLIBFLAGS_Size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1229 | - check_value('PROJECTLIBFLAGS_Size', 0x0004, PROJECTLIBFLAGS_Size) | |
| 1230 | - PROJECTLIBFLAGS_ProjectLibFlags = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1231 | - check_value('PROJECTLIBFLAGS_ProjectLibFlags', 0x0000, PROJECTLIBFLAGS_ProjectLibFlags) | |
| 1254 | + projectlibflags_id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1255 | + check_value('PROJECTLIBFLAGS_Id', 0x0008, projectlibflags_id) | |
| 1256 | + projectlibflags_size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1257 | + check_value('PROJECTLIBFLAGS_Size', 0x0004, projectlibflags_size) | |
| 1258 | + projectlibflags_projectlibflags = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1259 | + check_value('PROJECTLIBFLAGS_ProjectLibFlags', 0x0000, projectlibflags_projectlibflags) | |
| 1232 | 1260 | |
| 1233 | 1261 | # PROJECTVERSION Record |
| 1234 | - PROJECTVERSION_Id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1235 | - check_value('PROJECTVERSION_Id', 0x0009, PROJECTVERSION_Id) | |
| 1236 | - PROJECTVERSION_Reserved = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1237 | - check_value('PROJECTVERSION_Reserved', 0x0004, PROJECTVERSION_Reserved) | |
| 1238 | - PROJECTVERSION_VersionMajor = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1239 | - PROJECTVERSION_VersionMinor = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1262 | + projectversion_id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1263 | + check_value('PROJECTVERSION_Id', 0x0009, projectversion_id) | |
| 1264 | + projectversion_reserved = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1265 | + check_value('PROJECTVERSION_Reserved', 0x0004, projectversion_reserved) | |
| 1266 | + projectversion_versionmajor = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1267 | + projectversion_versionminor = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1268 | + unused = projectversion_versionmajor | |
| 1269 | + unused = projectversion_versionminor | |
| 1240 | 1270 | |
| 1241 | 1271 | # PROJECTCONSTANTS Record |
| 1242 | - PROJECTCONSTANTS_Id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1243 | - check_value('PROJECTCONSTANTS_Id', 0x000C, PROJECTCONSTANTS_Id) | |
| 1244 | - PROJECTCONSTANTS_SizeOfConstants = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1245 | - if PROJECTCONSTANTS_SizeOfConstants > 1015: | |
| 1272 | + projectconstants_id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1273 | + check_value('PROJECTCONSTANTS_Id', 0x000C, projectconstants_id) | |
| 1274 | + projectconstants_sizeof_constants = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1275 | + if projectconstants_sizeof_constants > 1015: | |
| 1246 | 1276 | log.error( |
| 1247 | - "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(PROJECTCONSTANTS_SizeOfConstants)) | |
| 1248 | - PROJECTCONSTANTS_Constants = dir_stream.read(PROJECTCONSTANTS_SizeOfConstants) | |
| 1249 | - PROJECTCONSTANTS_Reserved = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1250 | - check_value('PROJECTCONSTANTS_Reserved', 0x003C, PROJECTCONSTANTS_Reserved) | |
| 1251 | - PROJECTCONSTANTS_SizeOfConstantsUnicode = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1252 | - if PROJECTCONSTANTS_SizeOfConstantsUnicode % 2 != 0: | |
| 1277 | + "PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(projectconstants_sizeof_constants)) | |
| 1278 | + projectconstants_constants = dir_stream.read(projectconstants_sizeof_constants) | |
| 1279 | + projectconstants_reserved = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1280 | + check_value('PROJECTCONSTANTS_Reserved', 0x003C, projectconstants_reserved) | |
| 1281 | + projectconstants_sizeof_constants_unicode = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1282 | + if projectconstants_sizeof_constants_unicode % 2 != 0: | |
| 1253 | 1283 | log.error("PROJECTCONSTANTS_SizeOfConstantsUnicode is not even") |
| 1254 | - PROJECTCONSTANTS_ConstantsUnicode = dir_stream.read(PROJECTCONSTANTS_SizeOfConstantsUnicode) | |
| 1284 | + projectconstants_constants_unicode = dir_stream.read(projectconstants_sizeof_constants_unicode) | |
| 1285 | + unused = projectconstants_constants | |
| 1286 | + unused = projectconstants_constants_unicode | |
| 1255 | 1287 | |
| 1256 | 1288 | # array of REFERENCE records |
| 1257 | 1289 | check = None |
| ... | ... | @@ -1263,194 +1295,230 @@ def _extract_vba(ole, vba_root, project_path, dir_path): |
| 1263 | 1295 | |
| 1264 | 1296 | if check == 0x0016: |
| 1265 | 1297 | # REFERENCENAME |
| 1266 | - REFERENCE_Id = check | |
| 1267 | - REFERENCE_SizeOfName = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1268 | - REFERENCE_Name = dir_stream.read(REFERENCE_SizeOfName) | |
| 1269 | - REFERENCE_Reserved = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1270 | - check_value('REFERENCE_Reserved', 0x003E, REFERENCE_Reserved) | |
| 1271 | - REFERENCE_SizeOfNameUnicode = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1272 | - REFERENCE_NameUnicode = dir_stream.read(REFERENCE_SizeOfNameUnicode) | |
| 1298 | + reference_id = check | |
| 1299 | + reference_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1300 | + reference_name = dir_stream.read(reference_sizeof_name) | |
| 1301 | + reference_reserved = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1302 | + check_value('REFERENCE_Reserved', 0x003E, reference_reserved) | |
| 1303 | + reference_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1304 | + reference_name_unicode = dir_stream.read(reference_sizeof_name_unicode) | |
| 1305 | + unused = reference_id | |
| 1306 | + unused = reference_name | |
| 1307 | + unused = reference_name_unicode | |
| 1273 | 1308 | continue |
| 1274 | 1309 | |
| 1275 | 1310 | if check == 0x0033: |
| 1276 | 1311 | # REFERENCEORIGINAL (followed by REFERENCECONTROL) |
| 1277 | - REFERENCEORIGINAL_Id = check | |
| 1278 | - REFERENCEORIGINAL_SizeOfLibidOriginal = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1279 | - REFERENCEORIGINAL_LibidOriginal = dir_stream.read(REFERENCEORIGINAL_SizeOfLibidOriginal) | |
| 1312 | + referenceoriginal_id = check | |
| 1313 | + referenceoriginal_sizeof_libidoriginal = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1314 | + referenceoriginal_libidoriginal = dir_stream.read(referenceoriginal_sizeof_libidoriginal) | |
| 1315 | + unused = referenceoriginal_id | |
| 1316 | + unused = referenceoriginal_libidoriginal | |
| 1280 | 1317 | continue |
| 1281 | 1318 | |
| 1282 | 1319 | if check == 0x002F: |
| 1283 | 1320 | # REFERENCECONTROL |
| 1284 | - REFERENCECONTROL_Id = check | |
| 1285 | - REFERENCECONTROL_SizeTwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore | |
| 1286 | - REFERENCECONTROL_SizeOfLibidTwiddled = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1287 | - REFERENCECONTROL_LibidTwiddled = dir_stream.read(REFERENCECONTROL_SizeOfLibidTwiddled) | |
| 1288 | - REFERENCECONTROL_Reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore | |
| 1289 | - check_value('REFERENCECONTROL_Reserved1', 0x0000, REFERENCECONTROL_Reserved1) | |
| 1290 | - REFERENCECONTROL_Reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore | |
| 1291 | - check_value('REFERENCECONTROL_Reserved2', 0x0000, REFERENCECONTROL_Reserved2) | |
| 1321 | + referencecontrol_id = check | |
| 1322 | + referencecontrol_sizetwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore | |
| 1323 | + referencecontrol_sizeof_libidtwiddled = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1324 | + referencecontrol_libidtwiddled = dir_stream.read(referencecontrol_sizeof_libidtwiddled) | |
| 1325 | + referencecontrol_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore | |
| 1326 | + check_value('REFERENCECONTROL_Reserved1', 0x0000, referencecontrol_reserved1) | |
| 1327 | + referencecontrol_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore | |
| 1328 | + check_value('REFERENCECONTROL_Reserved2', 0x0000, referencecontrol_reserved2) | |
| 1329 | + unused = referencecontrol_id | |
| 1330 | + unused = referencecontrol_sizetwiddled | |
| 1331 | + unused = referencecontrol_libidtwiddled | |
| 1292 | 1332 | # optional field |
| 1293 | 1333 | check2 = struct.unpack("<H", dir_stream.read(2))[0] |
| 1294 | 1334 | if check2 == 0x0016: |
| 1295 | - REFERENCECONTROL_NameRecordExtended_Id = check | |
| 1296 | - REFERENCECONTROL_NameRecordExtended_SizeofName = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1297 | - REFERENCECONTROL_NameRecordExtended_Name = dir_stream.read( | |
| 1298 | - REFERENCECONTROL_NameRecordExtended_SizeofName) | |
| 1299 | - REFERENCECONTROL_NameRecordExtended_Reserved = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1335 | + referencecontrol_namerecordextended_id = check | |
| 1336 | + referencecontrol_namerecordextended_sizeof_name = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1337 | + referencecontrol_namerecordextended_name = dir_stream.read( | |
| 1338 | + referencecontrol_namerecordextended_sizeof_name) | |
| 1339 | + referencecontrol_namerecordextended_reserved = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1300 | 1340 | check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E, |
| 1301 | - REFERENCECONTROL_NameRecordExtended_Reserved) | |
| 1302 | - REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1303 | - REFERENCECONTROL_NameRecordExtended_NameUnicode = dir_stream.read( | |
| 1304 | - REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode) | |
| 1305 | - REFERENCECONTROL_Reserved3 = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1341 | + referencecontrol_namerecordextended_reserved) | |
| 1342 | + referencecontrol_namerecordextended_sizeof_name_unicode = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1343 | + referencecontrol_namerecordextended_name_unicode = dir_stream.read( | |
| 1344 | + referencecontrol_namerecordextended_sizeof_name_unicode) | |
| 1345 | + referencecontrol_reserved3 = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1346 | + unused = referencecontrol_namerecordextended_id | |
| 1347 | + unused = referencecontrol_namerecordextended_name | |
| 1348 | + unused = referencecontrol_namerecordextended_name_unicode | |
| 1306 | 1349 | else: |
| 1307 | - REFERENCECONTROL_Reserved3 = check2 | |
| 1308 | - | |
| 1309 | - check_value('REFERENCECONTROL_Reserved3', 0x0030, REFERENCECONTROL_Reserved3) | |
| 1310 | - REFERENCECONTROL_SizeExtended = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1311 | - REFERENCECONTROL_SizeOfLibidExtended = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1312 | - REFERENCECONTROL_LibidExtended = dir_stream.read(REFERENCECONTROL_SizeOfLibidExtended) | |
| 1313 | - REFERENCECONTROL_Reserved4 = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1314 | - REFERENCECONTROL_Reserved5 = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1315 | - REFERENCECONTROL_OriginalTypeLib = dir_stream.read(16) | |
| 1316 | - REFERENCECONTROL_Cookie = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1350 | + referencecontrol_reserved3 = check2 | |
| 1351 | + | |
| 1352 | + check_value('REFERENCECONTROL_Reserved3', 0x0030, referencecontrol_reserved3) | |
| 1353 | + referencecontrol_sizeextended = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1354 | + referencecontrol_sizeof_libidextended = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1355 | + referencecontrol_libidextended = dir_stream.read(referencecontrol_sizeof_libidextended) | |
| 1356 | + referencecontrol_reserved4 = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1357 | + referencecontrol_reserved5 = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1358 | + referencecontrol_originaltypelib = dir_stream.read(16) | |
| 1359 | + referencecontrol_cookie = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1360 | + unused = referencecontrol_sizeextended | |
| 1361 | + unused = referencecontrol_libidextended | |
| 1362 | + unused = referencecontrol_reserved4 | |
| 1363 | + unused = referencecontrol_reserved5 | |
| 1364 | + unused = referencecontrol_originaltypelib | |
| 1365 | + unused = referencecontrol_cookie | |
| 1317 | 1366 | continue |
| 1318 | 1367 | |
| 1319 | 1368 | if check == 0x000D: |
| 1320 | 1369 | # REFERENCEREGISTERED |
| 1321 | - REFERENCEREGISTERED_Id = check | |
| 1322 | - REFERENCEREGISTERED_Size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1323 | - REFERENCEREGISTERED_SizeOfLibid = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1324 | - REFERENCEREGISTERED_Libid = dir_stream.read(REFERENCEREGISTERED_SizeOfLibid) | |
| 1325 | - REFERENCEREGISTERED_Reserved1 = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1326 | - check_value('REFERENCEREGISTERED_Reserved1', 0x0000, REFERENCEREGISTERED_Reserved1) | |
| 1327 | - REFERENCEREGISTERED_Reserved2 = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1328 | - check_value('REFERENCEREGISTERED_Reserved2', 0x0000, REFERENCEREGISTERED_Reserved2) | |
| 1370 | + referenceregistered_id = check | |
| 1371 | + referenceregistered_size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1372 | + referenceregistered_sizeof_libid = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1373 | + referenceregistered_libid = dir_stream.read(referenceregistered_sizeof_libid) | |
| 1374 | + referenceregistered_reserved1 = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1375 | + check_value('REFERENCEREGISTERED_Reserved1', 0x0000, referenceregistered_reserved1) | |
| 1376 | + referenceregistered_reserved2 = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1377 | + check_value('REFERENCEREGISTERED_Reserved2', 0x0000, referenceregistered_reserved2) | |
| 1378 | + unused = referenceregistered_id | |
| 1379 | + unused = referenceregistered_size | |
| 1380 | + unused = referenceregistered_libid | |
| 1329 | 1381 | continue |
| 1330 | 1382 | |
| 1331 | 1383 | if check == 0x000E: |
| 1332 | 1384 | # REFERENCEPROJECT |
| 1333 | - REFERENCEPROJECT_Id = check | |
| 1334 | - REFERENCEPROJECT_Size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1335 | - REFERENCEPROJECT_SizeOfLibidAbsolute = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1336 | - REFERENCEPROJECT_LibidAbsolute = dir_stream.read(REFERENCEPROJECT_SizeOfLibidAbsolute) | |
| 1337 | - REFERENCEPROJECT_SizeOfLibidRelative = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1338 | - REFERENCEPROJECT_LibidRelative = dir_stream.read(REFERENCEPROJECT_SizeOfLibidRelative) | |
| 1339 | - REFERENCEPROJECT_MajorVersion = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1340 | - REFERENCEPROJECT_MinorVersion = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1385 | + referenceproject_id = check | |
| 1386 | + referenceproject_size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1387 | + referenceproject_sizeof_libidabsolute = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1388 | + referenceproject_libidabsolute = dir_stream.read(referenceproject_sizeof_libidabsolute) | |
| 1389 | + referenceproject_sizeof_libidrelative = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1390 | + referenceproject_libidrelative = dir_stream.read(referenceproject_sizeof_libidrelative) | |
| 1391 | + referenceproject_majorversion = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1392 | + referenceproject_minorversion = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1393 | + unused = referenceproject_id | |
| 1394 | + unused = referenceproject_size | |
| 1395 | + unused = referenceproject_libidabsolute | |
| 1396 | + unused = referenceproject_libidrelative | |
| 1397 | + unused = referenceproject_majorversion | |
| 1398 | + unused = referenceproject_minorversion | |
| 1341 | 1399 | continue |
| 1342 | 1400 | |
| 1343 | 1401 | log.error('invalid or unknown check Id {0:04X}'.format(check)) |
| 1344 | 1402 | sys.exit(0) |
| 1345 | 1403 | |
| 1346 | - PROJECTMODULES_Id = check #struct.unpack("<H", dir_stream.read(2))[0] | |
| 1347 | - check_value('PROJECTMODULES_Id', 0x000F, PROJECTMODULES_Id) | |
| 1348 | - PROJECTMODULES_Size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1349 | - check_value('PROJECTMODULES_Size', 0x0002, PROJECTMODULES_Size) | |
| 1350 | - PROJECTMODULES_Count = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1351 | - PROJECTMODULES_ProjectCookieRecord_Id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1352 | - check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, PROJECTMODULES_ProjectCookieRecord_Id) | |
| 1353 | - PROJECTMODULES_ProjectCookieRecord_Size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1354 | - check_value('PROJECTMODULES_ProjectCookieRecord_Size', 0x0002, PROJECTMODULES_ProjectCookieRecord_Size) | |
| 1355 | - PROJECTMODULES_ProjectCookieRecord_Cookie = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1356 | - | |
| 1357 | - log.debug("parsing {0} modules".format(PROJECTMODULES_Count)) | |
| 1358 | - for x in xrange(0, PROJECTMODULES_Count): | |
| 1359 | - MODULENAME_Id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1360 | - check_value('MODULENAME_Id', 0x0019, MODULENAME_Id) | |
| 1361 | - MODULENAME_SizeOfModuleName = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1362 | - MODULENAME_ModuleName = dir_stream.read(MODULENAME_SizeOfModuleName) | |
| 1404 | + projectmodules_id = check #struct.unpack("<H", dir_stream.read(2))[0] | |
| 1405 | + check_value('PROJECTMODULES_Id', 0x000F, projectmodules_id) | |
| 1406 | + projectmodules_size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1407 | + check_value('PROJECTMODULES_Size', 0x0002, projectmodules_size) | |
| 1408 | + projectmodules_count = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1409 | + projectmodules_projectcookierecord_id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1410 | + check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, projectmodules_projectcookierecord_id) | |
| 1411 | + projectmodules_projectcookierecord_size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1412 | + check_value('PROJECTMODULES_ProjectCookieRecord_Size', 0x0002, projectmodules_projectcookierecord_size) | |
| 1413 | + projectmodules_projectcookierecord_cookie = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1414 | + unused = projectmodules_projectcookierecord_cookie | |
| 1415 | + | |
| 1416 | + log.debug("parsing {0} modules".format(projectmodules_count)) | |
| 1417 | + for _ in xrange(0, projectmodules_count): | |
| 1418 | + modulename_id = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1419 | + check_value('MODULENAME_Id', 0x0019, modulename_id) | |
| 1420 | + modulename_sizeof_modulename = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1421 | + modulename_modulename = dir_stream.read(modulename_sizeof_modulename) | |
| 1363 | 1422 | # account for optional sections |
| 1364 | 1423 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1365 | 1424 | if section_id == 0x0047: |
| 1366 | - MODULENAMEUNICODE_Id = section_id | |
| 1367 | - MODULENAMEUNICODE_SizeOfModuleNameUnicode = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1368 | - MODULENAMEUNICODE_ModuleNameUnicode = dir_stream.read(MODULENAMEUNICODE_SizeOfModuleNameUnicode) | |
| 1425 | + modulename_unicode_id = section_id | |
| 1426 | + modulename_unicode_sizeof_modulename_unicode = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1427 | + modulename_unicode_modulename_unicode = dir_stream.read(modulename_unicode_sizeof_modulename_unicode) | |
| 1428 | + unused = modulename_unicode_id | |
| 1429 | + unused = modulename_unicode_modulename_unicode | |
| 1369 | 1430 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1370 | 1431 | if section_id == 0x001A: |
| 1371 | - MODULESTREAMNAME_id = section_id | |
| 1372 | - MODULESTREAMNAME_SizeOfStreamName = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1373 | - MODULESTREAMNAME_StreamName = dir_stream.read(MODULESTREAMNAME_SizeOfStreamName) | |
| 1374 | - MODULESTREAMNAME_Reserved = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1375 | - check_value('MODULESTREAMNAME_Reserved', 0x0032, MODULESTREAMNAME_Reserved) | |
| 1376 | - MODULESTREAMNAME_SizeOfStreamNameUnicode = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1377 | - MODULESTREAMNAME_StreamNameUnicode = dir_stream.read(MODULESTREAMNAME_SizeOfStreamNameUnicode) | |
| 1432 | + modulestreamname_id = section_id | |
| 1433 | + modulestreamname_sizeof_streamname = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1434 | + modulestreamname_streamname = dir_stream.read(modulestreamname_sizeof_streamname) | |
| 1435 | + modulestreamname_reserved = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1436 | + check_value('MODULESTREAMNAME_Reserved', 0x0032, modulestreamname_reserved) | |
| 1437 | + modulestreamname_sizeof_streamname_unicode = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1438 | + modulestreamname_streamname_unicode = dir_stream.read(modulestreamname_sizeof_streamname_unicode) | |
| 1439 | + unused = modulestreamname_id | |
| 1378 | 1440 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1379 | 1441 | if section_id == 0x001C: |
| 1380 | - MODULEDOCSTRING_Id = section_id | |
| 1381 | - check_value('MODULEDOCSTRING_Id', 0x001C, MODULEDOCSTRING_Id) | |
| 1382 | - MODULEDOCSTRING_SizeOfDocString = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1383 | - MODULEDOCSTRING_DocString = dir_stream.read(MODULEDOCSTRING_SizeOfDocString) | |
| 1384 | - MODULEDOCSTRING_Reserved = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1385 | - check_value('MODULEDOCSTRING_Reserved', 0x0048, MODULEDOCSTRING_Reserved) | |
| 1386 | - MODULEDOCSTRING_SizeOfDocStringUnicode = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1387 | - MODULEDOCSTRING_DocStringUnicode = dir_stream.read(MODULEDOCSTRING_SizeOfDocStringUnicode) | |
| 1442 | + moduledocstring_id = section_id | |
| 1443 | + check_value('MODULEDOCSTRING_Id', 0x001C, moduledocstring_id) | |
| 1444 | + moduledocstring_sizeof_docstring = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1445 | + moduledocstring_docstring = dir_stream.read(moduledocstring_sizeof_docstring) | |
| 1446 | + moduledocstring_reserved = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1447 | + check_value('MODULEDOCSTRING_Reserved', 0x0048, moduledocstring_reserved) | |
| 1448 | + moduledocstring_sizeof_docstring_unicode = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1449 | + moduledocstring_docstring_unicode = dir_stream.read(moduledocstring_sizeof_docstring_unicode) | |
| 1450 | + unused = moduledocstring_docstring | |
| 1451 | + unused = moduledocstring_docstring_unicode | |
| 1388 | 1452 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1389 | 1453 | if section_id == 0x0031: |
| 1390 | - MODULEOFFSET_Id = section_id | |
| 1391 | - check_value('MODULEOFFSET_Id', 0x0031, MODULEOFFSET_Id) | |
| 1392 | - MODULEOFFSET_Size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1393 | - check_value('MODULEOFFSET_Size', 0x0004, MODULEOFFSET_Size) | |
| 1394 | - MODULEOFFSET_TextOffset = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1454 | + moduleoffset_id = section_id | |
| 1455 | + check_value('MODULEOFFSET_Id', 0x0031, moduleoffset_id) | |
| 1456 | + moduleoffset_size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1457 | + check_value('MODULEOFFSET_Size', 0x0004, moduleoffset_size) | |
| 1458 | + moduleoffset_textoffset = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1395 | 1459 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1396 | 1460 | if section_id == 0x001E: |
| 1397 | - MODULEHELPCONTEXT_Id = section_id | |
| 1398 | - check_value('MODULEHELPCONTEXT_Id', 0x001E, MODULEHELPCONTEXT_Id) | |
| 1399 | - MODULEHELPCONTEXT_Size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1400 | - check_value('MODULEHELPCONTEXT_Size', 0x0004, MODULEHELPCONTEXT_Size) | |
| 1401 | - MODULEHELPCONTEXT_HelpContext = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1461 | + modulehelpcontext_id = section_id | |
| 1462 | + check_value('MODULEHELPCONTEXT_Id', 0x001E, modulehelpcontext_id) | |
| 1463 | + modulehelpcontext_size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1464 | + check_value('MODULEHELPCONTEXT_Size', 0x0004, modulehelpcontext_size) | |
| 1465 | + modulehelpcontext_helpcontext = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1466 | + unused = modulehelpcontext_helpcontext | |
| 1402 | 1467 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1403 | 1468 | if section_id == 0x002C: |
| 1404 | - MODULECOOKIE_Id = section_id | |
| 1405 | - check_value('MODULECOOKIE_Id', 0x002C, MODULECOOKIE_Id) | |
| 1406 | - MODULECOOKIE_Size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1407 | - check_value('MODULECOOKIE_Size', 0x0002, MODULECOOKIE_Size) | |
| 1408 | - MODULECOOKIE_Cookie = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1469 | + modulecookie_id = section_id | |
| 1470 | + check_value('MODULECOOKIE_Id', 0x002C, modulecookie_id) | |
| 1471 | + modulecookie_size = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1472 | + check_value('MODULECOOKIE_Size', 0x0002, modulecookie_size) | |
| 1473 | + modulecookie_cookie = struct.unpack("<H", dir_stream.read(2))[0] | |
| 1474 | + unused = modulecookie_cookie | |
| 1409 | 1475 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1410 | 1476 | if section_id == 0x0021 or section_id == 0x0022: |
| 1411 | - MODULETYPE_Id = section_id | |
| 1412 | - MODULETYPE_Reserved = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1477 | + moduletype_id = section_id | |
| 1478 | + moduletype_reserved = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1479 | + unused = moduletype_id | |
| 1480 | + unused = moduletype_reserved | |
| 1413 | 1481 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1414 | 1482 | if section_id == 0x0025: |
| 1415 | - MODULEREADONLY_Id = section_id | |
| 1416 | - check_value('MODULEREADONLY_Id', 0x0025, MODULEREADONLY_Id) | |
| 1417 | - MODULEREADONLY_Reserved = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1418 | - check_value('MODULEREADONLY_Reserved', 0x0000, MODULEREADONLY_Reserved) | |
| 1483 | + modulereadonly_id = section_id | |
| 1484 | + check_value('MODULEREADONLY_Id', 0x0025, modulereadonly_id) | |
| 1485 | + modulereadonly_reserved = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1486 | + check_value('MODULEREADONLY_Reserved', 0x0000, modulereadonly_reserved) | |
| 1419 | 1487 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1420 | 1488 | if section_id == 0x0028: |
| 1421 | - MODULEPRIVATE_Id = section_id | |
| 1422 | - check_value('MODULEPRIVATE_Id', 0x0028, MODULEPRIVATE_Id) | |
| 1423 | - MODULEPRIVATE_Reserved = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1424 | - check_value('MODULEPRIVATE_Reserved', 0x0000, MODULEPRIVATE_Reserved) | |
| 1489 | + moduleprivate_id = section_id | |
| 1490 | + check_value('MODULEPRIVATE_Id', 0x0028, moduleprivate_id) | |
| 1491 | + moduleprivate_reserved = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1492 | + check_value('MODULEPRIVATE_Reserved', 0x0000, moduleprivate_reserved) | |
| 1425 | 1493 | section_id = struct.unpack("<H", dir_stream.read(2))[0] |
| 1426 | 1494 | if section_id == 0x002B: # TERMINATOR |
| 1427 | - MODULE_Reserved = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1428 | - check_value('MODULE_Reserved', 0x0000, MODULE_Reserved) | |
| 1495 | + module_reserved = struct.unpack("<L", dir_stream.read(4))[0] | |
| 1496 | + check_value('MODULE_Reserved', 0x0000, module_reserved) | |
| 1429 | 1497 | section_id = None |
| 1430 | 1498 | if section_id != None: |
| 1431 | 1499 | log.warning('unknown or invalid module section id {0:04X}'.format(section_id)) |
| 1432 | 1500 | |
| 1433 | - log.debug('Project CodePage = %d' % PROJECTCODEPAGE_CodePage) | |
| 1434 | - vba_codec = 'cp%d' % PROJECTCODEPAGE_CodePage | |
| 1435 | - log.debug("ModuleName = {0}".format(MODULENAME_ModuleName)) | |
| 1436 | - log.debug("StreamName = {0}".format(repr(MODULESTREAMNAME_StreamName))) | |
| 1437 | - streamname_unicode = MODULESTREAMNAME_StreamName.decode(vba_codec) | |
| 1501 | + log.debug('Project CodePage = %d' % projectcodepage_codepage) | |
| 1502 | + vba_codec = 'cp%d' % projectcodepage_codepage | |
| 1503 | + log.debug("ModuleName = {0}".format(modulename_modulename)) | |
| 1504 | + log.debug("StreamName = {0}".format(repr(modulestreamname_streamname))) | |
| 1505 | + streamname_unicode = modulestreamname_streamname.decode(vba_codec) | |
| 1438 | 1506 | log.debug("StreamName.decode('%s') = %s" % (vba_codec, repr(streamname_unicode))) |
| 1439 | - log.debug("StreamNameUnicode = {0}".format(repr(MODULESTREAMNAME_StreamNameUnicode))) | |
| 1440 | - log.debug("TextOffset = {0}".format(MODULEOFFSET_TextOffset)) | |
| 1507 | + log.debug("StreamNameUnicode = {0}".format(repr(modulestreamname_streamname_unicode))) | |
| 1508 | + log.debug("TextOffset = {0}".format(moduleoffset_textoffset)) | |
| 1441 | 1509 | |
| 1442 | 1510 | code_path = vba_root + u'VBA/' + streamname_unicode |
| 1443 | 1511 | #TODO: test if stream exists |
| 1444 | 1512 | log.debug('opening VBA code stream %s' % repr(code_path)) |
| 1445 | 1513 | code_data = ole.openstream(code_path).read() |
| 1446 | 1514 | log.debug("length of code_data = {0}".format(len(code_data))) |
| 1447 | - log.debug("offset of code_data = {0}".format(MODULEOFFSET_TextOffset)) | |
| 1448 | - code_data = code_data[MODULEOFFSET_TextOffset:] | |
| 1515 | + log.debug("offset of code_data = {0}".format(moduleoffset_textoffset)) | |
| 1516 | + code_data = code_data[moduleoffset_textoffset:] | |
| 1449 | 1517 | if len(code_data) > 0: |
| 1450 | 1518 | code_data = decompress_stream(code_data) |
| 1451 | 1519 | # case-insensitive search in the code_modules dict to find the file extension: |
| 1452 | - filext = code_modules.get(MODULENAME_ModuleName.lower(), 'bin') | |
| 1453 | - filename = '{0}.{1}'.format(MODULENAME_ModuleName, filext) | |
| 1520 | + filext = code_modules.get(modulename_modulename.lower(), 'bin') | |
| 1521 | + filename = '{0}.{1}'.format(modulename_modulename, filext) | |
| 1454 | 1522 | #TODO: also yield the codepage so that callers can decode it properly |
| 1455 | 1523 | yield (code_path, filename, code_data) |
| 1456 | 1524 | # print '-'*79 |
| ... | ... | @@ -1460,7 +1528,8 @@ def _extract_vba(ole, vba_root, project_path, dir_path): |
| 1460 | 1528 | # print '' |
| 1461 | 1529 | log.debug('extracted file {0}'.format(filename)) |
| 1462 | 1530 | else: |
| 1463 | - log.warning("module stream {0} has code data length 0".format(MODULESTREAMNAME_StreamName)) | |
| 1531 | + log.warning("module stream {0} has code data length 0".format(modulestreamname_streamname)) | |
| 1532 | + _ = unused | |
| 1464 | 1533 | return |
| 1465 | 1534 | |
| 1466 | 1535 | |
| ... | ... | @@ -1616,12 +1685,9 @@ def detect_base64_strings(vba_code): |
| 1616 | 1685 | decoded = base64.b64decode(value) |
| 1617 | 1686 | results.append((value, decoded)) |
| 1618 | 1687 | found.add(value) |
| 1619 | - except KeyboardInterrupt: | |
| 1620 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 1621 | - raise | |
| 1622 | - except: | |
| 1688 | + except (TypeError, ValueError) as exc: | |
| 1689 | + log.debug('Failed to base64-decode (%s)' % exc) | |
| 1623 | 1690 | # if an exception occurs, it is likely not a base64-encoded string |
| 1624 | - pass | |
| 1625 | 1691 | return results |
| 1626 | 1692 | |
| 1627 | 1693 | |
| ... | ... | @@ -1646,12 +1712,9 @@ def detect_dridex_strings(vba_code): |
| 1646 | 1712 | decoded = DridexUrlDecode(value) |
| 1647 | 1713 | results.append((value, decoded)) |
| 1648 | 1714 | found.add(value) |
| 1649 | - except KeyboardInterrupt: | |
| 1650 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 1651 | - raise | |
| 1652 | - except: | |
| 1715 | + except Exception as exc: | |
| 1716 | + log.debug('Failed to Dridex-decode (%s)' % exc) | |
| 1653 | 1717 | # if an exception occurs, it is likely not a dridex-encoded string |
| 1654 | - pass | |
| 1655 | 1718 | return results |
| 1656 | 1719 | |
| 1657 | 1720 | |
| ... | ... | @@ -1701,16 +1764,17 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): |
| 1701 | 1764 | elif isinstance(json_obj, (bool, int, float)): |
| 1702 | 1765 | pass |
| 1703 | 1766 | elif isinstance(json_obj, str): |
| 1767 | + # decode and re-encode, so invalid bytes are handled according to `errors` | |
| 1704 | 1768 | dencoded = json_obj.decode(encoding, errors).encode(encoding, errors) |
| 1705 | - if dencoded != str: | |
| 1706 | - logging.info('json2ascii: replaced: {0} (len {1})' | |
| 1707 | - .format(json_obj, len(json_obj))) | |
| 1708 | - logging.info('json2ascii: with: {0} (len {1})' | |
| 1709 | - .format(dencoded, len(dencoded))) | |
| 1769 | + if dencoded != json_obj: | |
| 1770 | + log.info('json2ascii: replaced: {0} (len {1})' | |
| 1771 | + .format(json_obj, len(json_obj))) | |
| 1772 | + log.info('json2ascii: with: {0} (len {1})' | |
| 1773 | + .format(dencoded, len(dencoded))) | |
| 1710 | 1774 | return dencoded |
| 1711 | 1775 | elif isinstance(json_obj, unicode): |
| 1712 | - logging.info('json2ascii: replaced: {0}' | |
| 1713 | - .format(json_obj.encode(encoding, errors))) | |
| 1776 | + log.info('json2ascii: replaced: {0}' | |
| 1777 | + .format(json_obj.encode(encoding, errors))) | |
| 1714 | 1778 | # cannot put original into logger |
| 1715 | 1779 | # print 'original: ' json_obj |
| 1716 | 1780 | return json_obj.encode(encoding, errors) |
| ... | ... | @@ -1721,11 +1785,50 @@ def json2ascii(json_obj, encoding='utf8', errors='replace'): |
| 1721 | 1785 | for item in json_obj: |
| 1722 | 1786 | item = json2ascii(item) |
| 1723 | 1787 | else: |
| 1724 | - logging.debug('unexpected type in json2ascii: {0} -- leave as is' | |
| 1725 | - .format(type(json_obj))) | |
| 1788 | + log.debug('unexpected type in json2ascii: {0} -- leave as is' | |
| 1789 | + .format(type(json_obj))) | |
| 1726 | 1790 | return json_obj |
| 1727 | 1791 | |
| 1728 | 1792 | |
| 1793 | +_have_printed_json_start = False | |
| 1794 | + | |
| 1795 | +def print_json(json_dict=None, _json_is_last=False, **json_parts): | |
| 1796 | + """ Print one entry of the top-level json list, line by line and indented | |
| 1797 | + | |
| 1798 | + Can be used in two ways (the first call also prints the opening '['): | |
| 1799 | + (1) print_json(some_dict) | |
| 1800 | + (2) print_json(key1=value1, key2=value2, ...) | |
| 1801 | + :raises ValueError: if both forms are combined or json_dict is not a dict | |
| 1802 | + :param bool _json_is_last: set to True only for the very last entry: it is | |
| 1803 | + printed without trailing comma, followed by the closing ']' | |
| 1804 | + """ | |
| 1805 | + global _have_printed_json_start | |
| 1806 | + | |
| 1807 | + if json_dict and json_parts: | |
| 1808 | + raise ValueError('Invalid json argument: want either single dict or ' | |
| 1809 | + 'key=value parts but got both)') | |
| 1810 | + elif (json_dict is not None) and (not isinstance(json_dict, dict)): | |
| 1811 | + raise ValueError('Invalid json argument: want either single dict or ' | |
| 1812 | + 'key=value parts but got {} instead of dict)' | |
| 1813 | + .format(type(json_dict))) | |
| 1814 | + if json_parts: | |
| 1815 | + json_dict = json_parts | |
| 1816 | + | |
| 1817 | + if not _have_printed_json_start: | |
| 1818 | + print '[' | |
| 1819 | + _have_printed_json_start = True | |
| 1820 | + | |
| 1821 | + lines = json.dumps(json2ascii(json_dict), check_circular=False, | |
| 1822 | + indent=4, ensure_ascii=False).splitlines() | |
| 1823 | + for line in lines[:-1]: | |
| 1824 | + print ' {}'.format(line) | |
| 1825 | + if _json_is_last: | |
| 1826 | + print ' {}'.format(lines[-1]) # print last line without comma | |
| 1827 | + print ']' | |
| 1828 | + else: | |
| 1829 | + print ' {},'.format(lines[-1]) # print last line with comma | |
| 1830 | + | |
| 1831 | + | |
| 1729 | 1832 | class VBA_Scanner(object): |
| 1730 | 1833 | """ |
| 1731 | 1834 | Class to scan the source code of a VBA module to find obfuscated strings, |
| ... | ... | @@ -1924,6 +2027,8 @@ class VBA_Parser(object): |
| 1924 | 2027 | |
| 1925 | 2028 | :param container: str, path and filename of container if the file is within |
| 1926 | 2029 | a zip archive, None otherwise. |
| 2030 | + | |
| 2031 | + raises a FileOpenError if all attempts to interpret the data header failed | |
| 1927 | 2032 | """ |
| 1928 | 2033 | #TODO: filename should only be a string, data should be used for the file-like object |
| 1929 | 2034 | #TODO: filename should be mandatory, optional data is a string or file-like object |
| ... | ... | @@ -2000,8 +2105,8 @@ class VBA_Parser(object): |
| 2000 | 2105 | if self.type is None: |
| 2001 | 2106 | # At this stage, could not match a known format: |
| 2002 | 2107 | msg = '%s is not a supported file type, cannot extract VBA Macros.' % self.filename |
| 2003 | - log.error(msg) | |
| 2004 | - raise TypeError(msg) | |
| 2108 | + log.info(msg) | |
| 2109 | + raise FileOpenError(msg) | |
| 2005 | 2110 | |
| 2006 | 2111 | def open_ole(self, _file): |
| 2007 | 2112 | """ |
| ... | ... | @@ -2016,13 +2121,10 @@ class VBA_Parser(object): |
| 2016 | 2121 | # TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet |
| 2017 | 2122 | # set type only if parsing succeeds |
| 2018 | 2123 | self.type = TYPE_OLE |
| 2019 | - except KeyboardInterrupt: | |
| 2020 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2021 | - raise | |
| 2022 | - except: | |
| 2124 | + except (IOError, TypeError, ValueError) as exc: | |
| 2023 | 2125 | # TODO: handle OLE parsing exceptions |
| 2024 | - log.exception('Failed OLE parsing for file %r' % self.filename) | |
| 2025 | - pass | |
| 2126 | + log.info('Failed OLE parsing for file %r (%s)' % (self.filename, exc)) | |
| 2127 | + log.debug('Trace:', exc_info=True) | |
| 2026 | 2128 | |
| 2027 | 2129 | |
| 2028 | 2130 | def open_openxml(self, _file): |
| ... | ... | @@ -2048,22 +2150,17 @@ class VBA_Parser(object): |
| 2048 | 2150 | ole_data = z.open(subfile).read() |
| 2049 | 2151 | try: |
| 2050 | 2152 | self.ole_subfiles.append(VBA_Parser(filename=subfile, data=ole_data)) |
| 2051 | - except KeyboardInterrupt: | |
| 2052 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2053 | - raise | |
| 2054 | - except: | |
| 2055 | - log.debug('%s is not a valid OLE file' % subfile) | |
| 2153 | + except FileOpenError as exc: | |
| 2154 | + log.info('%s is not a valid OLE file (%s)' % (subfile, exc)) | |
| 2056 | 2155 | continue |
| 2057 | 2156 | z.close() |
| 2058 | 2157 | # set type only if parsing succeeds |
| 2059 | 2158 | self.type = TYPE_OpenXML |
| 2060 | - except KeyboardInterrupt: | |
| 2061 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2062 | - raise | |
| 2063 | - except: | |
| 2159 | + except (RuntimeError, zipfile.BadZipfile, zipfile.LargeZipFile, IOError) as exc: | |
| 2064 | 2160 | # TODO: handle parsing exceptions |
| 2065 | - log.exception('Failed Zip/OpenXML parsing for file %r' % self.filename) | |
| 2066 | - pass | |
| 2161 | + log.info('Failed Zip/OpenXML parsing for file %r (%s)' | |
| 2162 | + % (self.filename, exc)) | |
| 2163 | + log.debug('Trace:', exc_info=True) | |
| 2067 | 2164 | |
| 2068 | 2165 | def open_word2003xml(self, data): |
| 2069 | 2166 | """ |
| ... | ... | @@ -2087,25 +2184,25 @@ class VBA_Parser(object): |
| 2087 | 2184 | if is_mso_file(mso_data): |
| 2088 | 2185 | # decompress the zlib data stored in the MSO file, which is the OLE container: |
| 2089 | 2186 | # TODO: handle different offsets => separate function |
| 2090 | - ole_data = mso_file_extract(mso_data) | |
| 2091 | 2187 | try: |
| 2188 | + ole_data = mso_file_extract(mso_data) | |
| 2092 | 2189 | self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) |
| 2093 | - except KeyboardInterrupt: | |
| 2094 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2095 | - raise | |
| 2096 | - except: | |
| 2097 | - log.error('%s does not contain a valid OLE file' % fname) | |
| 2190 | + except MsoExtractionError: | |
| 2191 | + log.info('Failed decompressing an MSO container in %r - %s' | |
| 2192 | + % (fname, MSG_OLEVBA_ISSUES)) | |
| 2193 | + log.debug('Trace:', exc_info=True) | |
| 2194 | + except FileOpenError as exc: | |
| 2195 | + log.debug('%s is not a valid OLE sub file (%s)' % (fname, exc)) | |
| 2098 | 2196 | else: |
| 2099 | - log.error('%s is not a valid MSO file' % fname) | |
| 2197 | + log.info('%s is not a valid MSO file' % fname) | |
| 2100 | 2198 | # set type only if parsing succeeds |
| 2101 | 2199 | self.type = TYPE_Word2003_XML |
| 2102 | - except KeyboardInterrupt: | |
| 2103 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2104 | - raise | |
| 2105 | - except: | |
| 2200 | + except Exception as exc: | |
| 2106 | 2201 | # TODO: differentiate exceptions for each parsing stage |
| 2107 | - log.exception('Failed XML parsing for file %r' % self.filename) | |
| 2108 | - pass | |
| 2203 | + # (but ET may come from different libraries, with no good exception description in their APIs) | |
| 2203 | + # found: XMLSyntaxError | |
| 2204 | + log.info('Failed XML parsing for file %r (%s)' % (self.filename, exc)) | |
| 2205 | + log.debug('Trace:', exc_info=True) | |
| 2109 | 2206 | |
| 2110 | 2207 | def open_mht(self, data): |
| 2111 | 2208 | """ |
| ... | ... | @@ -2148,40 +2245,30 @@ class VBA_Parser(object): |
| 2148 | 2245 | log.debug('Found ActiveMime header, decompressing MSO container') |
| 2149 | 2246 | try: |
| 2150 | 2247 | ole_data = mso_file_extract(part_data) |
| 2151 | - try: | |
| 2152 | - # TODO: check if it is actually an OLE file | |
| 2153 | - # TODO: get the MSO filename from content_location? | |
| 2154 | - self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | |
| 2155 | - except KeyboardInterrupt: | |
| 2156 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2157 | - raise | |
| 2158 | - except: | |
| 2159 | - log.debug('%s does not contain a valid OLE file' % fname) | |
| 2160 | - except KeyboardInterrupt: | |
| 2161 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2162 | - raise | |
| 2163 | - except: | |
| 2164 | - log.exception('Failed decompressing an MSO container in %r - %s' | |
| 2248 | + | |
| 2249 | + # TODO: check if it is actually an OLE file | |
| 2250 | + # TODO: get the MSO filename from content_location? | |
| 2251 | + self.ole_subfiles.append(VBA_Parser(filename=fname, data=ole_data)) | |
| 2252 | + except MsoExtractionError: | |
| 2253 | + log.info('Failed decompressing an MSO container in %r - %s' | |
| 2165 | 2254 | % (fname, MSG_OLEVBA_ISSUES)) |
| 2255 | + log.debug('Trace:', exc_info=True) | |
| 2166 | 2256 | # TODO: bug here - need to split in smaller functions/classes? |
| 2257 | + except FileOpenError as exc: | |
| 2258 | + log.debug('%s does not contain a valid OLE file (%s)' | |
| 2259 | + % (fname, exc)) | |
| 2167 | 2260 | else: |
| 2261 | + log.debug('type(part_data) = %s' % type(part_data)) | |
| 2168 | 2262 | try: |
| 2169 | - log.debug('type(part_data) = %s' % type(part_data)) | |
| 2170 | 2263 | log.debug('part_data[0:20] = %r' % part_data[0:20]) |
| 2171 | - except KeyboardInterrupt: | |
| 2172 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2173 | - raise | |
| 2174 | - except: | |
| 2175 | - pass | |
| 2264 | + except TypeError as err: | |
| 2265 | + log.debug('part_data has no __getitem__') | |
| 2176 | 2266 | # set type only if parsing succeeds |
| 2177 | 2267 | self.type = TYPE_MHTML |
| 2178 | - except KeyboardInterrupt: | |
| 2179 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2180 | - raise | |
| 2181 | - except: | |
| 2182 | - log.exception('Failed MIME parsing for file %r - %s' | |
| 2183 | - % (self.filename, MSG_OLEVBA_ISSUES)) | |
| 2184 | - pass | |
| 2268 | + except Exception: | |
| 2269 | + log.info('Failed MIME parsing for file %r - %s' | |
| 2270 | + % (self.filename, MSG_OLEVBA_ISSUES)) | |
| 2271 | + log.debug('Trace:', exc_info=True) | |
| 2185 | 2272 | |
| 2186 | 2273 | |
| 2187 | 2274 | def open_text(self, data): |
| ... | ... | @@ -2191,19 +2278,11 @@ class VBA_Parser(object): |
| 2191 | 2278 | :return: nothing |
| 2192 | 2279 | """ |
| 2193 | 2280 | log.info('Opening text file %s' % self.filename) |
| 2194 | - try: | |
| 2195 | - # directly store the source code: | |
| 2196 | - self.vba_code_all_modules = data | |
| 2197 | - self.contains_macros = True | |
| 2198 | - # set type only if parsing succeeds | |
| 2199 | - self.type = TYPE_TEXT | |
| 2200 | - except KeyboardInterrupt: | |
| 2201 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2202 | - raise | |
| 2203 | - except: | |
| 2204 | - log.exception('Failed text parsing for file %r - %s' | |
| 2205 | - % (self.filename, MSG_OLEVBA_ISSUES)) | |
| 2206 | - pass | |
| 2281 | + # directly store the source code: | |
| 2282 | + self.vba_code_all_modules = data | |
| 2283 | + self.contains_macros = True | |
| 2284 | + # set type only if parsing succeeds | |
| 2285 | + self.type = TYPE_TEXT | |
| 2207 | 2286 | |
| 2208 | 2287 | |
| 2209 | 2288 | def find_vba_projects(self): |
| ... | ... | @@ -2247,6 +2326,15 @@ class VBA_Parser(object): |
| 2247 | 2326 | # - The root/VBA storage MUST contain a _VBA_PROJECT stream and a dir stream |
| 2248 | 2327 | # - all names are case-insensitive |
| 2249 | 2328 | |
| 2329 | + def check_vba_stream(ole, vba_root, stream_path): | |
| 2330 | + full_path = vba_root + stream_path | |
| 2331 | + if ole.exists(full_path) and ole.get_type(full_path) == olefile.STGTY_STREAM: | |
| 2332 | + log.debug('Found %s stream: %s' % (stream_path, full_path)) | |
| 2333 | + return full_path | |
| 2334 | + else: | |
| 2335 | + log.debug('Missing %s stream, this is not a valid VBA project structure' % stream_path) | |
| 2336 | + return False | |
| 2337 | + | |
| 2250 | 2338 | # start with an empty list: |
| 2251 | 2339 | self.vba_projects = [] |
| 2252 | 2340 | # Look for any storage containing those storage/streams: |
| ... | ... | @@ -2263,15 +2351,6 @@ class VBA_Parser(object): |
| 2263 | 2351 | vba_root += '/' |
| 2264 | 2352 | log.debug('Checking vba_root="%s"' % vba_root) |
| 2265 | 2353 | |
| 2266 | - def check_vba_stream(ole, vba_root, stream_path): | |
| 2267 | - full_path = vba_root + stream_path | |
| 2268 | - if ole.exists(full_path) and ole.get_type(full_path) == olefile.STGTY_STREAM: | |
| 2269 | - log.debug('Found %s stream: %s' % (stream_path, full_path)) | |
| 2270 | - return full_path | |
| 2271 | - else: | |
| 2272 | - log.debug('Missing %s stream, this is not a valid VBA project structure' % stream_path) | |
| 2273 | - return False | |
| 2274 | - | |
| 2275 | 2354 | # Check if the VBA root storage also contains a PROJECT stream: |
| 2276 | 2355 | project_path = check_vba_stream(ole, vba_root, 'PROJECT') |
| 2277 | 2356 | if not project_path: continue |
| ... | ... | @@ -2436,10 +2515,10 @@ class VBA_Parser(object): |
| 2436 | 2515 | # variable to merge source code from all modules: |
| 2437 | 2516 | if self.vba_code_all_modules is None: |
| 2438 | 2517 | self.vba_code_all_modules = '' |
| 2439 | - for (subfilename, stream_path, vba_filename, vba_code) in self.extract_all_macros(): | |
| 2518 | + for (_, _, _, vba_code) in self.extract_all_macros(): | |
| 2440 | 2519 | #TODO: filter code? (each module) |
| 2441 | 2520 | self.vba_code_all_modules += vba_code + '\n' |
| 2442 | - for (subfilename, form_path, form_string) in self.extract_form_strings(): | |
| 2521 | + for (_, _, form_string) in self.extract_form_strings(): | |
| 2443 | 2522 | self.vba_code_all_modules += form_string + '\n' |
| 2444 | 2523 | # Analyze the whole code at once: |
| 2445 | 2524 | scanner = VBA_Scanner(self.vba_code_all_modules) |
| ... | ... | @@ -2587,8 +2666,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2587 | 2666 | def __init__(self, filename, data=None, container=None): |
| 2588 | 2667 | """ |
| 2589 | 2668 | Constructor for VBA_Parser_CLI. |
| 2590 | - Calls __init__ from VBA_Parser, but handles the TypeError exception | |
| 2591 | - when the file type is not supported. | |
| 2669 | + Calls __init__ from VBA_Parser | |
| 2592 | 2670 | |
| 2593 | 2671 | :param filename: filename or path of file to parse, or file-like object |
| 2594 | 2672 | |
| ... | ... | @@ -2599,11 +2677,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2599 | 2677 | :param container: str, path and filename of container if the file is within |
| 2600 | 2678 | a zip archive, None otherwise. |
| 2601 | 2679 | """ |
| 2602 | - try: | |
| 2603 | - VBA_Parser.__init__(self, filename, data=data, container=container) | |
| 2604 | - except TypeError: | |
| 2605 | - # in that case, self.type=None | |
| 2606 | - pass | |
| 2680 | + super(VBA_Parser_CLI, self).__init__(filename, data=data, container=container) | |
| 2607 | 2681 | |
| 2608 | 2682 | |
| 2609 | 2683 | def print_analysis(self, show_decoded_strings=False, deobfuscate=False): |
| ... | ... | @@ -2653,7 +2727,7 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2653 | 2727 | for kw_type, keyword, description in self.analyze_macros(show_decoded_strings)] |
| 2654 | 2728 | |
| 2655 | 2729 | def process_file(self, show_decoded_strings=False, |
| 2656 | - display_code=True, global_analysis=True, hide_attributes=True, | |
| 2730 | + display_code=True, hide_attributes=True, | |
| 2657 | 2731 | vba_code_only=False, show_deobfuscated_code=False, |
| 2658 | 2732 | deobfuscate=False): |
| 2659 | 2733 | """ |
| ... | ... | @@ -2699,19 +2773,12 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2699 | 2773 | print '(empty macro)' |
| 2700 | 2774 | else: |
| 2701 | 2775 | print vba_code_filtered |
| 2702 | - if not global_analysis and not vba_code_only: | |
| 2703 | - #TODO: remove this option | |
| 2704 | - raise NotImplementedError | |
| 2705 | - print '- ' * 39 | |
| 2706 | - print 'ANALYSIS:' | |
| 2707 | - # analyse each module's code, filtered to avoid false positives: | |
| 2708 | - self.print_analysis(show_decoded_strings, deobfuscate) | |
| 2709 | 2776 | for (subfilename, stream_path, form_string) in self.extract_form_strings(): |
| 2710 | 2777 | print '-' * 79 |
| 2711 | 2778 | print 'VBA FORM STRING IN %r - OLE stream: %r' % (subfilename, stream_path) |
| 2712 | 2779 | print '- ' * 39 |
| 2713 | 2780 | print form_string |
| 2714 | - if global_analysis and not vba_code_only: | |
| 2781 | + if not vba_code_only: | |
| 2715 | 2782 | # analyse the code from all modules at once: |
| 2716 | 2783 | self.print_analysis(show_decoded_strings, deobfuscate) |
| 2717 | 2784 | if show_deobfuscated_code: |
| ... | ... | @@ -2719,20 +2786,16 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2719 | 2786 | print self.reveal() |
| 2720 | 2787 | else: |
| 2721 | 2788 | print 'No VBA macros found.' |
| 2722 | - except KeyboardInterrupt: | |
| 2723 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2724 | - raise | |
| 2725 | - except: #TypeError: | |
| 2726 | - #raise | |
| 2727 | - #TODO: print more info if debug mode | |
| 2728 | - #print sys.exc_value | |
| 2729 | - # display the exception with full stack trace for debugging, but do not stop: | |
| 2730 | - traceback.print_exc() | |
| 2789 | + except Exception as exc: | |
| 2790 | + # log the error (stack trace at debug level only), then re-raise as ProcessingError | |
| 2791 | + log.info('Error processing file %s (%s)' % (self.filename, exc)) | |
| 2792 | + log.debug('Traceback:', exc_info=True) | |
| 2793 | + raise ProcessingError(self.filename, exc) | |
| 2731 | 2794 | print '' |
| 2732 | 2795 | |
| 2733 | 2796 | |
| 2734 | 2797 | def process_file_json(self, show_decoded_strings=False, |
| 2735 | - display_code=True, global_analysis=True, hide_attributes=True, | |
| 2798 | + display_code=True, hide_attributes=True, | |
| 2736 | 2799 | vba_code_only=False, show_deobfuscated_code=False): |
| 2737 | 2800 | """ |
| 2738 | 2801 | Process a single file |
| ... | ... | @@ -2781,27 +2844,19 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2781 | 2844 | curr_macro['ole_stream'] = stream_path |
| 2782 | 2845 | if display_code: |
| 2783 | 2846 | curr_macro['code'] = vba_code_filtered.strip() |
| 2784 | - if not global_analysis and not vba_code_only: | |
| 2785 | - # analyse each module's code, filtered to avoid false positives: | |
| 2786 | - #TODO: remove this option | |
| 2787 | - curr_macro['analysis'] = self.print_analysis_json(show_decoded_strings) | |
| 2788 | 2847 | macros.append(curr_macro) |
| 2789 | - if global_analysis and not vba_code_only: | |
| 2848 | + if not vba_code_only: | |
| 2790 | 2849 | # analyse the code from all modules at once: |
| 2791 | 2850 | result['analysis'] = self.print_analysis_json(show_decoded_strings) |
| 2792 | 2851 | if show_deobfuscated_code: |
| 2793 | 2852 | result['code_deobfuscated'] = self.reveal() |
| 2794 | 2853 | result['macros'] = macros |
| 2795 | 2854 | result['json_conversion_successful'] = True |
| 2796 | - except KeyboardInterrupt: | |
| 2797 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2798 | - raise | |
| 2799 | - except: #TypeError: | |
| 2800 | - #raise | |
| 2801 | - #TODO: print more info if debug mode | |
| 2802 | - #print sys.exc_value | |
| 2803 | - # display the exception with full stack trace for debugging, but do not stop: | |
| 2804 | - traceback.print_exc() | |
| 2855 | + except Exception as exc: | |
| 2856 | + # log the error (stack trace at debug level only), then re-raise as ProcessingError | |
| 2857 | + log.info('Error processing file %s (%s)' % (self.filename, exc)) | |
| 2858 | + log.debug('Traceback:', exc_info=True) | |
| 2859 | + raise ProcessingError(self.filename, exc) | |
| 2805 | 2860 | |
| 2806 | 2861 | return result |
| 2807 | 2862 | |
| ... | ... | @@ -2811,57 +2866,46 @@ class VBA_Parser_CLI(VBA_Parser): |
| 2811 | 2866 | Process a file in triage mode, showing only summary results on one line. |
| 2812 | 2867 | """ |
| 2813 | 2868 | #TODO: replace print by writing to a provided output file (sys.stdout by default) |
| 2814 | - message = '' | |
| 2815 | 2869 | try: |
| 2816 | - if self.type is not None: | |
| 2817 | - #TODO: handle olefile errors, when an OLE file is malformed | |
| 2818 | - if self.detect_vba_macros(): | |
| 2819 | - # print a waiting message only if the output is not redirected to a file: | |
| 2820 | - if sys.stdout.isatty(): | |
| 2821 | - print 'Analysis...\r', | |
| 2822 | - sys.stdout.flush() | |
| 2823 | - self.analyze_macros(show_decoded_strings=show_decoded_strings, | |
| 2824 | - deobfuscate=deobfuscate) | |
| 2825 | - flags = TYPE2TAG[self.type] | |
| 2826 | - macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-' | |
| 2827 | - if self.contains_macros: macros = 'M' | |
| 2828 | - if self.nb_autoexec: autoexec = 'A' | |
| 2829 | - if self.nb_suspicious: suspicious = 'S' | |
| 2830 | - if self.nb_iocs: iocs = 'I' | |
| 2831 | - if self.nb_hexstrings: hexstrings = 'H' | |
| 2832 | - if self.nb_base64strings: base64obf = 'B' | |
| 2833 | - if self.nb_dridexstrings: dridex = 'D' | |
| 2834 | - if self.nb_vbastrings: vba_obf = 'V' | |
| 2835 | - flags += '%s%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings, | |
| 2836 | - base64obf, dridex, vba_obf) | |
| 2837 | - # old table display: | |
| 2838 | - # macros = autoexec = suspicious = iocs = hexstrings = 'no' | |
| 2839 | - # if nb_macros: macros = 'YES:%d' % nb_macros | |
| 2840 | - # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec | |
| 2841 | - # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious | |
| 2842 | - # if nb_iocs: iocs = 'YES:%d' % nb_iocs | |
| 2843 | - # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings | |
| 2844 | - # # 2nd line = info | |
| 2845 | - # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (self.type, macros, autoexec, suspicious, iocs, hexstrings) | |
| 2846 | - else: | |
| 2847 | - # self.type==None | |
| 2848 | - # file type not OLE nor OpenXML | |
| 2849 | - flags = '?' | |
| 2850 | - message = 'File format not supported' | |
| 2851 | - except KeyboardInterrupt: | |
| 2852 | - # do not ignore exceptions when the user presses Ctrl+C/Pause: | |
| 2853 | - raise | |
| 2854 | - except: | |
| 2855 | - # another error occurred | |
| 2856 | - #raise | |
| 2857 | - #TODO: print more info if debug mode | |
| 2858 | - #TODO: distinguish real errors from incorrect file types | |
| 2859 | - flags = '!ERROR' | |
| 2860 | - message = sys.exc_value | |
| 2861 | - line = '%-12s %s' % (flags, self.filename) | |
| 2862 | - if message: | |
| 2863 | - line += ' - %s' % message | |
| 2864 | - print line | |
| 2870 | + #TODO: handle olefile errors, when an OLE file is malformed | |
| 2871 | + if self.detect_vba_macros(): | |
| 2872 | + # print a waiting message only if the output is not redirected to a file: | |
| 2873 | + if sys.stdout.isatty(): | |
| 2874 | + print 'Analysis...\r', | |
| 2875 | + sys.stdout.flush() | |
| 2876 | + self.analyze_macros(show_decoded_strings=show_decoded_strings, | |
| 2877 | + deobfuscate=deobfuscate) | |
| 2878 | + flags = TYPE2TAG[self.type] | |
| 2879 | + macros = autoexec = suspicious = iocs = hexstrings = base64obf = dridex = vba_obf = '-' | |
| 2880 | + if self.contains_macros: macros = 'M' | |
| 2881 | + if self.nb_autoexec: autoexec = 'A' | |
| 2882 | + if self.nb_suspicious: suspicious = 'S' | |
| 2883 | + if self.nb_iocs: iocs = 'I' | |
| 2884 | + if self.nb_hexstrings: hexstrings = 'H' | |
| 2885 | + if self.nb_base64strings: base64obf = 'B' | |
| 2886 | + if self.nb_dridexstrings: dridex = 'D' | |
| 2887 | + if self.nb_vbastrings: vba_obf = 'V' | |
| 2888 | + flags += '%s%s%s%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings, | |
| 2889 | + base64obf, dridex, vba_obf) | |
| 2890 | + | |
| 2891 | + line = '%-12s %s' % (flags, self.filename) | |
| 2892 | + print line | |
| 2893 | + | |
| 2894 | + # old table display: | |
| 2895 | + # macros = autoexec = suspicious = iocs = hexstrings = 'no' | |
| 2896 | + # if nb_macros: macros = 'YES:%d' % nb_macros | |
| 2897 | + # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec | |
| 2898 | + # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious | |
| 2899 | + # if nb_iocs: iocs = 'YES:%d' % nb_iocs | |
| 2900 | + # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings | |
| 2901 | + # # 2nd line = info | |
| 2902 | + # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (self.type, macros, autoexec, suspicious, iocs, hexstrings) | |
| 2903 | + except Exception as exc: | |
| 2904 | + # display the exception with full stack trace for debugging only | |
| 2905 | + log.debug('Error processing file %s (%s)' % (self.filename, exc), | |
| 2906 | + exc_info=True) | |
| 2907 | + raise ProcessingError(self.filename, exc) | |
| 2908 | + | |
| 2865 | 2909 | |
| 2866 | 2910 | # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'), |
| 2867 | 2911 | # header=False, border=False) |
| ... | ... | @@ -2883,7 +2927,6 @@ def main(): |
| 2883 | 2927 | """ |
| 2884 | 2928 | Main function, called when olevba is run from the command line |
| 2885 | 2929 | """ |
| 2886 | - global log | |
| 2887 | 2930 | DEFAULT_LOG_LEVEL = "warning" # Default log level |
| 2888 | 2931 | LOG_LEVELS = { |
| 2889 | 2932 | 'debug': logging.DEBUG, |
| ... | ... | @@ -2939,13 +2982,14 @@ def main(): |
| 2939 | 2982 | if len(args) == 0: |
| 2940 | 2983 | print __doc__ |
| 2941 | 2984 | parser.print_help() |
| 2942 | - sys.exit() | |
| 2985 | + sys.exit(RETURN_WRONG_ARGS) | |
| 2943 | 2986 | |
| 2944 | 2987 | # provide info about tool and its version |
| 2945 | 2988 | if options.output_mode == 'json': |
| 2946 | - json_results = [dict(script_name='olevba', version=__version__, | |
| 2947 | - url='http://decalage.info/python/oletools', | |
| 2948 | - type='MetaInformation'), ] | |
| 2989 | + # prints opening [ | |
| 2990 | + print_json(script_name='olevba', version=__version__, | |
| 2991 | + url='http://decalage.info/python/oletools', | |
| 2992 | + type='MetaInformation') | |
| 2949 | 2993 | else: |
| 2950 | 2994 | print 'olevba %s - http://decalage.info/python/oletools' % __version__ |
| 2951 | 2995 | |
| ... | ... | @@ -2971,65 +3015,120 @@ def main(): |
| 2971 | 3015 | count = 0 |
| 2972 | 3016 | container = filename = data = None |
| 2973 | 3017 | vba_parser = None |
| 2974 | - for container, filename, data in xglob.iter_files(args, recursive=options.recursive, | |
| 2975 | - zip_password=options.zip_password, zip_fname=options.zip_fname): | |
| 2976 | - # ignore directory names stored in zip files: | |
| 2977 | - if container and filename.endswith('/'): | |
| 2978 | - continue | |
| 2979 | - # Open the file | |
| 2980 | - vba_parser = VBA_Parser_CLI(filename, data=data, container=container) | |
| 2981 | - if options.output_mode == 'detailed': | |
| 2982 | - # fully detailed output | |
| 2983 | - vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, | |
| 2984 | - display_code=options.display_code, global_analysis=True, #options.global_analysis, | |
| 2985 | - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | |
| 2986 | - show_deobfuscated_code=options.show_deobfuscated_code, | |
| 2987 | - deobfuscate=options.deobfuscate) | |
| 2988 | - elif options.output_mode in ('triage', 'unspecified'): | |
| 2989 | - # print container name when it changes: | |
| 2990 | - if container != previous_container: | |
| 2991 | - if container is not None: | |
| 2992 | - print '\nFiles in %s:' % container | |
| 2993 | - previous_container = container | |
| 2994 | - # summarized output for triage: | |
| 2995 | - vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, | |
| 2996 | - deobfuscate=options.deobfuscate) | |
| 2997 | - elif options.output_mode == 'json': | |
| 2998 | - json_results.append( | |
| 2999 | - vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, | |
| 3000 | - display_code=options.display_code, global_analysis=True, #options.global_analysis, | |
| 3001 | - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | |
| 3002 | - show_deobfuscated_code=options.show_deobfuscated_code)) | |
| 3003 | - else: # (should be impossible) | |
| 3004 | - raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode)) | |
| 3005 | - count += 1 | |
| 3006 | - if options.output_mode == 'triage': | |
| 3007 | - print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ | |
| 3008 | - 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ | |
| 3009 | - 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' | |
| 3010 | - | |
| 3011 | - if count == 1 and options.output_mode == 'unspecified': | |
| 3012 | - # if options -t, -d and -j were not specified and it's a single file, print details: | |
| 3013 | - vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, | |
| 3014 | - display_code=options.display_code, global_analysis=True, #options.global_analysis, | |
| 3015 | - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | |
| 3016 | - show_deobfuscated_code=options.show_deobfuscated_code, | |
| 3017 | - deobfuscate=options.deobfuscate) | |
| 3018 | - | |
| 3019 | - if options.output_mode == 'json': | |
| 3020 | - json_options = dict(check_circular=False, indent=4, ensure_ascii=False) | |
| 3021 | - | |
| 3022 | - # json.dump[s] cannot deal with unicode objects that are not properly | |
| 3023 | - # encoded --> encode in own function: | |
| 3024 | - json_results = json2ascii(json_results) | |
| 3025 | - #print_json(json_results) | |
| 3026 | - | |
| 3027 | - # if False: # options.outfile: # (option currently commented out) | |
| 3028 | - # with open(outfile, 'w') as write_handle: | |
| 3029 | - # json.dump(write_handle, **json_options) | |
| 3030 | - # else: | |
| 3031 | - print json.dumps(json_results, **json_options) | |
| 3018 | + return_code = RETURN_OK | |
| 3019 | + try: | |
| 3020 | + for container, filename, data in xglob.iter_files(args, recursive=options.recursive, | |
| 3021 | + zip_password=options.zip_password, zip_fname=options.zip_fname): | |
| 3022 | + # ignore directory names stored in zip files: | |
| 3023 | + if container and filename.endswith('/'): | |
| 3024 | + continue | |
| 3025 | + | |
| 3026 | + # handle errors from xglob | |
| 3027 | + if isinstance(data, Exception): | |
| 3028 | + if isinstance(data, PathNotFoundException): | |
| 3029 | + if options.output_mode in ('triage', 'unspecified'): | |
| 3030 | + print '%-12s %s - File not found' % ('?', filename) | |
| 3031 | + elif options.output_mode != 'json': | |
| 3032 | + log.error('Given path %r does not exist!' % filename) | |
| 3033 | + return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \ | |
| 3034 | + else RETURN_SEVERAL_ERRS | |
| 3035 | + else: | |
| 3036 | + if options.output_mode in ('triage', 'unspecified'): | |
| 3037 | + print '%-12s %s - Failed to read from zip file %s' % ('?', filename, container) | |
| 3038 | + elif options.output_mode != 'json': | |
| 3039 | + log.error('Exception opening/reading %r from zip file %r: %s' | |
| 3040 | + % (filename, container, data)) | |
| 3041 | + return_code = RETURN_XGLOB_ERR if return_code == 0 \ | |
| 3042 | + else RETURN_SEVERAL_ERRS | |
| 3043 | + if options.output_mode == 'json': | |
| 3044 | + print_json(file=filename, type='error', | |
| 3045 | + error=type(data).__name__, message=str(data)) | |
| 3046 | + continue | |
| 3032 | 3047 | |
| 3048 | + try: | |
| 3049 | + # Open the file | |
| 3050 | + vba_parser = VBA_Parser_CLI(filename, data=data, container=container) | |
| 3051 | + | |
| 3052 | + if options.output_mode == 'detailed': | |
| 3053 | + # fully detailed output | |
| 3054 | + vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, | |
| 3055 | + display_code=options.display_code, | |
| 3056 | + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | |
| 3057 | + show_deobfuscated_code=options.show_deobfuscated_code, | |
| 3058 | + deobfuscate=options.deobfuscate) | |
| 3059 | + elif options.output_mode in ('triage', 'unspecified'): | |
| 3060 | + # print container name when it changes: | |
| 3061 | + if container != previous_container: | |
| 3062 | + if container is not None: | |
| 3063 | + print '\nFiles in %s:' % container | |
| 3064 | + previous_container = container | |
| 3065 | + # summarized output for triage: | |
| 3066 | + vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings, | |
| 3067 | + deobfuscate=options.deobfuscate) | |
| 3068 | + elif options.output_mode == 'json': | |
| 3069 | + print_json( | |
| 3070 | + vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings, | |
| 3071 | + display_code=options.display_code, | |
| 3072 | + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | |
| 3073 | + show_deobfuscated_code=options.show_deobfuscated_code)) | |
| 3074 | + else: # (should be impossible) | |
| 3075 | + raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode)) | |
| 3076 | + count += 1 | |
| 3077 | + | |
| 3078 | + except FileOpenError as exc: | |
| 3079 | + if options.output_mode in ('triage', 'unspecified'): | |
| 3080 | + print '%-12s %s - File format not supported' % ('?', filename) | |
| 3081 | + elif options.output_mode == 'json': | |
| 3082 | + print_json(file=filename, type='error', | |
| 3083 | + error=type(exc).__name__, message=str(exc)) | |
| 3084 | + else: | |
| 3085 | + log.exception('Failed to open %s -- probably not supported!' % filename) | |
| 3086 | + return_code = RETURN_OPEN_ERROR if return_code == 0 \ | |
| 3087 | + else RETURN_SEVERAL_ERRS | |
| 3088 | + except ProcessingError as exc: | |
| 3089 | + if options.output_mode in ('triage', 'unspecified'): | |
| 3090 | + print '%-12s %s - %s' % ('!ERROR', filename, exc.orig_exception) | |
| 3091 | + elif options.output_mode == 'json': | |
| 3092 | + print_json(file=filename, type='error', | |
| 3093 | + error=type(exc).__name__, | |
| 3094 | + message=str(exc.orig_exception)) | |
| 3095 | + else: | |
| 3096 | + log.exception('Error processing file %s (%s)!' | |
| 3097 | + % (filename, exc.orig_exception)) | |
| 3098 | + return_code = RETURN_PARSE_ERROR if return_code == 0 \ | |
| 3099 | + else RETURN_SEVERAL_ERRS | |
| 3100 | + finally: | |
| 3101 | + if vba_parser is not None: | |
| 3102 | + vba_parser.close() | |
| 3103 | + | |
| 3104 | + if options.output_mode == 'triage': | |
| 3105 | + print '\n(Flags: OpX=OpenXML, XML=Word2003XML, MHT=MHTML, TXT=Text, M=Macros, ' \ | |
| 3106 | + 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ | |
| 3107 | + 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n' | |
| 3108 | + | |
| 3109 | + if count == 1 and options.output_mode == 'unspecified': | |
| 3110 | + # if options -t, -d and -j were not specified and it's a single file, print details: | |
| 3111 | + vba_parser.process_file(show_decoded_strings=options.show_decoded_strings, | |
| 3112 | + display_code=options.display_code, | |
| 3113 | + hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only, | |
| 3114 | + show_deobfuscated_code=options.show_deobfuscated_code, | |
| 3115 | + deobfuscate=options.deobfuscate) | |
| 3116 | + | |
| 3117 | + if options.output_mode == 'json': | |
| 3118 | + # print last json entry (a last one without a comma) and closing ] | |
| 3119 | + print_json(type='MetaInformation', return_code=return_code, | |
| 3120 | + n_processed=count, _json_is_last=True) | |
| 3121 | + | |
| 3122 | + except Exception as exc: | |
| 3123 | + # some unexpected error, maybe some of the types caught in except clauses | |
| 3124 | + # above were not sufficient. This is very bad, so log complete trace at exception level | |
| 3125 | + # and do not care about output mode | |
| 3126 | + log.exception('Unhandled exception in main: %s' % exc, exc_info=True) | |
| 3127 | + return_code = RETURN_UNEXPECTED # even if there were others before -- this is more important | |
| 3128 | + | |
| 3129 | + # done. exit | |
| 3130 | + log.debug('will exit now with code %s' % return_code) | |
| 3131 | + sys.exit(return_code) | |
| 3033 | 3132 | |
| 3034 | 3133 | if __name__ == '__main__': |
| 3035 | 3134 | main() | ... | ... |
oletools/thirdparty/xglob/xglob.py
| ... | ... | @@ -60,6 +60,15 @@ __version__ = '0.05' |
| 60 | 60 | |
| 61 | 61 | import os, fnmatch, glob, zipfile |
| 62 | 62 | |
| 63 | +#=== EXCEPTIONS ============================================================== | |
| 64 | + | |
| 65 | +class PathNotFoundException(Exception): | |
| 66 | + """ raised if given a fixed file/dir (not a glob) that does not exist """ | |
| 67 | + def __init__(self, path): | |
| 68 | + super(PathNotFoundException, self).__init__( | |
| 69 | + 'Given path does not exist: %r' % path) | |
| 70 | + | |
| 71 | + | |
| 63 | 72 | #=== FUNCTIONS =============================================================== |
| 64 | 73 | |
| 65 | 74 | # recursive glob function to find files in any subfolder: |
| ... | ... | @@ -118,8 +127,11 @@ def iter_files(files, recursive=False, zip_password=None, zip_fname='*'): |
| 118 | 127 | - then files matching zip_fname are opened from the zip archive |
| 119 | 128 | |
| 120 | 129 | Iterator: yields (container, filename, data) for each file. If zip_password is None, then |
| 121 | - only the filename is returned, container and data=None. Otherwise container si the | |
| 122 | - filename of the container (zip file), and data is the file content. | |
| 130 | + only the filename is returned, container and data=None. Otherwise container is the | |
| 131 | + filename of the container (zip file), and data is the file content (or an exception). | |
| 132 | + If a given filename is not a glob and does not exist, the triplet | |
| 133 | + (None, filename, PathNotFoundException instance) is yielded instead of raising. | |
| 134 | + (Globs matching nothing do not trigger exceptions.) | |
| 123 | 135 | """ |
| 124 | 136 | #TODO: catch exceptions and yield them for the caller (no file found, file is not zip, wrong password, etc) |
| 125 | 137 | #TODO: use logging instead of printing |
| ... | ... | @@ -131,6 +143,9 @@ def iter_files(files, recursive=False, zip_password=None, zip_fname='*'): |
| 131 | 143 | else: |
| 132 | 144 | iglob = glob.iglob |
| 133 | 145 | for filespec in files: |
| 146 | + if not is_glob(filespec) and not os.path.exists(filespec): | |
| 147 | + yield None, filespec, PathNotFoundException(filespec) | |
| 148 | + continue | |
| 134 | 149 | for filename in iglob(filespec): |
| 135 | 150 | if zip_password is not None: |
| 136 | 151 | # Each file is expected to be a zip archive: |
| ... | ... | @@ -153,3 +168,39 @@ def iter_files(files, recursive=False, zip_password=None, zip_fname='*'): |
| 153 | 168 | #data = open(filename, 'rb').read() |
| 154 | 169 | #yield None, filename, data |
| 155 | 170 | |
| 171 | + | |
| 172 | +def is_glob(filespec): | |
| 173 | + """ determine if given file specification is a single file name or a glob | |
| 174 | + | |
| 175 | + Python's glob and fnmatch can only interpret ?, *, [list], and [ra-nge], | |
| 176 | + (and combinations: hex_*_[A-Fabcdef0-9]). | |
| 177 | + The special chars *?[-] can only be escaped using [] | |
| 178 | + --> file_name is not a glob | |
| 179 | + --> file?name is a glob | |
| 180 | + --> file* is a glob | |
| 181 | + --> file[-._]name is a glob | |
| 182 | + --> file[?]name is not a glob (matches literal "file?name") | |
| 183 | + --> file[*]name is not a glob (matches literal "file*name") | |
| 184 | + --> file[-]name is not a glob (matches literal "file-name") | |
| 185 | + --> file-name is not a glob | |
| 186 | + | |
| 187 | + Also, obviously incorrect globs are treated as non-globs | |
| 188 | + --> file[name is not a glob (matches literal "file[name") | |
| 189 | + --> file]-[name is treated as a glob | |
| 190 | + (it is not a valid glob but detecting errors like this requires | |
| 191 | + sophisticated regular expression matching) | |
| 192 | + | |
| 193 | + Python's glob also works with globs in directory-part of path | |
| 194 | + --> dir-part of path is analyzed just like filename-part | |
| 195 | + --> thirdparty/*/xglob.py is a (valid) glob | |
| 196 | + | |
| 197 | + TODO: create a correct regexp to test for validity of ranges | |
| 198 | + """ | |
| 199 | + | |
| 200 | + # remove escaped special chars | |
| 201 | + cleaned = filespec.replace('[*]', '').replace('[?]', '') \ | |
| 202 | + .replace('[[]', '').replace('[]]', '').replace('[-]', '') | |
| 203 | + | |
| 204 | + # check if special chars remain | |
| 205 | + return '*' in cleaned or '?' in cleaned or \ | |
| 206 | + ('[' in cleaned and ']' in cleaned) | ... | ... |