Commit 1d05bbab4d5a5f3bf51e98c2bd085ea94d9fe660
1 parent: be1d4830
olevba: fixed issue #3, case-insensitive search in code_modules
Showing 1 changed file with 11 additions and 4 deletions
oletools/olevba.py
| @@ -105,8 +105,9 @@ https://github.com/unixfreak0037/officeparser | @@ -105,8 +105,9 @@ https://github.com/unixfreak0037/officeparser | ||
| 105 | # 2015-01-17 v0.17 PL: - removed .com from the list of executable extensions | 105 | # 2015-01-17 v0.17 PL: - removed .com from the list of executable extensions |
| 106 | # - added scan_vba to run all detection algorithms | 106 | # - added scan_vba to run all detection algorithms |
| 107 | # - decoded hex strings are now also scanned + reversed | 107 | # - decoded hex strings are now also scanned + reversed |
| 108 | +# 2015-01-23 v0.18 PL: - fixed issue #3, case-insensitive search in code_modules | ||
| 108 | 109 | ||
| 109 | -__version__ = '0.17' | 110 | +__version__ = '0.18' |
| 110 | 111 | ||
| 111 | #------------------------------------------------------------------------------ | 112 | #------------------------------------------------------------------------------ |
| 112 | # TODO: | 113 | # TODO: |
| @@ -246,6 +247,7 @@ SUSPICIOUS_KEYWORDS = { | @@ -246,6 +247,7 @@ SUSPICIOUS_KEYWORDS = { | ||
| 246 | # Patterns to be extracted (IP addresses, URLs, etc) | 247 | # Patterns to be extracted (IP addresses, URLs, etc) |
| 247 | # From patterns.py in balbuzard | 248 | # From patterns.py in balbuzard |
| 248 | RE_PATTERNS = ( | 249 | RE_PATTERNS = ( |
| 250 | + #TODO: check if this regex matches URLs with an IP address (various forms) | ||
| 249 | ('URL', re.compile(r'(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~])*[^\.\,\)\(\s]')), | 251 | ('URL', re.compile(r'(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~])*[^\.\,\)\(\s]')), |
| 250 | ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")), | 252 | ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")), |
| 251 | ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')), | 253 | ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')), |
| @@ -440,6 +442,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | @@ -440,6 +442,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | ||
| 440 | # looking for code modules | 442 | # looking for code modules |
| 441 | # add the code module as a key in the dictionary | 443 | # add the code module as a key in the dictionary |
| 442 | # the value will be the extension needed later | 444 | # the value will be the extension needed later |
| 445 | + # The value is converted to lowercase, to allow case-insensitive matching (issue #3) | ||
| 446 | + value = value.lower() | ||
| 443 | if name == 'Document': | 447 | if name == 'Document': |
| 444 | # split value at the 1st slash, keep 1st part: | 448 | # split value at the 1st slash, keep 1st part: |
| 445 | value = value.split('/', 1)[0] | 449 | value = value.split('/', 1)[0] |
| @@ -767,7 +771,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | @@ -767,7 +771,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): | ||
| 767 | code_data = code_data[MODULEOFFSET_TextOffset:] | 771 | code_data = code_data[MODULEOFFSET_TextOffset:] |
| 768 | if len(code_data) > 0: | 772 | if len(code_data) > 0: |
| 769 | code_data = decompress_stream(code_data) | 773 | code_data = decompress_stream(code_data) |
| 770 | - filext = code_modules.get(MODULENAME_ModuleName, 'bin') | 774 | + # case-insensitive search in the code_modules dict to find the file extension: |
| 775 | + filext = code_modules.get(MODULENAME_ModuleName.lower(), 'bin') | ||
| 771 | filename = '{0}.{1}'.format(MODULENAME_ModuleName, filext) | 776 | filename = '{0}.{1}'.format(MODULENAME_ModuleName, filext) |
| 772 | yield (code_path, filename, code_data) | 777 | yield (code_path, filename, code_data) |
| 773 | # print '-'*79 | 778 | # print '-'*79 |
| @@ -903,6 +908,7 @@ def scan_vba(vba_code): | @@ -903,6 +908,7 @@ def scan_vba(vba_code): | ||
| 903 | #TODO: also add reverse strings (before and after decoding), for StrReverse obfuscation | 908 | #TODO: also add reverse strings (before and after decoding), for StrReverse obfuscation |
| 904 | #TODO: only do it if StrReverse found in code? | 909 | #TODO: only do it if StrReverse found in code? |
| 905 | vba_code += '\n'+decoded[::-1] | 910 | vba_code += '\n'+decoded[::-1] |
| 911 | + vba_code += '\n'+binascii.unhexlify(encoded[::-1]) | ||
| 906 | autoexec_keywords = detect_autoexec(vba_code) | 912 | autoexec_keywords = detect_autoexec(vba_code) |
| 907 | suspicious_keywords = detect_suspicious(vba_code) | 913 | suspicious_keywords = detect_suspicious(vba_code) |
| 908 | # If hex-encoded strings were discovered, add an item to suspicious keywords: | 914 | # If hex-encoded strings were discovered, add an item to suspicious keywords: |
| @@ -917,8 +923,8 @@ def scan_vba(vba_code): | @@ -917,8 +923,8 @@ def scan_vba(vba_code): | ||
| 917 | for pattern_type, value in patterns: | 923 | for pattern_type, value in patterns: |
| 918 | results.append(('IOC', value, pattern_type)) | 924 | results.append(('IOC', value, pattern_type)) |
| 919 | # Only if option --hex: | 925 | # Only if option --hex: |
| 920 | - # for encoded, decoded in hex_strings: | ||
| 921 | - # results.append(('Hex String', repr(decoded), encoded)) | 926 | + for encoded, decoded in hex_strings: |
| 927 | + results.append(('Hex String', repr(decoded), encoded)) | ||
| 922 | return results | 928 | return results |
| 923 | 929 | ||
| 924 | 930 | ||
| @@ -1185,6 +1191,7 @@ def process_file (container, filename, data): | @@ -1185,6 +1191,7 @@ def process_file (container, filename, data): | ||
| 1185 | #print 'Contains VBA Macros:' | 1191 | #print 'Contains VBA Macros:' |
| 1186 | for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): | 1192 | for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): |
| 1187 | # hide attribute lines: | 1193 | # hide attribute lines: |
| 1194 | + #TODO: option to disable attribute filtering | ||
| 1188 | vba_code = filter_vba(vba_code) | 1195 | vba_code = filter_vba(vba_code) |
| 1189 | print '-'*79 | 1196 | print '-'*79 |
| 1190 | print 'VBA MACRO %s ' % vba_filename | 1197 | print 'VBA MACRO %s ' % vba_filename |