Commit 1d05bbab4d5a5f3bf51e98c2bd085ea94d9fe660
1 parent: be1d4830
olevba: fixed issue #3, case-insensitive search in code_modules
Showing 1 changed file with 11 additions and 4 deletions
oletools/olevba.py
| ... | ... | @@ -105,8 +105,9 @@ https://github.com/unixfreak0037/officeparser |
| 105 | 105 | # 2015-01-17 v0.17 PL: - removed .com from the list of executable extensions |
| 106 | 106 | # - added scan_vba to run all detection algorithms |
| 107 | 107 | # - decoded hex strings are now also scanned + reversed |
| 108 | +# 2015-01-23 v0.18 PL: - fixed issue #3, case-insensitive search in code_modules | |
| 108 | 109 | |
| 109 | -__version__ = '0.17' | |
| 110 | +__version__ = '0.18' | |
| 110 | 111 | |
| 111 | 112 | #------------------------------------------------------------------------------ |
| 112 | 113 | # TODO: |
| ... | ... | @@ -246,6 +247,7 @@ SUSPICIOUS_KEYWORDS = { |
| 246 | 247 | # Patterns to be extracted (IP addresses, URLs, etc) |
| 247 | 248 | # From patterns.py in balbuzard |
| 248 | 249 | RE_PATTERNS = ( |
| 250 | + #TODO: check if this regex matches URLs with an IP address (various forms) | |
| 249 | 251 | ('URL', re.compile(r'(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~])*[^\.\,\)\(\s]')), |
| 250 | 252 | ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")), |
| 251 | 253 | ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')), |
| ... | ... | @@ -440,6 +442,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): |
| 440 | 442 | # looking for code modules |
| 441 | 443 | # add the code module as a key in the dictionary |
| 442 | 444 | # the value will be the extension needed later |
| 445 | + # The value is converted to lowercase, to allow case-insensitive matching (issue #3) | |
| 446 | + value = value.lower() | |
| 443 | 447 | if name == 'Document': |
| 444 | 448 | # split value at the 1st slash, keep 1st part: |
| 445 | 449 | value = value.split('/', 1)[0] |
| ... | ... | @@ -767,7 +771,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path): |
| 767 | 771 | code_data = code_data[MODULEOFFSET_TextOffset:] |
| 768 | 772 | if len(code_data) > 0: |
| 769 | 773 | code_data = decompress_stream(code_data) |
| 770 | - filext = code_modules.get(MODULENAME_ModuleName, 'bin') | |
| 774 | + # case-insensitive search in the code_modules dict to find the file extension: | |
| 775 | + filext = code_modules.get(MODULENAME_ModuleName.lower(), 'bin') | |
| 771 | 776 | filename = '{0}.{1}'.format(MODULENAME_ModuleName, filext) |
| 772 | 777 | yield (code_path, filename, code_data) |
| 773 | 778 | # print '-'*79 |
| ... | ... | @@ -903,6 +908,7 @@ def scan_vba(vba_code): |
| 903 | 908 | #TODO: also add reverse strings (before and after decoding), for StrReverse obfuscation |
| 904 | 909 | #TODO: only do it if StrReverse found in code? |
| 905 | 910 | vba_code += '\n'+decoded[::-1] |
| 911 | + vba_code += '\n'+binascii.unhexlify(encoded[::-1]) | |
| 906 | 912 | autoexec_keywords = detect_autoexec(vba_code) |
| 907 | 913 | suspicious_keywords = detect_suspicious(vba_code) |
| 908 | 914 | # If hex-encoded strings were discovered, add an item to suspicious keywords: |
| ... | ... | @@ -917,8 +923,8 @@ def scan_vba(vba_code): |
| 917 | 923 | for pattern_type, value in patterns: |
| 918 | 924 | results.append(('IOC', value, pattern_type)) |
| 919 | 925 | # Only if option --hex: |
| 920 | - # for encoded, decoded in hex_strings: | |
| 921 | - # results.append(('Hex String', repr(decoded), encoded)) | |
| 926 | + for encoded, decoded in hex_strings: | |
| 927 | + results.append(('Hex String', repr(decoded), encoded)) | |
| 922 | 928 | return results |
| 923 | 929 | |
| 924 | 930 | |
| ... | ... | @@ -1185,6 +1191,7 @@ def process_file (container, filename, data): |
| 1185 | 1191 | #print 'Contains VBA Macros:' |
| 1186 | 1192 | for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): |
| 1187 | 1193 | # hide attribute lines: |
| 1194 | + #TODO: option to disable attribute filtering | |
| 1188 | 1195 | vba_code = filter_vba(vba_code) |
| 1189 | 1196 | print '-'*79 |
| 1190 | 1197 | print 'VBA MACRO %s ' % vba_filename | ... | ... |