Commit 1d05bbab4d5a5f3bf51e98c2bd085ea94d9fe660

Authored by Philippe Lagadec
1 parent be1d4830

olevba: fixed issue #3, case-insensitive search in code_modules

Showing 1 changed file with 11 additions and 4 deletions
oletools/olevba.py
... ... @@ -105,8 +105,9 @@ https://github.com/unixfreak0037/officeparser
105 105 # 2015-01-17 v0.17 PL: - removed .com from the list of executable extensions
106 106 # - added scan_vba to run all detection algorithms
107 107 # - decoded hex strings are now also scanned + reversed
  108 +# 2015-01-23 v0.18 PL: - fixed issue #3, case-insensitive search in code_modules
108 109  
109   -__version__ = '0.17'
  110 +__version__ = '0.18'
110 111  
111 112 #------------------------------------------------------------------------------
112 113 # TODO:
... ... @@ -246,6 +247,7 @@ SUSPICIOUS_KEYWORDS = {
246 247 # Patterns to be extracted (IP addresses, URLs, etc)
247 248 # From patterns.py in balbuzard
248 249 RE_PATTERNS = (
  250 + #TODO: check if this regex matches URLs with an IP address (various forms)
249 251 ('URL', re.compile(r'(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~])*[^\.\,\)\(\s]')),
250 252 ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")),
251 253 ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')),
... ... @@ -440,6 +442,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path):
440 442 # looking for code modules
441 443 # add the code module as a key in the dictionary
442 444 # the value will be the extension needed later
  445 + # The value is converted to lowercase, to allow case-insensitive matching (issue #3)
  446 + value = value.lower()
443 447 if name == 'Document':
444 448 # split value at the 1st slash, keep 1st part:
445 449 value = value.split('/', 1)[0]
... ... @@ -767,7 +771,8 @@ def _extract_vba (ole, vba_root, project_path, dir_path):
767 771 code_data = code_data[MODULEOFFSET_TextOffset:]
768 772 if len(code_data) > 0:
769 773 code_data = decompress_stream(code_data)
770   - filext = code_modules.get(MODULENAME_ModuleName, 'bin')
  774 + # case-insensitive search in the code_modules dict to find the file extension:
  775 + filext = code_modules.get(MODULENAME_ModuleName.lower(), 'bin')
771 776 filename = '{0}.{1}'.format(MODULENAME_ModuleName, filext)
772 777 yield (code_path, filename, code_data)
773 778 # print '-'*79
... ... @@ -903,6 +908,7 @@ def scan_vba(vba_code):
903 908 #TODO: also add reverse strings (before and after decoding), for StrReverse obfuscation
904 909 #TODO: only do it if StrReverse found in code?
905 910 vba_code += '\n'+decoded[::-1]
  911 + vba_code += '\n'+binascii.unhexlify(encoded[::-1])
906 912 autoexec_keywords = detect_autoexec(vba_code)
907 913 suspicious_keywords = detect_suspicious(vba_code)
908 914 # If hex-encoded strings were discovered, add an item to suspicious keywords:
... ... @@ -917,8 +923,8 @@ def scan_vba(vba_code):
917 923 for pattern_type, value in patterns:
918 924 results.append(('IOC', value, pattern_type))
919 925 # TODO: only if option --hex (for now, hex strings are always added to the results):
920   - # for encoded, decoded in hex_strings:
921   - # results.append(('Hex String', repr(decoded), encoded))
  926 + for encoded, decoded in hex_strings:
  927 + results.append(('Hex String', repr(decoded), encoded))
922 928 return results
923 929  
924 930  
... ... @@ -1185,6 +1191,7 @@ def process_file (container, filename, data):
1185 1191 #print 'Contains VBA Macros:'
1186 1192 for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
1187 1193 # hide attribute lines:
  1194 + #TODO: option to disable attribute filtering
1188 1195 vba_code = filter_vba(vba_code)
1189 1196 print '-'*79
1190 1197 print 'VBA MACRO %s ' % vba_filename
... ...