Commit 99e9238ea6094146bc72e0596d6fe61b88bd3c22
1 parent: 1281a509
olevba: added pattern extraction
Showing 1 changed file with 52 additions and 2 deletions
oletools/olevba.py
| @@ -86,8 +86,9 @@ Usage: olevba.py <file> | @@ -86,8 +86,9 @@ Usage: olevba.py <file> | ||
| 86 | # - ignore empty macros | 86 | # - ignore empty macros |
| 87 | # 2014-12-14 v0.07 PL: - detect_autoexec() is now case-insensitive | 87 | # 2014-12-14 v0.07 PL: - detect_autoexec() is now case-insensitive |
| 88 | # 2014-12-15 v0.08 PL: - improved display for empty macros | 88 | # 2014-12-15 v0.08 PL: - improved display for empty macros |
| 89 | +# - added pattern extraction | ||
| 89 | 90 | ||
| 90 | -__version__ = '0.07' | 91 | +__version__ = '0.08' |
| 91 | 92 | ||
| 92 | #------------------------------------------------------------------------------ | 93 | #------------------------------------------------------------------------------ |
| 93 | # TODO: | 94 | # TODO: |
| @@ -96,6 +97,13 @@ __version__ = '0.07' | @@ -96,6 +97,13 @@ __version__ = '0.07' | ||
| 96 | # + nicer output | 97 | # + nicer output |
| 97 | # + setup logging (common with other oletools) | 98 | # + setup logging (common with other oletools) |
| 98 | # + update readme, wiki and decalage.info, pypi (link to sample files) | 99 | # + update readme, wiki and decalage.info, pypi (link to sample files) |
| 100 | +# + performance improvement: instead of searching each keyword separately, | ||
| 101 | +# first split vba code into a list of words (per line), then check each | ||
| 102 | +# word against a dict. (or put vba words into a set/dict?) | ||
| 103 | +# + for regex, maybe combine them into a single re with named groups? | ||
| 104 | +# + add Yara support, include sample rules? plugins like balbuzard? | ||
| 105 | +# + add balbuzard support | ||
| 106 | +# + move main into functions | ||
| 99 | 107 | ||
| 100 | # TODO later: | 108 | # TODO later: |
| 101 | # + output to file | 109 | # + output to file |
| @@ -103,7 +111,7 @@ __version__ = '0.07' | @@ -103,7 +111,7 @@ __version__ = '0.07' | ||
| 103 | # + look for VBA in embedded documents (e.g. Excel in Word) | 111 | # + look for VBA in embedded documents (e.g. Excel in Word) |
| 104 | # + support SRP streams (see Lenny's article + links and sample) | 112 | # + support SRP streams (see Lenny's article + links and sample) |
| 105 | # - python 3.x support | 113 | # - python 3.x support |
| 106 | -# - add support for PowerPoint macros (see libclamav, libgsf) | 114 | +# - add support for PowerPoint macros (see libclamav, libgsf), use oledump heuristic? |
| 107 | # - check VBA macros in Visio, Access, Project, etc | 115 | # - check VBA macros in Visio, Access, Project, etc |
| 108 | # - extract_macros: convert to a class, split long function into smaller methods | 116 | # - extract_macros: convert to a class, split long function into smaller methods |
| 109 | # - extract_macros: read bytes from stream file objects instead of strings | 117 | # - extract_macros: read bytes from stream file objects instead of strings |
| @@ -123,6 +131,7 @@ import struct | @@ -123,6 +131,7 @@ import struct | ||
| 123 | import cStringIO | 131 | import cStringIO |
| 124 | import math | 132 | import math |
| 125 | import zipfile | 133 | import zipfile |
| 134 | +import re | ||
| 126 | 135 | ||
| 127 | import thirdparty.olefile as olefile | 136 | import thirdparty.olefile as olefile |
| 128 | 137 | ||
| @@ -153,6 +162,16 @@ AUTOEXEC_KEYWORDS = { | @@ -153,6 +162,16 @@ AUTOEXEC_KEYWORDS = { | ||
| 153 | #TODO: full list in MS specs?? | 162 | #TODO: full list in MS specs?? |
| 154 | } | 163 | } |
| 155 | 164 | ||
| 165 | +# Patterns to be extracted (IP addresses, URLs, etc) | ||
| 166 | +# From patterns.py in balbuzard | ||
| 167 | +RE_PATTERNS = ( | ||
| 168 | + ('URL', re.compile(r'(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~])*[^\.\,\)\(\s]')), | ||
| 169 | + ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")), | ||
| 170 | + ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')), | ||
| 171 | + ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')), | ||
| 172 | + ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|COM|VBS|JS|VBE|JSE|BAT|CMD|DLL|SCR|CLASS|JAR)\b")), | ||
| 173 | + ) | ||
| 174 | + | ||
| 156 | 175 | ||
| 157 | #--- FUNCTIONS ---------------------------------------------------------------- | 176 | #--- FUNCTIONS ---------------------------------------------------------------- |
| 158 | 177 | ||
| @@ -696,6 +715,7 @@ def detect_autoexec(vba_code): | @@ -696,6 +715,7 @@ def detect_autoexec(vba_code): | ||
| 696 | :param vba_code: str, VBA source code | 715 | :param vba_code: str, VBA source code |
| 697 | :return: list of str tuples (keyword, description) | 716 | :return: list of str tuples (keyword, description) |
| 698 | """ | 717 | """ |
| 718 | + #TODO: use regex to find keywords with word boundaries | ||
| 699 | # case-insensitive search | 719 | # case-insensitive search |
| 700 | vba_code = vba_code.lower() | 720 | vba_code = vba_code.lower() |
| 701 | results = [] | 721 | results = [] |
| @@ -706,6 +726,22 @@ def detect_autoexec(vba_code): | @@ -706,6 +726,22 @@ def detect_autoexec(vba_code): | ||
| 706 | return results | 726 | return results |
| 707 | 727 | ||
| 708 | 728 | ||
| 729 | +def detect_patterns(vba_code): | ||
| 730 | + """ | ||
| 731 | + Detect if the VBA code contains specific patterns such as IP addresses, | ||
| 732 | + URLs, e-mail addresses, executable file names, etc. | ||
| 733 | + | ||
| 734 | + :param vba_code: str, VBA source code | ||
| 735 | + :return: list of str tuples (pattern type, value) | ||
| 736 | + """ | ||
| 737 | + results = [] | ||
| 738 | + for pattern_type, pattern_re in RE_PATTERNS: | ||
| 739 | + match = pattern_re.search(vba_code) | ||
| 740 | + if match is not None: | ||
| 741 | + results.append((pattern_type, match.group())) | ||
| 742 | + return results | ||
| 743 | + | ||
| 744 | + | ||
| 709 | #=== CLASSES ================================================================= | 745 | #=== CLASSES ================================================================= |
| 710 | 746 | ||
| 711 | class VBA_Parser(object): | 747 | class VBA_Parser(object): |
| @@ -964,6 +1000,20 @@ if __name__ == '__main__': | @@ -964,6 +1000,20 @@ if __name__ == '__main__': | ||
| 964 | else: | 1000 | else: |
| 965 | print 'Auto-executable macro keywords: None found' | 1001 | print 'Auto-executable macro keywords: None found' |
| 966 | 1002 | ||
| 1003 | + print '- '*39 | ||
| 1004 | + patterns = detect_patterns(vba_code) | ||
| 1005 | + if patterns: | ||
| 1006 | + print 'Patterns found:' | ||
| 1007 | + t = prettytable.PrettyTable(('Value', 'Pattern type')) | ||
| 1008 | + t.align = 'l' | ||
| 1009 | + t.max_width['Value'] = 40 | ||
| 1010 | + t.max_width['Pattern type'] = 39 | ||
| 1011 | + for pattern_type, value in patterns: | ||
| 1012 | + t.add_row((value, pattern_type)) | ||
| 1013 | + print t | ||
| 1014 | + else: | ||
| 1015 | + print 'Patterns: None found' | ||
| 1016 | + | ||
| 967 | else: | 1017 | else: |
| 968 | print 'No VBA macros found.' | 1018 | print 'No VBA macros found.' |
| 969 | except TypeError: | 1019 | except TypeError: |