Commit 99e9238ea6094146bc72e0596d6fe61b88bd3c22
1 parent
1281a509
olevba: added pattern extraction
Showing
1 changed file
with
52 additions
and
2 deletions
oletools/olevba.py
| ... | ... | @@ -86,8 +86,9 @@ Usage: olevba.py <file> |
| 86 | 86 | # - ignore empty macros |
| 87 | 87 | # 2014-12-14 v0.07 PL: - detect_autoexec() is now case-insensitive |
| 88 | 88 | # 2014-12-15 v0.08 PL: - improved display for empty macros |
| 89 | +# - added pattern extraction | |
| 89 | 90 | |
| 90 | -__version__ = '0.07' | |
| 91 | +__version__ = '0.08' | |
| 91 | 92 | |
| 92 | 93 | #------------------------------------------------------------------------------ |
| 93 | 94 | # TODO: |
| ... | ... | @@ -96,6 +97,13 @@ __version__ = '0.07' |
| 96 | 97 | # + nicer output |
| 97 | 98 | # + setup logging (common with other oletools) |
| 98 | 99 | # + update readme, wiki and decalage.info, pypi (link to sample files) |
| 100 | +# + performance improvement: instead of searching each keyword separately, | |
| 101 | +# first split vba code into a list of words (per line), then check each | |
| 102 | +# word against a dict. (or put vba words into a set/dict?) | |
| 103 | +# + for regex, maybe combine them into a single re with named groups? | |
| 104 | +# + add Yara support, include sample rules? plugins like balbuzard? | |
| 105 | +# + add balbuzard support | |
| 106 | +# + move main into functions | |
| 99 | 107 | |
| 100 | 108 | # TODO later: |
| 101 | 109 | # + output to file |
| ... | ... | @@ -103,7 +111,7 @@ __version__ = '0.07' |
| 103 | 111 | # + look for VBA in embedded documents (e.g. Excel in Word) |
| 104 | 112 | # + support SRP streams (see Lenny's article + links and sample) |
| 105 | 113 | # - python 3.x support |
| 106 | -# - add support for PowerPoint macros (see libclamav, libgsf) | |
| 114 | +# - add support for PowerPoint macros (see libclamav, libgsf), use oledump heuristic? | |
| 107 | 115 | # - check VBA macros in Visio, Access, Project, etc |
| 108 | 116 | # - extract_macros: convert to a class, split long function into smaller methods |
| 109 | 117 | # - extract_macros: read bytes from stream file objects instead of strings |
| ... | ... | @@ -123,6 +131,7 @@ import struct |
| 123 | 131 | import cStringIO |
| 124 | 132 | import math |
| 125 | 133 | import zipfile |
| 134 | +import re | |
| 126 | 135 | |
| 127 | 136 | import thirdparty.olefile as olefile |
| 128 | 137 | |
| ... | ... | @@ -153,6 +162,16 @@ AUTOEXEC_KEYWORDS = { |
| 153 | 162 | #TODO: full list in MS specs?? |
| 154 | 163 | } |
| 155 | 164 | |
| 165 | +# Patterns to be extracted (IP addresses, URLs, etc) | |
| 166 | +# From patterns.py in balbuzard | |
| 167 | +RE_PATTERNS = ( | |
| 168 | + ('URL', re.compile(r'(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~])*[^\.\,\)\(\s]')), | |
| 169 | + ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")), | |
| 170 | + ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')), | |
| 171 | + ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')), | |
| 172 | + ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|COM|VBS|JS|VBE|JSE|BAT|CMD|DLL|SCR|CLASS|JAR)\b")), | |
| 173 | + ) | |
| 174 | + | |
| 156 | 175 | |
| 157 | 176 | #--- FUNCTIONS ---------------------------------------------------------------- |
| 158 | 177 | |
| ... | ... | @@ -696,6 +715,7 @@ def detect_autoexec(vba_code): |
| 696 | 715 | :param vba_code: str, VBA source code |
| 697 | 716 | :return: list of str tuples (keyword, description) |
| 698 | 717 | """ |
| 718 | + #TODO: use regex to find keywords with word boundaries | |
| 699 | 719 | # case-insensitive search |
| 700 | 720 | vba_code = vba_code.lower() |
| 701 | 721 | results = [] |
| ... | ... | @@ -706,6 +726,22 @@ def detect_autoexec(vba_code): |
| 706 | 726 | return results |
| 707 | 727 | |
| 708 | 728 | |
def detect_patterns(vba_code, patterns=None):
    """
    Detect if the VBA code contains specific patterns such as IP addresses,
    URLs, e-mail addresses, executable file names, etc.

    Every occurrence of every pattern is reported, not only the first one.
    A given (pattern type, value) pair is listed once, in order of first
    appearance, so repeated strings do not flood the results.

    :param vba_code: str, VBA source code
    :param patterns: optional iterable of (pattern type, compiled regex)
        tuples to look for; when None, the module-level RE_PATTERNS is used
    :return: list of str tuples (pattern type, value)
    """
    if patterns is None:
        # resolved at call time so the default always reflects the module table
        patterns = RE_PATTERNS
    results = []
    seen = set()
    for pattern_type, pattern_re in patterns:
        # finditer() walks all non-overlapping matches; search() would stop
        # at the first one and hide any further URLs/IPs/file names
        for match in pattern_re.finditer(vba_code):
            item = (pattern_type, match.group())
            if item not in seen:
                seen.add(item)
                results.append(item)
    return results
| 743 | + | |
| 744 | + | |
| 709 | 745 | #=== CLASSES ================================================================= |
| 710 | 746 | |
| 711 | 747 | class VBA_Parser(object): |
| ... | ... | @@ -964,6 +1000,20 @@ if __name__ == '__main__': |
| 964 | 1000 | else: |
| 965 | 1001 | print 'Auto-executable macro keywords: None found' |
| 966 | 1002 | |
| 1003 | + print '- '*39 | |
| 1004 | + patterns = detect_patterns(vba_code) | |
| 1005 | + if patterns: | |
| 1006 | + print 'Patterns found:' | |
| 1007 | + t = prettytable.PrettyTable(('Value', 'Pattern type')) | |
| 1008 | + t.align = 'l' | |
| 1009 | + t.max_width['Value'] = 40 | |
| 1010 | + t.max_width['Pattern type'] = 39 | |
| 1011 | + for pattern_type, value in patterns: | |
| 1012 | + t.add_row((value, pattern_type)) | |
| 1013 | + print t | |
| 1014 | + else: | |
| 1015 | + print 'Patterns: None found' | |
| 1016 | + | |
| 967 | 1017 | else: |
| 968 | 1018 | print 'No VBA macros found.' |
| 969 | 1019 | except TypeError: | ... | ... |