From 99e9238ea6094146bc72e0596d6fe61b88bd3c22 Mon Sep 17 00:00:00 2001 From: Philippe Lagadec Date: Thu, 25 Dec 2014 15:42:12 +0100 Subject: [PATCH] olevba: added pattern extraction --- oletools/olevba.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/oletools/olevba.py b/oletools/olevba.py index 95423a0..e36a444 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -86,8 +86,9 @@ Usage: olevba.py # - ignore empty macros # 2014-12-14 v0.07 PL: - detect_autoexec() is now case-insensitive # 2014-12-15 v0.08 PL: - improved display for empty macros +# - added pattern extraction -__version__ = '0.07' +__version__ = '0.08' #------------------------------------------------------------------------------ # TODO: @@ -96,6 +97,13 @@ __version__ = '0.07' # + nicer output # + setup logging (common with other oletools) # + update readme, wiki and decalage.info, pypi (link to sample files) +# + performance improvement: instead of searching each keyword separately, +# first split vba code into a list of words (per line), then check each +# word against a dict. (or put vba words into a set/dict?) +# + for regex, maybe combine them into a single re with named groups? +# + add Yara support, include sample rules? plugins like balbuzard? +# + add balbuzard support +# + move main into functions # TODO later: # + output to file @@ -103,7 +111,7 @@ __version__ = '0.07' # + look for VBA in embedded documents (e.g. Excel in Word) # + support SRP streams (see Lenny's article + links and sample) # - python 3.x support -# - add support for PowerPoint macros (see libclamav, libgsf) +# - add support for PowerPoint macros (see libclamav, libgsf), use oledump heuristic? # - check VBA macros in Visio, Access, Project, etc # - extract_macros: convert to a class, split long function into smaller methods # - extract_macros: read bytes from stream file objects instead of strings @@ -123,6 +131,7 @@ import struct import cStringIO import math import zipfile +import re import thirdparty.olefile as olefile @@ -153,6 +162,16 @@ AUTOEXEC_KEYWORDS = { #TODO: full list in MS specs?? } +# Patterns to be extracted (IP addresses, URLs, etc) +# From patterns.py in balbuzard +RE_PATTERNS = ( + ('URL', re.compile(r'(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~])*[^\.\,\)\(\s]')), + ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")), + ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')), + ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?