From a299cc6d941fe7cf99a999eb081420d0ca6a9eab Mon Sep 17 00:00:00 2001 From: Philippe Lagadec Date: Sat, 27 Dec 2014 22:30:53 +0100 Subject: [PATCH] olevba: added scanning of multiple files with wildcards, recursive scanning in subdirectories, scanning files within password-protected zips --- oletools/olevba.py | 58 +++++++++++++++++++++++++++++++++++----------------------- oletools/thirdparty/xglob/__init__.py | 1 + oletools/thirdparty/xglob/xglob.py | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 176 insertions(+), 23 deletions(-) create mode 100644 oletools/thirdparty/xglob/__init__.py create mode 100644 oletools/thirdparty/xglob/xglob.py diff --git a/oletools/olevba.py b/oletools/olevba.py index 10db8f5..a9e33b3 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -87,6 +87,9 @@ https://github.com/unixfreak0037/officeparser # - added pattern extraction # 2014-12-25 v0.09 PL: - added suspicious keywords detection # 2014-12-27 v0.10 PL: - added OptionParser, main and process_file +# - uses xglob to scan several files with wildcards +# - option -r to recurse subdirectories +# - option -z to scan files in password-protected zips __version__ = '0.10' @@ -134,6 +137,7 @@ import optparse import thirdparty.olefile as olefile from thirdparty.prettytable import prettytable +from thirdparty.xglob import xglob #--- CONSTANTS ---------------------------------------------------------------- @@ -806,7 +810,7 @@ class VBA_Parser(object): - PowerPoint 2007+ (.pptm, .ppsm) """ - def __init__(self, _file, filename=None): + def __init__(self, filename, data=None): """ Constructor for VBA_Parser @@ -816,20 +820,26 @@ class VBA_Parser(object): """ #TODO: filename should be mandatory, optional data is a string or file-like object #TODO: also support olefile and zipfile as input - self.file = _file + if data is None: + # open file from disk: + _file = filename + 
else: + # file already read in memory, make it a file-like object for zipfile: + _file = cStringIO.StringIO(data) + #self.file = _file self.ole_file = None self.ole_subfiles = [] self.filename = filename self.type = None self.vba_projects = None - if filename is None: - if isinstance(_file, basestring): - if len(_file) < olefile.MINIMAL_OLEFILE_SIZE: - self.filename = _file - else: - self.filename = '' - else: - self.filename = '' + # if filename is None: + # if isinstance(_file, basestring): + # if len(_file) < olefile.MINIMAL_OLEFILE_SIZE: + # self.filename = _file + # else: + # self.filename = '' + # else: + # self.filename = '' if olefile.isOleFile(_file): # This looks like an OLE file logging.info('Parsing OLE file %s' % self.filename) @@ -851,7 +861,7 @@ class VBA_Parser(object): logging.debug('Opening OLE file %s within zip' % subfile) ole_data = z.open(subfile).read() try: - self.ole_subfiles.append(VBA_Parser(ole_data, filename=subfile)) + self.ole_subfiles.append(VBA_Parser(filename=subfile, data=ole_data)) except: logging.debug('%s is not a valid OLE file' % subfile) continue @@ -1005,16 +1015,17 @@ class VBA_Parser(object): self.ole_file.close() -def process_file (filename): +def process_file (filename, data): """ Process a single file """ #TODO: replace print by writing to a provided output file (sys.stdout by default) + print '' print '='*79 print 'File:', filename try: #TODO: handle olefile errors, when an OLE file is malformed - vba = VBA_Parser(filename, filename=filename) + vba = VBA_Parser(filename, data) print 'Type:', vba.type if vba.detect_vba_macros(): print 'Contains VBA Macros:' @@ -1075,8 +1086,8 @@ def process_file (filename): else: print 'No VBA macros found.' 
- except TypeError: - raise + except: #TypeError: + #raise print sys.exc_value @@ -1092,12 +1103,12 @@ def main(): # help='output file') # parser.add_option('-c', '--csv', dest='csv', # help='export results to a CSV file') - # parser.add_option("-r", action="store_true", dest="recursive", - # help='find files recursively in subdirectories.') - # parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, - # help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') - # parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', - # help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') + parser.add_option("-r", action="store_true", dest="recursive", + help='find files recursively in subdirectories.') + parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, + help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') + parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', + help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. 
(default:*)') (options, args) = parser.parse_args() @@ -1109,9 +1120,10 @@ def main(): logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) - for filespec in args: + for filename, data in xglob.iter_files(args, recursive=options.recursive, + zip_password=options.zip_password, zip_fname=options.zip_fname): #data = open(filespec, 'rb').read() - process_file(filespec) + process_file(filename, data) if __name__ == '__main__': main() diff --git a/oletools/thirdparty/xglob/__init__.py b/oletools/thirdparty/xglob/__init__.py new file mode 100644 index 0000000..692c296 --- /dev/null +++ b/oletools/thirdparty/xglob/__init__.py @@ -0,0 +1 @@ +from .xglob import * \ No newline at end of file diff --git a/oletools/thirdparty/xglob/xglob.py b/oletools/thirdparty/xglob/xglob.py new file mode 100644 index 0000000..db7ea28 --- /dev/null +++ b/oletools/thirdparty/xglob/xglob.py @@ -0,0 +1,140 @@ +#! /usr/bin/env python2 +""" +xglob + +xglob is a python package to list files matching wildcards (*, ?, []), +extending the functionality of the glob module from the standard python +library (https://docs.python.org/2/library/glob.html). + +Main features: +- recursive file listing (including subfolders) +- file listing within Zip archives +- helper function to open files specified as arguments, supporting files + within zip archives encrypted with a password + +Author: Philippe Lagadec - http://www.decalage.info +License: BSD, see source code or documentation + +For more info and updates: http://www.decalage.info/xglob +""" + +# LICENSE: +# +# xglob is copyright (c) 2013-2014, Philippe Lagadec (http://www.decalage.info) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#------------------------------------------------------------------------------ +# CHANGELOG: +# 2013-12-04 v0.01 PL: - scan several files from command line args +# 2014-01-14 v0.02 PL: - added riglob, ziglob +# 2014-12-26 v0.03 PL: - moved code from balbuzard into a separate package + +__version__ = '0.03' + + +#=== IMPORTS ================================================================= + +import os, fnmatch, glob, zipfile + +#=== FUNCTIONS =============================================================== + +# recursive glob function to find files in any subfolder: +# inspired by http://stackoverflow.com/questions/14798220/how-can-i-search-sub-folders-using-glob-glob-module-in-python +def rglob (path, pattern='*.*'): + """ + Recursive glob: + similar to glob.glob, but finds files recursively in all subfolders of path. + path: root directory where to search files + pattern: pattern for filenames, using wildcards, e.g. 
*.txt + """ + #TODO: more compatible API with glob: use single param, split path from pattern + return [os.path.join(dirpath, f) + for dirpath, dirnames, files in os.walk(path) + for f in fnmatch.filter(files, pattern)] + + +def riglob (pathname): + """ + Recursive iglob: + similar to glob.iglob, but finds files recursively in all subfolders of path. + pathname: root directory where to search files followed by pattern for + filenames, using wildcards, e.g. *.txt + """ + path, filespec = os.path.split(pathname) + for dirpath, dirnames, files in os.walk(path): + for f in fnmatch.filter(files, filespec): + yield os.path.join(dirpath, f) + + +def ziglob (zipfileobj, pathname): + """ + iglob in a zip: + similar to glob.iglob, but finds files within a zip archive. + - zipfileobj: zipfile.ZipFile object + - pathname: root directory where to search files followed by pattern for + filenames, using wildcards, e.g. *.txt + """ + files = zipfileobj.namelist() + for f in files: print f + for f in fnmatch.filter(files, pathname): + yield f + + +def iter_files(files, recursive=False, zip_password=None, zip_fname='*'): + """ + Open each file provided as argument: + - files is a list of arguments + - if zip_password is None, each file is listed without reading its content. + Wildcards are supported. + - if not, then each file is opened as a zip archive with the provided password + - then files matching zip_fname are opened from the zip archive + + Iterator: yields (filename, data) for each file. If zip_password is None, then + only the filename is returned, and data=None. Otherwise data is the file + content. 
+ """ + # choose recursive or non-recursive iglob: + if recursive: + iglob = riglob + else: + iglob = glob.iglob + for filespec in files: + for filename in iglob(filespec): + if zip_password is not None: + # Each file is expected to be a zip archive: + print 'Opening zip archive %s with provided password' % filename + z = zipfile.ZipFile(filename, 'r') + print 'Looking for file(s) matching "%s"' % zip_fname + for filename in ziglob(z, zip_fname): + print 'Opening file in zip archive:', filename + data = z.read(filename, zip_password) + yield filename, data + else: + # normal file + # do not read the file content, just yield the filename + yield filename, None + print 'Opening file', filename + #data = open(filename, 'rb').read() + #yield filename, data + -- libgit2 0.21.4