Commit a299cc6d941fe7cf99a999eb081420d0ca6a9eab

Authored by Philippe Lagadec
1 parent f133f113

olevba: added scanning of multiple files with wildcards, recursive scanning in subdirectories, scanning files within password-protected zips
oletools/olevba.py
@@ -87,6 +87,9 @@ https://github.com/unixfreak0037/officeparser @@ -87,6 +87,9 @@ https://github.com/unixfreak0037/officeparser
87 # - added pattern extraction 87 # - added pattern extraction
88 # 2014-12-25 v0.09 PL: - added suspicious keywords detection 88 # 2014-12-25 v0.09 PL: - added suspicious keywords detection
89 # 2014-12-27 v0.10 PL: - added OptionParser, main and process_file 89 # 2014-12-27 v0.10 PL: - added OptionParser, main and process_file
  90 +# - uses xglob to scan several files with wildcards
  91 +# - option -r to recurse subdirectories
  92 +# - option -z to scan files in password-protected zips
90 93
91 __version__ = '0.10' 94 __version__ = '0.10'
92 95
@@ -134,6 +137,7 @@ import optparse @@ -134,6 +137,7 @@ import optparse
134 137
135 import thirdparty.olefile as olefile 138 import thirdparty.olefile as olefile
136 from thirdparty.prettytable import prettytable 139 from thirdparty.prettytable import prettytable
  140 +from thirdparty.xglob import xglob
137 141
138 #--- CONSTANTS ---------------------------------------------------------------- 142 #--- CONSTANTS ----------------------------------------------------------------
139 143
@@ -806,7 +810,7 @@ class VBA_Parser(object): @@ -806,7 +810,7 @@ class VBA_Parser(object):
806 - PowerPoint 2007+ (.pptm, .ppsm) 810 - PowerPoint 2007+ (.pptm, .ppsm)
807 """ 811 """
808 812
809 - def __init__(self, _file, filename=None): 813 + def __init__(self, filename, data=None):
810 """ 814 """
811 Constructor for VBA_Parser 815 Constructor for VBA_Parser
812 816
@@ -816,20 +820,26 @@ class VBA_Parser(object): @@ -816,20 +820,26 @@ class VBA_Parser(object):
816 """ 820 """
817 #TODO: filename should be mandatory, optional data is a string or file-like object 821 #TODO: filename should be mandatory, optional data is a string or file-like object
818 #TODO: also support olefile and zipfile as input 822 #TODO: also support olefile and zipfile as input
819 - self.file = _file 823 + if data is None:
  824 + # open file from disk:
  825 + _file = filename
  826 + else:
  827 + # file already read in memory, make it a file-like object for zipfile:
  828 + _file = cStringIO.StringIO(data)
  829 + #self.file = _file
820 self.ole_file = None 830 self.ole_file = None
821 self.ole_subfiles = [] 831 self.ole_subfiles = []
822 self.filename = filename 832 self.filename = filename
823 self.type = None 833 self.type = None
824 self.vba_projects = None 834 self.vba_projects = None
825 - if filename is None:  
826 - if isinstance(_file, basestring):  
827 - if len(_file) < olefile.MINIMAL_OLEFILE_SIZE:  
828 - self.filename = _file  
829 - else:  
830 - self.filename = '<file in bytes string>'  
831 - else:  
832 - self.filename = '<file-like object>' 835 + # if filename is None:
  836 + # if isinstance(_file, basestring):
  837 + # if len(_file) < olefile.MINIMAL_OLEFILE_SIZE:
  838 + # self.filename = _file
  839 + # else:
  840 + # self.filename = '<file in bytes string>'
  841 + # else:
  842 + # self.filename = '<file-like object>'
833 if olefile.isOleFile(_file): 843 if olefile.isOleFile(_file):
834 # This looks like an OLE file 844 # This looks like an OLE file
835 logging.info('Parsing OLE file %s' % self.filename) 845 logging.info('Parsing OLE file %s' % self.filename)
@@ -851,7 +861,7 @@ class VBA_Parser(object): @@ -851,7 +861,7 @@ class VBA_Parser(object):
851 logging.debug('Opening OLE file %s within zip' % subfile) 861 logging.debug('Opening OLE file %s within zip' % subfile)
852 ole_data = z.open(subfile).read() 862 ole_data = z.open(subfile).read()
853 try: 863 try:
854 - self.ole_subfiles.append(VBA_Parser(ole_data, filename=subfile)) 864 + self.ole_subfiles.append(VBA_Parser(filename=subfile, data=ole_data))
855 except: 865 except:
856 logging.debug('%s is not a valid OLE file' % subfile) 866 logging.debug('%s is not a valid OLE file' % subfile)
857 continue 867 continue
@@ -1005,16 +1015,17 @@ class VBA_Parser(object): @@ -1005,16 +1015,17 @@ class VBA_Parser(object):
1005 self.ole_file.close() 1015 self.ole_file.close()
1006 1016
1007 1017
1008 -def process_file (filename): 1018 +def process_file (filename, data):
1009 """ 1019 """
1010 Process a single file 1020 Process a single file
1011 """ 1021 """
1012 #TODO: replace print by writing to a provided output file (sys.stdout by default) 1022 #TODO: replace print by writing to a provided output file (sys.stdout by default)
  1023 + print ''
1013 print '='*79 1024 print '='*79
1014 print 'File:', filename 1025 print 'File:', filename
1015 try: 1026 try:
1016 #TODO: handle olefile errors, when an OLE file is malformed 1027 #TODO: handle olefile errors, when an OLE file is malformed
1017 - vba = VBA_Parser(filename, filename=filename) 1028 + vba = VBA_Parser(filename, data)
1018 print 'Type:', vba.type 1029 print 'Type:', vba.type
1019 if vba.detect_vba_macros(): 1030 if vba.detect_vba_macros():
1020 print 'Contains VBA Macros:' 1031 print 'Contains VBA Macros:'
@@ -1075,8 +1086,8 @@ def process_file (filename): @@ -1075,8 +1086,8 @@ def process_file (filename):
1075 1086
1076 else: 1087 else:
1077 print 'No VBA macros found.' 1088 print 'No VBA macros found.'
1078 - except TypeError:  
1079 - raise 1089 + except: #TypeError:
  1090 + #raise
1080 print sys.exc_value 1091 print sys.exc_value
1081 1092
1082 1093
@@ -1092,12 +1103,12 @@ def main(): @@ -1092,12 +1103,12 @@ def main():
1092 # help='output file') 1103 # help='output file')
1093 # parser.add_option('-c', '--csv', dest='csv', 1104 # parser.add_option('-c', '--csv', dest='csv',
1094 # help='export results to a CSV file') 1105 # help='export results to a CSV file')
1095 - # parser.add_option("-r", action="store_true", dest="recursive",  
1096 - # help='find files recursively in subdirectories.')  
1097 - # parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,  
1098 - # help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')  
1099 - # parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',  
1100 - # help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') 1106 + parser.add_option("-r", action="store_true", dest="recursive",
  1107 + help='find files recursively in subdirectories.')
  1108 + parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
  1109 + help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')
  1110 + parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
  1111 + help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
1101 1112
1102 (options, args) = parser.parse_args() 1113 (options, args) = parser.parse_args()
1103 1114
@@ -1109,9 +1120,10 @@ def main(): @@ -1109,9 +1120,10 @@ def main():
1109 1120
1110 logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) 1121 logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO)
1111 1122
1112 - for filespec in args: 1123 + for filename, data in xglob.iter_files(args, recursive=options.recursive,
  1124 + zip_password=options.zip_password, zip_fname=options.zip_fname):
1113 #data = open(filespec, 'rb').read() 1125 #data = open(filespec, 'rb').read()
1114 - process_file(filespec) 1126 + process_file(filename, data)
1115 1127
1116 if __name__ == '__main__': 1128 if __name__ == '__main__':
1117 main() 1129 main()
oletools/thirdparty/xglob/__init__.py 0 → 100644
  1 +from .xglob import *
0 \ No newline at end of file 2 \ No newline at end of file
oletools/thirdparty/xglob/xglob.py 0 → 100644
  1 +#! /usr/bin/env python2
  2 +"""
  3 +xglob
  4 +
  5 +xglob is a python package to list files matching wildcards (*, ?, []),
  6 +extending the functionality of the glob module from the standard python
  7 +library (https://docs.python.org/2/library/glob.html).
  8 +
  9 +Main features:
  10 +- recursive file listing (including subfolders)
  11 +- file listing within Zip archives
  12 +- helper function to open files specified as arguments, supporting files
  13 + within zip archives encrypted with a password
  14 +
  15 +Author: Philippe Lagadec - http://www.decalage.info
  16 +License: BSD, see source code or documentation
  17 +
  18 +For more info and updates: http://www.decalage.info/xglob
  19 +"""
  20 +
  21 +# LICENSE:
  22 +#
  23 +# xglob is copyright (c) 2013-2014, Philippe Lagadec (http://www.decalage.info)
  24 +# All rights reserved.
  25 +#
  26 +# Redistribution and use in source and binary forms, with or without modification,
  27 +# are permitted provided that the following conditions are met:
  28 +#
  29 +# * Redistributions of source code must retain the above copyright notice, this
  30 +# list of conditions and the following disclaimer.
  31 +# * Redistributions in binary form must reproduce the above copyright notice,
  32 +# this list of conditions and the following disclaimer in the documentation
  33 +# and/or other materials provided with the distribution.
  34 +#
  35 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  36 +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  37 +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  38 +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  39 +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  40 +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  41 +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  42 +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  43 +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45 +
  46 +
  47 +#------------------------------------------------------------------------------
  48 +# CHANGELOG:
  49 +# 2013-12-04 v0.01 PL: - scan several files from command line args
  50 +# 2014-01-14 v0.02 PL: - added riglob, ziglob
  51 +# 2014-12-26 v0.03 PL: - moved code from balbuzard into a separate package
  52 +
  53 +__version__ = '0.03'
  54 +
  55 +
  56 +#=== IMPORTS =================================================================
  57 +
  58 +import os, fnmatch, glob, zipfile
  59 +
  60 +#=== FUNCTIONS ===============================================================
  61 +
  62 +# recursive glob function to find files in any subfolder:
  63 +# inspired by http://stackoverflow.com/questions/14798220/how-can-i-search-sub-folders-using-glob-glob-module-in-python
def rglob(path, pattern='*.*'):
    """
    Recursive glob:
    like glob.glob, but also descends into every subfolder of path.

    path: root directory where the search starts
    pattern: wildcard pattern applied to file names only, e.g. *.txt
             (the default '*.*' only matches names containing a dot)

    Returns a list of matching file paths, each one joined with the
    directory in which the file was found.
    """
    #TODO: more compatible API with glob: use single param, split path from pattern
    matches = []
    for folder, _subfolders, names in os.walk(path):
        # fnmatch.filter keeps only the names matching the pattern
        for name in fnmatch.filter(names, pattern):
            matches.append(os.path.join(folder, name))
    return matches
  75 +
  76 +
def riglob(pathname):
    """
    Recursive iglob:
    similar to glob.iglob, but finds files recursively in all subfolders.

    pathname: root directory where to search files, followed by a pattern
              for file names using wildcards, e.g. some/dir/*.txt
              Wildcards are only interpreted in the last path component;
              the directory part is passed to os.walk as is.
              If there is no directory part (e.g. '*.txt'), the search
              starts in the current directory.

    Iterator: yields the path of each matching file, joined with the
    directory in which it was found.
    """
    path, filespec = os.path.split(pathname)
    if not path:
        # os.walk('') yields nothing, so a bare pattern such as '*.txt'
        # would never match anything: walk the current directory instead.
        path = os.curdir
    for dirpath, _dirnames, files in os.walk(path):
        for f in fnmatch.filter(files, filespec):
            yield os.path.join(dirpath, f)
  88 +
  89 +
def ziglob(zipfileobj, pathname):
    """
    iglob in a zip:
    similar to glob.iglob, but finds files within a zip archive.

    - zipfileobj: zipfile.ZipFile object
    - pathname: wildcard pattern matched with fnmatch against the full
      member names returned by zipfileobj.namelist(), e.g. *.txt

    Iterator: yields the name of each matching archive member.
    Nothing is written to stdout.
    """
    files = zipfileobj.namelist()
    for f in fnmatch.filter(files, pathname):
        yield f
  102 +
  103 +
def iter_files(files, recursive=False, zip_password=None, zip_fname='*'):
    """
    Open each file provided as argument:
    - files is a list of arguments (file names or patterns). Wildcards are
      supported.
    - recursive: if True, patterns are expanded with riglob (subfolders are
      searched too), otherwise with glob.iglob.
    - if zip_password is None, each file is listed without reading its content.
    - if not, then each file is opened as a zip archive with the provided
      password,
    - then files matching zip_fname are opened from the zip archive.

    Iterator: yields (filename, data) for each file. If zip_password is None,
    then filename is the path on disk and data=None. Otherwise filename is the
    name of the member within the zip archive and data is its content.

    A progress message is printed to stdout for each archive and file.
    """
    # choose recursive or non-recursive iglob:
    iglob = riglob if recursive else glob.iglob
    for filespec in files:
        for filename in iglob(filespec):
            if zip_password is None:
                # normal file:
                # do not read the file content, just yield the filename.
                # The message is printed before yielding, so that it shows up
                # before the caller processes the file and not afterwards.
                print('Opening file %s' % filename)
                yield filename, None
                continue
            # Each file is expected to be a zip archive:
            print('Opening zip archive %s with provided password' % filename)
            z = zipfile.ZipFile(filename, 'r')
            try:
                print('Looking for file(s) matching "%s"' % zip_fname)
                # a distinct name is used for archive members, to avoid
                # shadowing the path of the archive itself
                for subfilename in ziglob(z, zip_fname):
                    print('Opening file in zip archive: %s' % subfilename)
                    data = z.read(subfilename, zip_password)
                    yield subfilename, data
            finally:
                # release the archive handle, even if reading fails or the
                # caller stops iterating early
                z.close()
  140 +