Commit a299cc6d941fe7cf99a999eb081420d0ca6a9eab
1 parent
f133f113
olevba: added scanning of multiple files with wildcards, recursive scanning in subdirectories, scanning files within password-protected zips
Showing 3 changed files with 176 additions and 23 deletions
oletools/olevba.py
| @@ -87,6 +87,9 @@ https://github.com/unixfreak0037/officeparser | @@ -87,6 +87,9 @@ https://github.com/unixfreak0037/officeparser | ||
| 87 | # - added pattern extraction | 87 | # - added pattern extraction |
| 88 | # 2014-12-25 v0.09 PL: - added suspicious keywords detection | 88 | # 2014-12-25 v0.09 PL: - added suspicious keywords detection |
| 89 | # 2014-12-27 v0.10 PL: - added OptionParser, main and process_file | 89 | # 2014-12-27 v0.10 PL: - added OptionParser, main and process_file |
| 90 | +# - uses xglob to scan several files with wildcards | ||
| 91 | +# - option -r to recurse subdirectories | ||
| 92 | +# - option -z to scan files in password-protected zips | ||
| 90 | 93 | ||
| 91 | __version__ = '0.10' | 94 | __version__ = '0.10' |
| 92 | 95 | ||
| @@ -134,6 +137,7 @@ import optparse | @@ -134,6 +137,7 @@ import optparse | ||
| 134 | 137 | ||
| 135 | import thirdparty.olefile as olefile | 138 | import thirdparty.olefile as olefile |
| 136 | from thirdparty.prettytable import prettytable | 139 | from thirdparty.prettytable import prettytable |
| 140 | +from thirdparty.xglob import xglob | ||
| 137 | 141 | ||
| 138 | #--- CONSTANTS ---------------------------------------------------------------- | 142 | #--- CONSTANTS ---------------------------------------------------------------- |
| 139 | 143 | ||
| @@ -806,7 +810,7 @@ class VBA_Parser(object): | @@ -806,7 +810,7 @@ class VBA_Parser(object): | ||
| 806 | - PowerPoint 2007+ (.pptm, .ppsm) | 810 | - PowerPoint 2007+ (.pptm, .ppsm) |
| 807 | """ | 811 | """ |
| 808 | 812 | ||
| 809 | - def __init__(self, _file, filename=None): | 813 | + def __init__(self, filename, data=None): |
| 810 | """ | 814 | """ |
| 811 | Constructor for VBA_Parser | 815 | Constructor for VBA_Parser |
| 812 | 816 | ||
| @@ -816,20 +820,26 @@ class VBA_Parser(object): | @@ -816,20 +820,26 @@ class VBA_Parser(object): | ||
| 816 | """ | 820 | """ |
| 817 | #TODO: filename should be mandatory, optional data is a string or file-like object | 821 | #TODO: filename should be mandatory, optional data is a string or file-like object |
| 818 | #TODO: also support olefile and zipfile as input | 822 | #TODO: also support olefile and zipfile as input |
| 819 | - self.file = _file | 823 | + if data is None: |
| 824 | + # open file from disk: | ||
| 825 | + _file = filename | ||
| 826 | + else: | ||
| 827 | + # file already read in memory, make it a file-like object for zipfile: | ||
| 828 | + _file = cStringIO.StringIO(data) | ||
| 829 | + #self.file = _file | ||
| 820 | self.ole_file = None | 830 | self.ole_file = None |
| 821 | self.ole_subfiles = [] | 831 | self.ole_subfiles = [] |
| 822 | self.filename = filename | 832 | self.filename = filename |
| 823 | self.type = None | 833 | self.type = None |
| 824 | self.vba_projects = None | 834 | self.vba_projects = None |
| 825 | - if filename is None: | ||
| 826 | - if isinstance(_file, basestring): | ||
| 827 | - if len(_file) < olefile.MINIMAL_OLEFILE_SIZE: | ||
| 828 | - self.filename = _file | ||
| 829 | - else: | ||
| 830 | - self.filename = '<file in bytes string>' | ||
| 831 | - else: | ||
| 832 | - self.filename = '<file-like object>' | 835 | + # if filename is None: |
| 836 | + # if isinstance(_file, basestring): | ||
| 837 | + # if len(_file) < olefile.MINIMAL_OLEFILE_SIZE: | ||
| 838 | + # self.filename = _file | ||
| 839 | + # else: | ||
| 840 | + # self.filename = '<file in bytes string>' | ||
| 841 | + # else: | ||
| 842 | + # self.filename = '<file-like object>' | ||
| 833 | if olefile.isOleFile(_file): | 843 | if olefile.isOleFile(_file): |
| 834 | # This looks like an OLE file | 844 | # This looks like an OLE file |
| 835 | logging.info('Parsing OLE file %s' % self.filename) | 845 | logging.info('Parsing OLE file %s' % self.filename) |
| @@ -851,7 +861,7 @@ class VBA_Parser(object): | @@ -851,7 +861,7 @@ class VBA_Parser(object): | ||
| 851 | logging.debug('Opening OLE file %s within zip' % subfile) | 861 | logging.debug('Opening OLE file %s within zip' % subfile) |
| 852 | ole_data = z.open(subfile).read() | 862 | ole_data = z.open(subfile).read() |
| 853 | try: | 863 | try: |
| 854 | - self.ole_subfiles.append(VBA_Parser(ole_data, filename=subfile)) | 864 | + self.ole_subfiles.append(VBA_Parser(filename=subfile, data=ole_data)) |
| 855 | except: | 865 | except: |
| 856 | logging.debug('%s is not a valid OLE file' % subfile) | 866 | logging.debug('%s is not a valid OLE file' % subfile) |
| 857 | continue | 867 | continue |
| @@ -1005,16 +1015,17 @@ class VBA_Parser(object): | @@ -1005,16 +1015,17 @@ class VBA_Parser(object): | ||
| 1005 | self.ole_file.close() | 1015 | self.ole_file.close() |
| 1006 | 1016 | ||
| 1007 | 1017 | ||
| 1008 | -def process_file (filename): | 1018 | +def process_file (filename, data): |
| 1009 | """ | 1019 | """ |
| 1010 | Process a single file | 1020 | Process a single file |
| 1011 | """ | 1021 | """ |
| 1012 | #TODO: replace print by writing to a provided output file (sys.stdout by default) | 1022 | #TODO: replace print by writing to a provided output file (sys.stdout by default) |
| 1023 | + print '' | ||
| 1013 | print '='*79 | 1024 | print '='*79 |
| 1014 | print 'File:', filename | 1025 | print 'File:', filename |
| 1015 | try: | 1026 | try: |
| 1016 | #TODO: handle olefile errors, when an OLE file is malformed | 1027 | #TODO: handle olefile errors, when an OLE file is malformed |
| 1017 | - vba = VBA_Parser(filename, filename=filename) | 1028 | + vba = VBA_Parser(filename, data) |
| 1018 | print 'Type:', vba.type | 1029 | print 'Type:', vba.type |
| 1019 | if vba.detect_vba_macros(): | 1030 | if vba.detect_vba_macros(): |
| 1020 | print 'Contains VBA Macros:' | 1031 | print 'Contains VBA Macros:' |
| @@ -1075,8 +1086,8 @@ def process_file (filename): | @@ -1075,8 +1086,8 @@ def process_file (filename): | ||
| 1075 | 1086 | ||
| 1076 | else: | 1087 | else: |
| 1077 | print 'No VBA macros found.' | 1088 | print 'No VBA macros found.' |
| 1078 | - except TypeError: | ||
| 1079 | - raise | 1089 | + except: #TypeError: |
| 1090 | + #raise | ||
| 1080 | print sys.exc_value | 1091 | print sys.exc_value |
| 1081 | 1092 | ||
| 1082 | 1093 | ||
| @@ -1092,12 +1103,12 @@ def main(): | @@ -1092,12 +1103,12 @@ def main(): | ||
| 1092 | # help='output file') | 1103 | # help='output file') |
| 1093 | # parser.add_option('-c', '--csv', dest='csv', | 1104 | # parser.add_option('-c', '--csv', dest='csv', |
| 1094 | # help='export results to a CSV file') | 1105 | # help='export results to a CSV file') |
| 1095 | - # parser.add_option("-r", action="store_true", dest="recursive", | ||
| 1096 | - # help='find files recursively in subdirectories.') | ||
| 1097 | - # parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, | ||
| 1098 | - # help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') | ||
| 1099 | - # parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', | ||
| 1100 | - # help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') | 1106 | + parser.add_option("-r", action="store_true", dest="recursive", |
| 1107 | + help='find files recursively in subdirectories.') | ||
| 1108 | + parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, | ||
| 1109 | + help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') | ||
| 1110 | + parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', | ||
| 1111 | + help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') | ||
| 1101 | 1112 | ||
| 1102 | (options, args) = parser.parse_args() | 1113 | (options, args) = parser.parse_args() |
| 1103 | 1114 | ||
| @@ -1109,9 +1120,10 @@ def main(): | @@ -1109,9 +1120,10 @@ def main(): | ||
| 1109 | 1120 | ||
| 1110 | logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) | 1121 | logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) |
| 1111 | 1122 | ||
| 1112 | - for filespec in args: | 1123 | + for filename, data in xglob.iter_files(args, recursive=options.recursive, |
| 1124 | + zip_password=options.zip_password, zip_fname=options.zip_fname): | ||
| 1113 | #data = open(filespec, 'rb').read() | 1125 | #data = open(filespec, 'rb').read() |
| 1114 | - process_file(filespec) | 1126 | + process_file(filename, data) |
| 1115 | 1127 | ||
| 1116 | if __name__ == '__main__': | 1128 | if __name__ == '__main__': |
| 1117 | main() | 1129 | main() |
oletools/thirdparty/xglob/__init__.py
0 → 100644
oletools/thirdparty/xglob/xglob.py
0 → 100644
| 1 | +#! /usr/bin/env python2 | ||
| 2 | +""" | ||
| 3 | +xglob | ||
| 4 | + | ||
| 5 | +xglob is a python package to list files matching wildcards (*, ?, []), | ||
| 6 | +extending the functionality of the glob module from the standard python | ||
| 7 | +library (https://docs.python.org/2/library/glob.html). | ||
| 8 | + | ||
| 9 | +Main features: | ||
| 10 | +- recursive file listing (including subfolders) | ||
| 11 | +- file listing within Zip archives | ||
| 12 | +- helper function to open files specified as arguments, supporting files | ||
| 13 | + within zip archives encrypted with a password | ||
| 14 | + | ||
| 15 | +Author: Philippe Lagadec - http://www.decalage.info | ||
| 16 | +License: BSD, see source code or documentation | ||
| 17 | + | ||
| 18 | +For more info and updates: http://www.decalage.info/xglob | ||
| 19 | +""" | ||
| 20 | + | ||
| 21 | +# LICENSE: | ||
| 22 | +# | ||
| 23 | +# xglob is copyright (c) 2013-2014, Philippe Lagadec (http://www.decalage.info) | ||
| 24 | +# All rights reserved. | ||
| 25 | +# | ||
| 26 | +# Redistribution and use in source and binary forms, with or without modification, | ||
| 27 | +# are permitted provided that the following conditions are met: | ||
| 28 | +# | ||
| 29 | +# * Redistributions of source code must retain the above copyright notice, this | ||
| 30 | +# list of conditions and the following disclaimer. | ||
| 31 | +# * Redistributions in binary form must reproduce the above copyright notice, | ||
| 32 | +# this list of conditions and the following disclaimer in the documentation | ||
| 33 | +# and/or other materials provided with the distribution. | ||
| 34 | +# | ||
| 35 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
| 36 | +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
| 37 | +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 38 | +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
| 39 | +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 40 | +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
| 41 | +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
| 42 | +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
| 43 | +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 44 | +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 45 | + | ||
| 46 | + | ||
| 47 | +#------------------------------------------------------------------------------ | ||
| 48 | +# CHANGELOG: | ||
| 49 | +# 2013-12-04 v0.01 PL: - scan several files from command line args | ||
| 50 | +# 2014-01-14 v0.02 PL: - added riglob, ziglob | ||
| 51 | +# 2014-12-26 v0.03 PL: - moved code from balbuzard into a separate package | ||
| 52 | + | ||
| 53 | +__version__ = '0.03' | ||
| 54 | + | ||
| 55 | + | ||
| 56 | +#=== IMPORTS ================================================================= | ||
| 57 | + | ||
| 58 | +import os, fnmatch, glob, zipfile | ||
| 59 | + | ||
| 60 | +#=== FUNCTIONS =============================================================== | ||
| 61 | + | ||
| 62 | +# recursive glob function to find files in any subfolder: | ||
| 63 | +# inspired by http://stackoverflow.com/questions/14798220/how-can-i-search-sub-folders-using-glob-glob-module-in-python | ||
def rglob(path, pattern='*.*'):
    """
    Recursive glob:
    similar to glob.glob, but finds files recursively in all subfolders of path.

    - path: root directory where to search files. An empty string means the
      current directory.
    - pattern: pattern for filenames, using wildcards, e.g. *.txt
      (matched against the file name only, not the directory part)

    Returns a list of paths, each one being the directory where the file was
    found joined with the matching file name.
    """
    #TODO: more compatible API with glob: use single param, split path from pattern
    # os.walk('') does not visit anything, so an empty root would silently
    # return no result: fall back to the current directory instead.
    if not path:
        path = os.curdir
    return [os.path.join(dirpath, f)
            for dirpath, _dirnames, files in os.walk(path)
            for f in fnmatch.filter(files, pattern)]
| 75 | + | ||
| 76 | + | ||
def riglob(pathname):
    """
    Recursive iglob:
    similar to glob.iglob, but finds files recursively in all subfolders of path.

    - pathname: root directory where to search files followed by pattern for
      filenames, using wildcards, e.g. *.txt
      If there is no directory part (e.g. '*.txt'), the search starts from
      the current directory.

    Generator: yields the path of each matching file (directory where it was
    found joined with the file name).
    """
    path, filespec = os.path.split(pathname)
    # A bare pattern gives an empty directory part, and os.walk('') does not
    # visit anything: browse the current directory in that case.
    if not path:
        path = os.curdir
    for dirpath, _dirnames, files in os.walk(path):
        for f in fnmatch.filter(files, filespec):
            yield os.path.join(dirpath, f)
| 88 | + | ||
| 89 | + | ||
def ziglob(zipfileobj, pathname):
    """
    iglob in a zip:
    similar to glob.iglob, but finds files within a zip archive.

    - zipfileobj: zipfile.ZipFile object
    - pathname: pattern for the names of archive members, using wildcards,
      e.g. *.txt. It is matched with fnmatch against each full member name
      returned by zipfileobj.namelist().

    Generator: yields the name of each matching archive member, in the order
    of namelist(). Nothing is printed: listing members is side-effect free.
    """
    for member in fnmatch.filter(zipfileobj.namelist(), pathname):
        yield member
| 102 | + | ||
| 103 | + | ||
def iter_files(files, recursive=False, zip_password=None, zip_fname='*'):
    """
    Open each file provided as argument:
    - files is a list of arguments (file names or patterns, wildcards are
      supported)
    - recursive: if True, each pattern is also searched in all subfolders
      (using riglob), otherwise glob.iglob is used
    - if zip_password is None, each file is listed without reading its content
    - if not, then each file is opened as a zip archive with the provided
      password
    - then files matching zip_fname are opened from the zip archive

    Iterator: yields (filename, data) for each file. If zip_password is None, then
    only the filename is returned, and data=None. Otherwise filename is the
    name of the file within the zip archive and data is its content.
    """
    # choose recursive or non-recursive iglob:
    iglob = riglob if recursive else glob.iglob
    for filespec in files:
        for filename in iglob(filespec):
            if zip_password is None:
                # normal file:
                # do not read the file content, just yield the filename.
                # The message is emitted before yielding, so that it shows up
                # before the caller starts processing the file.
                print('Opening file %s' % filename)
                yield filename, None
                continue
            # Each file is expected to be a zip archive:
            print('Opening zip archive %s with provided password' % filename)
            z = zipfile.ZipFile(filename, 'r')
            # try/finally rather than "with": ZipFile is only a context
            # manager since Python 2.7. This also closes the archive when the
            # caller stops iterating before the end.
            try:
                print('Looking for file(s) matching "%s"' % zip_fname)
                # distinct name, to avoid shadowing the archive's filename:
                for subfilename in ziglob(z, zip_fname):
                    print('Opening file in zip archive: %s' % subfilename)
                    data = z.read(subfilename, zip_password)
                    yield subfilename, data
            finally:
                z.close()
| 140 | + |