Commit a299cc6d941fe7cf99a999eb081420d0ca6a9eab
1 parent
f133f113
olevba: added scanning of multiple files with wildcards, recursive scanning in s…
…ubdirectories, scanning files within password-protected zips
Showing
3 changed files
with
176 additions
and
23 deletions
oletools/olevba.py
| ... | ... | @@ -87,6 +87,9 @@ https://github.com/unixfreak0037/officeparser |
| 87 | 87 | # - added pattern extraction |
| 88 | 88 | # 2014-12-25 v0.09 PL: - added suspicious keywords detection |
| 89 | 89 | # 2014-12-27 v0.10 PL: - added OptionParser, main and process_file |
| 90 | +# - uses xglob to scan several files with wildcards | |
| 91 | +# - option -r to recurse subdirectories | |
| 92 | +# - option -z to scan files in password-protected zips | |
| 90 | 93 | |
| 91 | 94 | __version__ = '0.10' |
| 92 | 95 | |
| ... | ... | @@ -134,6 +137,7 @@ import optparse |
| 134 | 137 | |
| 135 | 138 | import thirdparty.olefile as olefile |
| 136 | 139 | from thirdparty.prettytable import prettytable |
| 140 | +from thirdparty.xglob import xglob | |
| 137 | 141 | |
| 138 | 142 | #--- CONSTANTS ---------------------------------------------------------------- |
| 139 | 143 | |
| ... | ... | @@ -806,7 +810,7 @@ class VBA_Parser(object): |
| 806 | 810 | - PowerPoint 2007+ (.pptm, .ppsm) |
| 807 | 811 | """ |
| 808 | 812 | |
| 809 | - def __init__(self, _file, filename=None): | |
| 813 | + def __init__(self, filename, data=None): | |
| 810 | 814 | """ |
| 811 | 815 | Constructor for VBA_Parser |
| 812 | 816 | |
| ... | ... | @@ -816,20 +820,26 @@ class VBA_Parser(object): |
| 816 | 820 | """ |
| 817 | 821 | #TODO: filename should be mandatory, optional data is a string or file-like object |
| 818 | 822 | #TODO: also support olefile and zipfile as input |
| 819 | - self.file = _file | |
| 823 | + if data is None: | |
| 824 | + # open file from disk: | |
| 825 | + _file = filename | |
| 826 | + else: | |
| 827 | + # file already read in memory, make it a file-like object for zipfile: | |
| 828 | + _file = cStringIO.StringIO(data) | |
| 829 | + #self.file = _file | |
| 820 | 830 | self.ole_file = None |
| 821 | 831 | self.ole_subfiles = [] |
| 822 | 832 | self.filename = filename |
| 823 | 833 | self.type = None |
| 824 | 834 | self.vba_projects = None |
| 825 | - if filename is None: | |
| 826 | - if isinstance(_file, basestring): | |
| 827 | - if len(_file) < olefile.MINIMAL_OLEFILE_SIZE: | |
| 828 | - self.filename = _file | |
| 829 | - else: | |
| 830 | - self.filename = '<file in bytes string>' | |
| 831 | - else: | |
| 832 | - self.filename = '<file-like object>' | |
| 835 | + # if filename is None: | |
| 836 | + # if isinstance(_file, basestring): | |
| 837 | + # if len(_file) < olefile.MINIMAL_OLEFILE_SIZE: | |
| 838 | + # self.filename = _file | |
| 839 | + # else: | |
| 840 | + # self.filename = '<file in bytes string>' | |
| 841 | + # else: | |
| 842 | + # self.filename = '<file-like object>' | |
| 833 | 843 | if olefile.isOleFile(_file): |
| 834 | 844 | # This looks like an OLE file |
| 835 | 845 | logging.info('Parsing OLE file %s' % self.filename) |
| ... | ... | @@ -851,7 +861,7 @@ class VBA_Parser(object): |
| 851 | 861 | logging.debug('Opening OLE file %s within zip' % subfile) |
| 852 | 862 | ole_data = z.open(subfile).read() |
| 853 | 863 | try: |
| 854 | - self.ole_subfiles.append(VBA_Parser(ole_data, filename=subfile)) | |
| 864 | + self.ole_subfiles.append(VBA_Parser(filename=subfile, data=ole_data)) | |
| 855 | 865 | except: |
| 856 | 866 | logging.debug('%s is not a valid OLE file' % subfile) |
| 857 | 867 | continue |
| ... | ... | @@ -1005,16 +1015,17 @@ class VBA_Parser(object): |
| 1005 | 1015 | self.ole_file.close() |
| 1006 | 1016 | |
| 1007 | 1017 | |
| 1008 | -def process_file (filename): | |
| 1018 | +def process_file (filename, data): | |
| 1009 | 1019 | """ |
| 1010 | 1020 | Process a single file |
| 1011 | 1021 | """ |
| 1012 | 1022 | #TODO: replace print by writing to a provided output file (sys.stdout by default) |
| 1023 | + print '' | |
| 1013 | 1024 | print '='*79 |
| 1014 | 1025 | print 'File:', filename |
| 1015 | 1026 | try: |
| 1016 | 1027 | #TODO: handle olefile errors, when an OLE file is malformed |
| 1017 | - vba = VBA_Parser(filename, filename=filename) | |
| 1028 | + vba = VBA_Parser(filename, data) | |
| 1018 | 1029 | print 'Type:', vba.type |
| 1019 | 1030 | if vba.detect_vba_macros(): |
| 1020 | 1031 | print 'Contains VBA Macros:' |
| ... | ... | @@ -1075,8 +1086,8 @@ def process_file (filename): |
| 1075 | 1086 | |
| 1076 | 1087 | else: |
| 1077 | 1088 | print 'No VBA macros found.' |
| 1078 | - except TypeError: | |
| 1079 | - raise | |
| 1089 | + except: #TypeError: | |
| 1090 | + #raise | |
| 1080 | 1091 | print sys.exc_value |
| 1081 | 1092 | |
| 1082 | 1093 | |
| ... | ... | @@ -1092,12 +1103,12 @@ def main(): |
| 1092 | 1103 | # help='output file') |
| 1093 | 1104 | # parser.add_option('-c', '--csv', dest='csv', |
| 1094 | 1105 | # help='export results to a CSV file') |
| 1095 | - # parser.add_option("-r", action="store_true", dest="recursive", | |
| 1096 | - # help='find files recursively in subdirectories.') | |
| 1097 | - # parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, | |
| 1098 | - # help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') | |
| 1099 | - # parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', | |
| 1100 | - # help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') | |
| 1106 | + parser.add_option("-r", action="store_true", dest="recursive", | |
| 1107 | + help='find files recursively in subdirectories.') | |
| 1108 | + parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, | |
| 1109 | + help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') | |
| 1110 | + parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', | |
| 1111 | + help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') | |
| 1101 | 1112 | |
| 1102 | 1113 | (options, args) = parser.parse_args() |
| 1103 | 1114 | |
| ... | ... | @@ -1109,9 +1120,10 @@ def main(): |
| 1109 | 1120 | |
| 1110 | 1121 | logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) |
| 1111 | 1122 | |
| 1112 | - for filespec in args: | |
| 1123 | + for filename, data in xglob.iter_files(args, recursive=options.recursive, | |
| 1124 | + zip_password=options.zip_password, zip_fname=options.zip_fname): | |
| 1113 | 1125 | #data = open(filespec, 'rb').read() |
| 1114 | - process_file(filespec) | |
| 1126 | + process_file(filename, data) | |
| 1115 | 1127 | |
| 1116 | 1128 | if __name__ == '__main__': |
| 1117 | 1129 | main() | ... | ... |
oletools/thirdparty/xglob/__init__.py
0 → 100644
oletools/thirdparty/xglob/xglob.py
0 → 100644
| 1 | +#! /usr/bin/env python2 | |
| 2 | +""" | |
| 3 | +xglob | |
| 4 | + | |
| 5 | +xglob is a python package to list files matching wildcards (*, ?, []), | |
| 6 | +extending the functionality of the glob module from the standard python | |
| 7 | +library (https://docs.python.org/2/library/glob.html). | |
| 8 | + | |
| 9 | +Main features: | |
| 10 | +- recursive file listing (including subfolders) | |
| 11 | +- file listing within Zip archives | |
| 12 | +- helper function to open files specified as arguments, supporting files | |
| 13 | + within zip archives encrypted with a password | |
| 14 | + | |
| 15 | +Author: Philippe Lagadec - http://www.decalage.info | |
| 16 | +License: BSD, see source code or documentation | |
| 17 | + | |
| 18 | +For more info and updates: http://www.decalage.info/xglob | |
| 19 | +""" | |
| 20 | + | |
| 21 | +# LICENSE: | |
| 22 | +# | |
| 23 | +# xglob is copyright (c) 2013-2014, Philippe Lagadec (http://www.decalage.info) | |
| 24 | +# All rights reserved. | |
| 25 | +# | |
| 26 | +# Redistribution and use in source and binary forms, with or without modification, | |
| 27 | +# are permitted provided that the following conditions are met: | |
| 28 | +# | |
| 29 | +# * Redistributions of source code must retain the above copyright notice, this | |
| 30 | +# list of conditions and the following disclaimer. | |
| 31 | +# * Redistributions in binary form must reproduce the above copyright notice, | |
| 32 | +# this list of conditions and the following disclaimer in the documentation | |
| 33 | +# and/or other materials provided with the distribution. | |
| 34 | +# | |
| 35 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 36 | +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 37 | +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 38 | +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| 39 | +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 40 | +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 41 | +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 42 | +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 43 | +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 44 | +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 45 | + | |
| 46 | + | |
| 47 | +#------------------------------------------------------------------------------ | |
| 48 | +# CHANGELOG: | |
| 49 | +# 2013-12-04 v0.01 PL: - scan several files from command line args | |
| 50 | +# 2014-01-14 v0.02 PL: - added riglob, ziglob | |
| 51 | +# 2014-12-26 v0.03 PL: - moved code from balbuzard into a separate package | |
| 52 | + | |
| 53 | +__version__ = '0.03' | |
| 54 | + | |
| 55 | + | |
| 56 | +#=== IMPORTS ================================================================= | |
| 57 | + | |
| 58 | +import os, fnmatch, glob, zipfile | |
| 59 | + | |
| 60 | +#=== FUNCTIONS =============================================================== | |
| 61 | + | |
| 62 | +# recursive glob function to find files in any subfolder: | |
| 63 | +# inspired by http://stackoverflow.com/questions/14798220/how-can-i-search-sub-folders-using-glob-glob-module-in-python | |
def rglob(path, pattern='*.*'):
    """
    Recursive glob:
    similar to glob.glob, but finds files recursively in all subfolders of path.

    path: root directory where to search files. An empty string means the
          current directory (os.walk('') would otherwise find nothing at all).
    pattern: pattern for filenames, using wildcards, e.g. *.txt
             It is matched with fnmatch against the bare file name only,
             never against the directory part.

    Returns a list of paths, each built by joining the directory in which the
    file was found with the file name. Returns an empty list if nothing matches.
    """
    #TODO: more compatible API with glob: use single param, split path from pattern
    matches = []
    for dirpath, _dirnames, files in os.walk(path or os.curdir):
        for f in fnmatch.filter(files, pattern):
            found = os.path.join(dirpath, f)
            if not path:
                # the caller gave no directory: drop the './' prefix that was
                # only introduced by walking os.curdir, as glob.glob would
                found = os.path.normpath(found)
            matches.append(found)
    return matches
| 75 | + | |
| 76 | + | |
def riglob(pathname):
    """
    Recursive iglob:
    similar to glob.iglob, but finds files recursively in all subfolders of path.

    pathname: root directory where to search files followed by pattern for
              filenames, using wildcards, e.g. *.txt
              Only the last path component is treated as a pattern; the
              directory part is used literally as the root of the search.
              When there is no directory part (e.g. '*.txt'), the search
              starts in the current directory.

    Iterator: yields the path of each matching file, built by joining the
    directory in which it was found with the file name.
    """
    path, filespec = os.path.split(pathname)
    # os.walk('') yields nothing, so a bare pattern such as '*.txt' must be
    # searched from os.curdir explicitly
    for dirpath, _dirnames, files in os.walk(path or os.curdir):
        for f in fnmatch.filter(files, filespec):
            found = os.path.join(dirpath, f)
            if not path:
                # no directory was given: drop the './' prefix introduced by
                # walking os.curdir, so results look like those of glob.iglob
                found = os.path.normpath(found)
            yield found
| 88 | + | |
| 89 | + | |
def ziglob(zipfileobj, pathname):
    """
    iglob in a zip:
    similar to glob.iglob, but finds files within a zip archive.

    - zipfileobj: zipfile.ZipFile object
    - pathname: pattern using wildcards, e.g. *.txt
      It is matched with fnmatch against each full member name as returned
      by zipfileobj.namelist(). fnmatch does not treat '/' specially, so
      '*.txt' also matches members stored in subfolders of the archive.

    Iterator: yields the name of each matching member, in archive order.
    Nothing is written to stdout: listing an archive is silent.
    """
    for member in fnmatch.filter(zipfileobj.namelist(), pathname):
        yield member
| 102 | + | |
| 103 | + | |
def iter_files(files, recursive=False, zip_password=None, zip_fname='*'):
    """
    Iterate over each file provided as argument:
    - files is a list of arguments (paths or patterns). Wildcards are supported.
    - recursive: if True, patterns are expanded with riglob (search in all
      subfolders), otherwise with glob.iglob.
    - if zip_password is None, each file is listed without reading its content.
    - if not, then each file is opened as a zip archive, and the members
      matching zip_fname are read from it using the provided password.
      A text (unicode) password is encoded to UTF-8, because zipfile expects
      a byte string.

    Iterator: yields (filename, data) for each file. If zip_password is None, then
    only the filename is returned, and data=None. Otherwise filename is the name
    of the member within the zip archive and data is its content.

    Errors raised by zipfile (file is not a zip archive, bad password, ...) are
    not caught here and propagate to the caller.
    """
    # choose recursive or non-recursive iglob:
    iglob = riglob if recursive else glob.iglob
    if zip_password is not None and not isinstance(zip_password, bytes):
        zip_password = zip_password.encode('utf-8')
    for filespec in files:
        for filename in iglob(filespec):
            if zip_password is None:
                # normal file:
                # do not read the file content, just yield the filename
                yield filename, None
                continue
            # Each file is expected to be a zip archive.
            # (single-argument print(...) gives the same output with the
            # Python 2 print statement and the Python 3 print function)
            print('Opening zip archive %s with provided password' % filename)
            z = zipfile.ZipFile(filename, 'r')
            try:
                print('Looking for file(s) matching "%s"' % zip_fname)
                # use a separate name for members, so that the archive path
                # in filename is not overwritten while iterating
                for subfilename in ziglob(z, zip_fname):
                    print('Opening file in zip archive: %s' % subfilename)
                    yield subfilename, z.read(subfilename, zip_password)
            finally:
                # release the archive handle, also when the consumer stops
                # iterating early or when reading a member fails
                z.close()
| 140 | + | ... | ... |