Commit f133f113184ef10bbaa8f97f567fa46931341fd7

Authored by Philippe Lagadec
1 parent b933bc98

olevba: added OptionParser, main and process_file

Showing 1 changed file with 55 additions and 27 deletions
oletools/olevba.py
... ... @@ -18,8 +18,6 @@ http://www.decalage.info/python/oletools
18 18  
19 19 olevba is based on source code from officeparser by John William Davison
20 20 https://github.com/unixfreak0037/officeparser
21   -
22   -Usage: olevba.py <file>
23 21 """
24 22  
25 23 #=== LICENSE ==================================================================
... ... @@ -88,27 +86,26 @@ Usage: olevba.py <file>
88 86 # 2014-12-15 v0.08 PL: - improved display for empty macros
89 87 # - added pattern extraction
90 88 # 2014-12-25 v0.09 PL: - added suspicious keywords detection
  89 +# 2014-12-27 v0.10 PL: - added OptionParser, main and process_file
91 90  
92   -__version__ = '0.09'
  91 +__version__ = '0.10'
93 92  
94 93 #------------------------------------------------------------------------------
95 94 # TODO:
  95 +# + process several files in dirs or zips with password
96 96 # + do not use logging, but a provided logger (null logger by default)
97   -# + optparse
98 97 # + nicer output
99 98 # + setup logging (common with other oletools)
100 99 # + update readme, wiki and decalage.info, pypi (link to sample files)
  100 +
  101 +# TODO later:
101 102 # + performance improvement: instead of searching each keyword separately,
102 103 # first split vba code into a list of words (per line), then check each
103 104 # word against a dict. (or put vba words into a set/dict?)
104 105 # + for regex, maybe combine them into a single re with named groups?
105 106 # + add Yara support, include sample rules? plugins like balbuzard?
106 107 # + add balbuzard support
107   -# + move main into functions
108   -
109   -# TODO later:
110   -# + output to file
111   -# + process several files in dirs or zips with password
  108 +# + output to file (replace print by file.write, sys.stdout by default)
112 109 # + look for VBA in embedded documents (e.g. Excel in Word)
113 110 # + support SRP streams (see Lenny's article + links and sample)
114 111 # - python 3.x support
... ... @@ -133,8 +130,10 @@ import cStringIO
133 130 import math
134 131 import zipfile
135 132 import re
  133 +import optparse
136 134  
137 135 import thirdparty.olefile as olefile
  136 +from thirdparty.prettytable import prettytable
138 137  
139 138 #--- CONSTANTS ----------------------------------------------------------------
140 139  
... ... @@ -815,6 +814,7 @@ class VBA_Parser(object):
815 814 :param filename: actual filename if _file is a file-like object or file content
816 815 in a bytes string
817 816 """
  817 + #TODO: filename should be mandatory, optional data is a string or file-like object
818 818 #TODO: also support olefile and zipfile as input
819 819 self.file = _file
820 820 self.ole_file = None
... ... @@ -1005,33 +1005,24 @@ class VBA_Parser(object):
1005 1005 self.ole_file.close()
1006 1006  
1007 1007  
1008   -#=== MAIN =====================================================================
1009   -
1010   -if __name__ == '__main__':
1011   -
1012   - from thirdparty.prettytable import prettytable
1013   -
1014   - if len(sys.argv)<2:
1015   - print __doc__
1016   - sys.exit(1)
1017   -
1018   - logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO)
1019   -
1020   - #TODO: option parser
1021   - fname = sys.argv[1]
  1008 +def process_file (filename):
  1009 + """
  1010 + Process a single file
  1011 + """
  1012 + #TODO: replace print by writing to a provided output file (sys.stdout by default)
1022 1013 print '='*79
1023   - print 'File:', fname
  1014 + print 'File:', filename
1024 1015 try:
1025 1016 #TODO: handle olefile errors, when an OLE file is malformed
1026   - vba = VBA_Parser(fname)
  1017 + vba = VBA_Parser(filename, filename=filename)
1027 1018 print 'Type:', vba.type
1028 1019 if vba.detect_vba_macros():
1029 1020 print 'Contains VBA Macros:'
1030   - for (filename, stream_path, vba_filename, vba_code) in vba.extract_macros():
  1021 + for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
1031 1022 # hide attribute lines:
1032 1023 vba_code = filter_vba(vba_code)
1033 1024 print '-'*79
1034   - print 'Filename :', filename
  1025 + print 'Filename :', subfilename
1035 1026 print 'OLE stream :', stream_path
1036 1027 print 'VBA filename:', vba_filename
1037 1028 print '- '*39
... ... @@ -1088,4 +1079,41 @@ if __name__ == '__main__':
1088 1079 raise
1089 1080 print sys.exc_value
1090 1081  
  1082 +
  1083 +#=== MAIN =====================================================================
  1084 +
  1085 +def main():
  1086 + """
  1087 + Main function, called when olevba is run from the command line
  1088 + """
  1089 + usage = 'usage: %prog [options] <filename> [filename2 ...]'
  1090 + parser = optparse.OptionParser(usage=usage)
  1091 + # parser.add_option('-o', '--outfile', dest='outfile',
  1092 + # help='output file')
  1093 + # parser.add_option('-c', '--csv', dest='csv',
  1094 + # help='export results to a CSV file')
  1095 + # parser.add_option("-r", action="store_true", dest="recursive",
  1096 + # help='find files recursively in subdirectories.')
  1097 + # parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
  1098 + # help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')
  1099 + # parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
  1100 + # help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
  1101 +
  1102 + (options, args) = parser.parse_args()
  1103 +
  1104 + # Print help if no arguments are passed
  1105 + if len(args) == 0:
  1106 + print __doc__
  1107 + parser.print_help()
  1108 + sys.exit()
  1109 +
  1110 + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO)
  1111 +
  1112 + for filespec in args:
  1113 + #data = open(filespec, 'rb').read()
  1114 + process_file(filespec)
  1115 +
  1116 +if __name__ == '__main__':
  1117 + main()
  1118 +
1091 1119 # This was coded while listening to "Dust" from I Love You But I've Chosen Darkness
1092 1120 \ No newline at end of file
... ...