Commit 45aec6e69c923f0190536fbbaa9e1e0e3be8e6ef

Authored by decalage2
2 parents d4df0c06 d8f80b9d

Merge branch 'pcode-options'

# Conflicts:
#	oletools/olevba.py
Showing 1 changed file with 123 additions and 79 deletions
oletools/olevba.py
... ... @@ -275,7 +275,7 @@ from io import BytesIO, StringIO
275 275 import math
276 276 import zipfile
277 277 import re
278   -import optparse
  278 +import argparse
279 279 import binascii
280 280 import base64
281 281 import zlib
... ... @@ -532,7 +532,7 @@ class UnexpectedDataError(OlevbaBaseException):
532 532 # return codes
533 533 RETURN_OK = 0
534 534 RETURN_WARNINGS = 1 # (reserved, not used yet)
535   -RETURN_WRONG_ARGS = 2 # (fixed, built into optparse)
  535 +RETURN_WRONG_ARGS = 2 # (fixed, built into argparse)
536 536 RETURN_FILE_NOT_FOUND = 3
537 537 RETURN_XGLOB_ERR = 4
538 538 RETURN_OPEN_ERROR = 5
... ... @@ -2626,7 +2626,8 @@ class VBA_Parser(object):
2626 2626 """
2627 2627 # TODO: relaxed is enabled by default temporarily, until a solution is found for issue #593
2628 2628  
2629   - def __init__(self, filename, data=None, container=None, relaxed=True, encoding=DEFAULT_API_ENCODING):
  2629 + def __init__(self, filename, data=None, container=None, relaxed=True, encoding=DEFAULT_API_ENCODING,
  2630 + disable_pcode=False):
2630 2631 """
2631 2632 Constructor for VBA_Parser
2632 2633  
... ... @@ -2687,10 +2688,11 @@ class VBA_Parser(object):
2687 2688 self.xlm_macros = []
2688 2689 self.no_xlm = False
2689 2690 #: Output from pcodedmp, disassembly of the VBA P-code
  2691 + self.disable_pcode = disable_pcode
2690 2692 self.pcodedmp_output = None
2691 2693 #: Flag set to True/False if VBA stomping detected
2692 2694 self.vba_stomping_detected = None
2693   -
  2695 +
2694 2696 # if filename is None:
2695 2697 # if isinstance(_file, basestring):
2696 2698 # if len(_file) < olefile.MINIMAL_OLEFILE_SIZE:
... ... @@ -2800,9 +2802,7 @@ class VBA_Parser(object):
2800 2802 with z.open(subfile) as file_handle:
2801 2803 ole_data = file_handle.read()
2802 2804 try:
2803   - self.ole_subfiles.append(
2804   - VBA_Parser(filename=subfile, data=ole_data,
2805   - relaxed=self.relaxed))
  2805 + self.append_subfile(filename=subfile, data=ole_data)
2806 2806 except OlevbaBaseException as exc:
2807 2807 if self.relaxed:
2808 2808 log.info('%s is not a valid OLE file (%s)' % (subfile, exc))
... ... @@ -2851,9 +2851,7 @@ class VBA_Parser(object):
2851 2851 # TODO: handle different offsets => separate function
2852 2852 try:
2853 2853 ole_data = mso_file_extract(mso_data)
2854   - self.ole_subfiles.append(
2855   - VBA_Parser(filename=fname, data=ole_data,
2856   - relaxed=self.relaxed))
  2854 + self.append_subfile(filename=fname, data=ole_data)
2857 2855 except OlevbaBaseException as exc:
2858 2856 if self.relaxed:
2859 2857 log.info('Error parsing subfile {0}: {1}'
... ... @@ -2898,9 +2896,7 @@ class VBA_Parser(object):
2898 2896 for bindata in pkgpart.iterfind(TAG_PKGBINDATA):
2899 2897 try:
2900 2898 ole_data = binascii.a2b_base64(bindata.text)
2901   - self.ole_subfiles.append(
2902   - VBA_Parser(filename=fname, data=ole_data,
2903   - relaxed=self.relaxed))
  2899 + self.append_subfile(filename=fname, data=ole_data)
2904 2900 except OlevbaBaseException as exc:
2905 2901 if self.relaxed:
2906 2902 log.info('Error parsing subfile {0}: {1}'
... ... @@ -2979,9 +2975,7 @@ class VBA_Parser(object):
2979 2975  
2980 2976 # TODO: check if it is actually an OLE file
2981 2977 # TODO: get the MSO filename from content_location?
2982   - self.ole_subfiles.append(
2983   - VBA_Parser(filename=fname, data=ole_data,
2984   - relaxed=self.relaxed))
  2978 + self.append_subfile(filename=fname, data=ole_data)
2985 2979 except OlevbaBaseException as exc:
2986 2980 if self.relaxed:
2987 2981 log.info('%s does not contain a valid OLE file (%s)'
... ... @@ -3020,8 +3014,7 @@ class VBA_Parser(object):
3020 3014 try:
3021 3015 ppt = ppt_parser.PptParser(self.ole_file, fast_fail=True)
3022 3016 for vba_data in ppt.iter_vba_data():
3023   - self.ole_subfiles.append(VBA_Parser(None, vba_data,
3024   - container='PptParser'))
  3017 + self.append_subfile(None, vba_data, container='PptParser')
3025 3018 log.info('File is PPT')
3026 3019 self.ole_file.close() # just in case
3027 3020 self.ole_file = None # required to make other methods look at ole_subfiles
... ... @@ -3083,6 +3076,14 @@ class VBA_Parser(object):
3083 3076 # set type only if parsing succeeds
3084 3077 self.type = TYPE_TEXT
3085 3078  
  3079 + def append_subfile(self, filename, data, container=None):
  3080 + """
  3081 + Create a sub-parser for the given data (inheriting relaxed, encoding and disable_pcode from this parser) and append it to self.ole_subfiles.
  3082 + """
  3083 + self.ole_subfiles.append(VBA_Parser(filename, data, container,
  3084 + relaxed=self.relaxed,
  3085 + encoding=self.encoding,
  3086 + disable_pcode=self.disable_pcode))
3086 3087  
3087 3088 def find_vba_projects(self):
3088 3089 """
... ... @@ -3617,6 +3618,9 @@ class VBA_Parser(object):
3617 3618 self.pcodedmp_output = ''
3618 3619 return ''
3619 3620 # skip extraction if P-code is disabled, otherwise only run it once:
  3621 + if self.disable_pcode:
  3622 + self.pcodedmp_output = ''
  3623 + return ''
3620 3624 if self.pcodedmp_output is None:
3621 3625 log.debug('Calling pcodedmp to extract and disassemble the VBA P-code')
3622 3626 # import pcodedmp here to avoid circular imports:
... ... @@ -3873,7 +3877,7 @@ class VBA_Parser_CLI(VBA_Parser):
3873 3877 def process_file(self, show_decoded_strings=False,
3874 3878 display_code=True, hide_attributes=True,
3875 3879 vba_code_only=False, show_deobfuscated_code=False,
3876   - deobfuscate=False, pcode=False, no_xlm=False):
  3880 + deobfuscate=False, show_pcode=False, no_xlm=False):
3877 3881 """
3878 3882 Process a single file
3879 3883  
... ... @@ -3885,7 +3889,7 @@ class VBA_Parser_CLI(VBA_Parser):
3885 3889 otherwise each module is analyzed separately (old behaviour)
3886 3890 :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
3887 3891 :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
3888   - :param pcode bool: if True, call pcodedmp to disassemble P-code and display it
  3892 + :param show_pcode: bool, if True, call pcodedmp to disassemble P-code and display it
3889 3893 :param no_xlm: bool, if True, don't use the BIFF plugin to extract old style XLM macros
3890 3894 """
3891 3895 #TODO: replace print by writing to a provided output file (sys.stdout by default)
... ... @@ -3957,7 +3961,7 @@ class VBA_Parser_CLI(VBA_Parser):
3957 3961 # display the exception with full stack trace for debugging
3958 3962 log.info('Error parsing form: %s' % exc)
3959 3963 log.debug('Traceback:', exc_info=True)
3960   - if pcode:
  3964 + if show_pcode:
3961 3965 print('-' * 79)
3962 3966 print('P-CODE disassembly:')
3963 3967 pcode = self.extract_pcode()
... ... @@ -3997,7 +4001,7 @@ class VBA_Parser_CLI(VBA_Parser):
3997 4001 def process_file_json(self, show_decoded_strings=False,
3998 4002 display_code=True, hide_attributes=True,
3999 4003 vba_code_only=False, show_deobfuscated_code=False,
4000   - deobfuscate=False, no_xlm=False):
  4004 + deobfuscate=False, show_pcode=False, no_xlm=False):
4001 4005 """
4002 4006 Process a single file
4003 4007  
... ... @@ -4010,7 +4014,9 @@ class VBA_Parser_CLI(VBA_Parser):
4010 4014 :param global_analysis: bool, if True all modules are merged for a single analysis (default),
4011 4015 otherwise each module is analyzed separately (old behaviour)
4012 4016 :param hide_attributes: bool, if True the first lines starting with "Attribute VB" are hidden (default)
  4017 + :param show_deobfuscated_code: bool, if True add deobfuscated code to result
4013 4018 :param deobfuscate: bool, if True attempt to deobfuscate VBA expressions (slow)
  4019 + :param show_pcode: bool, if True add extracted pcode to result
4014 4020 """
4015 4021 #TODO: fix conflicting parameters (?)
4016 4022  
... ... @@ -4029,6 +4035,7 @@ class VBA_Parser_CLI(VBA_Parser):
4029 4035 result['analysis'] = None
4030 4036 result['code_deobfuscated'] = None
4031 4037 result['do_deobfuscate'] = deobfuscate
  4038 + result['show_pcode'] = show_pcode
4032 4039  
4033 4040 try:
4034 4041 #TODO: handle olefile errors, when an OLE file is malformed
... ... @@ -4057,6 +4064,8 @@ class VBA_Parser_CLI(VBA_Parser):
4057 4064 deobfuscate)
4058 4065 if show_deobfuscated_code:
4059 4066 result['code_deobfuscated'] = self.reveal()
  4067 + if show_pcode:
  4068 + result['pcode'] = self.extract_pcode()
4060 4069 result['macros'] = macros
4061 4070 result['json_conversion_successful'] = True
4062 4071 except Exception as exc:
... ... @@ -4119,60 +4128,87 @@ def parse_args(cmd_line_args=None):
4119 4128 }
4120 4129  
4121 4130 usage = 'usage: olevba [options] <filename> [filename2 ...]'
4122   - parser = optparse.OptionParser(usage=usage)
4123   - # parser.add_option('-o', '--outfile', dest='outfile',
  4131 + parser = argparse.ArgumentParser(usage=usage)
  4132 + parser.add_argument('filenames', nargs='*', help='Files to analyze')
  4133 + # parser.add_argument('-o', '--outfile', dest='outfile',
4124 4134 # help='output file')
4125   - # parser.add_option('-c', '--csv', dest='csv',
  4135 + # parser.add_argument('-c', '--csv', dest='csv',
4126 4136 # help='export results to a CSV file')
4127   - parser.add_option("-r", action="store_true", dest="recursive",
4128   - help='find files recursively in subdirectories.')
4129   - parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
4130   - help='if the file is a zip archive, open all files from it, using the provided password.')
4131   - parser.add_option("-p", "--password", type='str', action='append',
4132   - default=[],
4133   - help='if encrypted office files are encountered, try '
4134   - 'decryption with this password. May be repeated.')
4135   - parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
4136   - help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
4137   - # output mode; could make this even simpler with add_option(type='choice') but that would make
4138   - # cmd line interface incompatible...
4139   - modes = optparse.OptionGroup(parser, title='Output mode (mutually exclusive)')
4140   - modes.add_option("-t", '--triage', action="store_const", dest="output_mode",
4141   - const='triage', default='unspecified',
4142   - help='triage mode, display results as a summary table (default for multiple files)')
4143   - modes.add_option("-d", '--detailed', action="store_const", dest="output_mode",
4144   - const='detailed', default='unspecified',
4145   - help='detailed mode, display full results (default for single file)')
4146   - modes.add_option("-j", '--json', action="store_const", dest="output_mode",
4147   - const='json', default='unspecified',
4148   - help='json mode, detailed in json format (never default)')
4149   - parser.add_option_group(modes)
4150   - parser.add_option("-a", '--analysis', action="store_false", dest="display_code", default=True,
4151   - help='display only analysis results, not the macro source code')
4152   - parser.add_option("-c", '--code', action="store_true", dest="vba_code_only", default=False,
4153   - help='display only VBA source code, do not analyze it')
4154   - parser.add_option("--decode", action="store_true", dest="show_decoded_strings",
4155   - help='display all the obfuscated strings with their decoded content (Hex, Base64, StrReverse, Dridex, VBA).')
4156   - parser.add_option("--attr", action="store_false", dest="hide_attributes", default=True,
4157   - help='display the attribute lines at the beginning of VBA source code')
4158   - parser.add_option("--reveal", action="store_true", dest="show_deobfuscated_code",
4159   - help='display the macro source code after replacing all the obfuscated strings by their decoded content.')
4160   - parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
4161   - help="logging level debug/info/warning/error/critical (default=%default)")
4162   - parser.add_option('--deobf', dest="deobfuscate", action="store_true", default=False,
4163   - help="Attempt to deobfuscate VBA expressions (slow)")
  4137 + parser.add_argument("-r", action="store_true", dest="recursive",
  4138 + help='find files recursively in subdirectories.')
  4139 + parser.add_argument("-z", "--zip", dest='zip_password', type=str,
  4140 + default=None,
  4141 + help='if the file is a zip archive, open all files '
  4142 + 'from it, using the provided password.')
  4143 + parser.add_argument("-p", "--password", type=str, action='append',
  4144 + default=[],
  4145 + help='if encrypted office files are encountered, try '
  4146 + 'decryption with this password. May be repeated.')
  4147 + parser.add_argument("-f", "--zipfname", dest='zip_fname', type=str,
  4148 + default='*',
  4149 + help='if the file is a zip archive, file(s) to be '
  4150 + 'opened within the zip. Wildcards * and ? are '
  4151 + 'supported. (default: %(default)s)')
  4152 + modes = parser.add_argument_group(title='Output mode (mutually exclusive)')
  4153 + modes.add_argument("-t", '--triage', action="store_const",
  4154 + dest="output_mode", const='triage',
  4155 + default='unspecified',
  4156 + help='triage mode, display results as a summary table '
  4157 + '(default for multiple files)')
  4158 + modes.add_argument("-d", '--detailed', action="store_const",
  4159 + dest="output_mode", const='detailed',
  4160 + default='unspecified',
  4161 + help='detailed mode, display full results (default for '
  4162 + 'single file)')
  4163 + modes.add_argument("-j", '--json', action="store_const",
  4164 + dest="output_mode", const='json', default='unspecified',
  4165 + help='json mode, detailed in json format '
  4166 + '(never default)')
  4167 + parser.add_argument("-a", '--analysis', action="store_false",
  4168 + dest="display_code", default=True,
  4169 + help='display only analysis results, not the macro '
  4170 + 'source code')
  4171 + parser.add_argument("-c", '--code', action="store_true",
  4172 + dest="vba_code_only", default=False,
  4173 + help='display only VBA source code, do not analyze it')
  4174 + parser.add_argument("--decode", action="store_true",
  4175 + dest="show_decoded_strings",
  4176 + help='display all the obfuscated strings with their '
  4177 + 'decoded content (Hex, Base64, StrReverse, '
  4178 + 'Dridex, VBA).')
  4179 + parser.add_argument("--attr", action="store_false", dest="hide_attributes",
  4180 + default=True,
  4181 + help='display the attribute lines at the beginning of '
  4182 + 'VBA source code')
  4183 + parser.add_argument("--reveal", action="store_true",
  4184 + dest="show_deobfuscated_code",
  4185 + help='display the macro source code after replacing '
  4186 + 'all the obfuscated strings by their decoded '
  4187 + 'content.')
  4188 + parser.add_argument('-l', '--loglevel', dest="loglevel", action="store",
  4189 + default=DEFAULT_LOG_LEVEL,
  4190 + help='logging level debug/info/warning/error/critical '
  4191 + '(default=%(default)s)')
  4192 + parser.add_argument('--deobf', dest="deobfuscate", action="store_true",
  4193 + default=False,
  4194 + help="Attempt to deobfuscate VBA expressions (slow)")
4164 4195 # TODO: --relaxed is enabled temporarily until a solution to issue #593 is found
4165   - parser.add_option('--relaxed', dest="relaxed", action="store_true", default=True,
4166   - help="Do not raise errors if opening of substream fails")
4167   - parser.add_option('--pcode', dest="pcode", action="store_true", default=False,
4168   - help="Disassemble and display the P-code (using pcodedmp)")
4169   - parser.add_option('--no-xlm', dest="no_xlm", action="store_true", default=False,
  4196 + parser.add_argument('--relaxed', dest="relaxed", action="store_true",
  4197 + default=True,
  4198 + help='Do not raise errors if opening of substream '
  4199 + 'fails')
  4200 + parser.add_argument('--show-pcode', dest="show_pcode", action="store_true",
  4201 + default=False,
  4202 + help="Show disassembled P-code (using pcodedmp)")
  4203 + parser.add_argument('--no-pcode', action='store_true',
  4204 + help='Disable extraction and analysis of pcode')
  4205 + parser.add_argument('--no-xlm', dest="no_xlm", action="store_true", default=False,
4170 4206 help="Do not extract XLM Excel macros. This may speed up analysis of large files.")
4171 4207  
4172   - (options, args) = parser.parse_args(cmd_line_args)
  4208 + options = parser.parse_args(cmd_line_args)
4173 4209  
4174 4210 # Print help if no arguments are passed
4175   - if len(args) == 0:
  4211 + if len(options.filenames) == 0:
4176 4212 # print banner with version
4177 4213 python_version = '%d.%d.%d' % sys.version_info[0:3]
4178 4214 print('olevba %s on Python %s - http://decalage.info/python/oletools' %
... ... @@ -4181,9 +4217,12 @@ def parse_args(cmd_line_args=None):
4181 4217 parser.print_help()
4182 4218 sys.exit(RETURN_WRONG_ARGS)
4183 4219  
  4220 + if options.show_pcode and options.no_pcode:
  4221 + parser.error('You cannot combine options --no-pcode and --show-pcode')
  4222 +
4184 4223 options.loglevel = LOG_LEVELS[options.loglevel]
4185 4224  
4186   - return options, args
  4225 + return options
4187 4226  
4188 4227  
4189 4228 def process_file(filename, data, container, options, crypto_nesting=0):
... ... @@ -4197,7 +4236,8 @@ def process_file(filename, data, container, options, crypto_nesting=0):
4197 4236 try:
4198 4237 # Open the file
4199 4238 vba_parser = VBA_Parser_CLI(filename, data=data, container=container,
4200   - relaxed=options.relaxed)
  4239 + relaxed=options.relaxed,
  4240 + disable_pcode=options.no_pcode)
4201 4241  
4202 4242 if options.output_mode == 'detailed':
4203 4243 # fully detailed output
... ... @@ -4205,7 +4245,8 @@ def process_file(filename, data, container, options, crypto_nesting=0):
4205 4245 display_code=options.display_code,
4206 4246 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
4207 4247 show_deobfuscated_code=options.show_deobfuscated_code,
4208   - deobfuscate=options.deobfuscate, pcode=options.pcode, no_xlm=options.no_xlm)
  4248 + deobfuscate=options.deobfuscate, show_pcode=options.show_pcode,
  4249 + no_xlm=options.no_xlm)
4209 4250 elif options.output_mode == 'triage':
4210 4251 # summarized output for triage:
4211 4252 vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,
... ... @@ -4216,7 +4257,8 @@ def process_file(filename, data, container, options, crypto_nesting=0):
4216 4257 display_code=options.display_code,
4217 4258 hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,
4218 4259 show_deobfuscated_code=options.show_deobfuscated_code,
4219   - deobfuscate=options.deobfuscate, no_xlm=options.no_xlm))
  4260 + deobfuscate=options.deobfuscate, show_pcode=options.show_pcode,
  4261 + no_xlm=options.no_xlm))
4220 4262 else: # (should be impossible)
4221 4263 raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))
4222 4264  
... ... @@ -4280,8 +4322,6 @@ def process_file(filename, data, container, options, crypto_nesting=0):
4280 4322 log.info('Working on decrypted file')
4281 4323 return process_file(decrypted_file, data, container or filename,
4282 4324 options, crypto_nesting+1)
4283   - except Exception:
4284   - raise
4285 4325 finally: # clean up
4286 4326 try:
4287 4327 log.debug('Removing crypt temp file {}'.format(decrypted_file))
... ... @@ -4300,7 +4340,7 @@ def main(cmd_line_args=None):
4300 4340 in process_args. Per default (cmd_line_args=None), sys.argv is used. Option
4301 4341 mainly added for unit-testing
4302 4342 """
4303   - options, args = parse_args(cmd_line_args)
  4343 + options = parse_args(cmd_line_args)
4304 4344  
4305 4345 # provide info about tool and its version
4306 4346 if options.output_mode == 'json':
... ... @@ -4322,14 +4362,12 @@ def main(cmd_line_args=None):
4322 4362 if options.show_deobfuscated_code and not options.deobfuscate:
4323 4363 log.debug('set --deobf because --reveal was set')
4324 4364 options.deobfuscate = True
4325   - if options.output_mode == 'triage' and options.show_deobfuscated_code:
4326   - log.debug('ignoring option --reveal in triage output mode')
4327 4365  
4328 4366 # gather info on all files that must be processed
4329 4367 # ignore directory names stored in zip files:
4330 4368 all_input_info = tuple((container, filename, data) for
4331 4369 container, filename, data in xglob.iter_files(
4332   - args, recursive=options.recursive,
  4370 + options.filenames, recursive=options.recursive,
4333 4371 zip_password=options.zip_password,
4334 4372 zip_fname=options.zip_fname)
4335 4373 if not (container and filename.endswith('/')))
... ... @@ -4341,6 +4379,12 @@ def main(cmd_line_args=None):
4341 4379 else:
4342 4380 options.output_mode = 'triage'
4343 4381  
  4382 + if options.output_mode == 'triage':
  4383 + if options.show_deobfuscated_code:
  4384 + log.debug('ignoring option --reveal in triage output mode')
  4385 + if options.show_pcode:
  4386 + log.debug('ignoring option --show-pcode in triage output mode')
  4387 +
4344 4388 # Column headers for triage mode
4345 4389 if options.output_mode == 'triage':
4346 4390 print('%-12s %-65s' % ('Flags', 'Filename'))
... ...