Commit d993998ada1eadd166f3408a0f6a516adb87bd06

Authored by Christian Herdtweck
1 parent 1f04b96d

olevba: move processing & error handling from main to separate function

The code is almost functionally identical.
The only difference is that the results of iterating over xglob are saved in a
tuple, so that it is known beforehand whether there is a single file or several.
This allows choosing the output mode up front when handling a single file with
an unspecified output mode, and thus greatly simplifies the handling of vba_parser.
Showing 1 changed file with 116 additions and 102 deletions
oletools/olevba.py
@@ -3787,6 +3787,83 @@ def parse_args(cmd_line_args=None): @@ -3787,6 +3787,83 @@ def parse_args(cmd_line_args=None):
3787 return options, args 3787 return options, args
3788 3788
3789 3789
def process_file(filename, data, container, options):
    """
    Part of main function that processes a single file.

    Opens the file with VBA_Parser_CLI, produces output in the mode given by
    options.output_mode ('detailed', 'triage' or 'json') and reports the
    errors handled below in a form matching that output mode.

    :param filename: name of the file to analyse (also used in messages)
    :param data: file content, passed on to VBA_Parser_CLI as `data`
    :param container: container the file came from (e.g. a zip file) or None,
                      passed on to VBA_Parser_CLI as `container`
    :param options: parsed command line options; options.output_mode must
                    already be resolved to 'detailed', 'triage' or 'json'
    :returns: a single code summarizing the status of processing of this
              file: RETURN_OK, RETURN_OPEN_ERROR, RETURN_PARSE_ERROR or
              RETURN_ENCRYPTED
    :raises ValueError: if options.output_mode has an unexpected value
                        (this is not caught here)
    """
    # defined before the try block so that the finally clause can tell
    # whether the parser was ever created
    vba_parser = None
    try:
        # Open the file
        vba_parser = VBA_Parser_CLI(filename, data=data, container=container,
                                    relaxed=options.relaxed)

        if options.output_mode == 'detailed':
            # fully detailed output
            vba_parser.process_file(
                show_decoded_strings=options.show_decoded_strings,
                display_code=options.display_code,
                hide_attributes=options.hide_attributes,
                vba_code_only=options.vba_code_only,
                show_deobfuscated_code=options.show_deobfuscated_code,
                deobfuscate=options.deobfuscate)
        elif options.output_mode == 'triage':
            # summarized output for triage:
            vba_parser.process_file_triage(
                show_decoded_strings=options.show_decoded_strings,
                deobfuscate=options.deobfuscate)
        elif options.output_mode == 'json':
            print_json(
                vba_parser.process_file_json(
                    show_decoded_strings=options.show_decoded_strings,
                    display_code=options.display_code,
                    hide_attributes=options.hide_attributes,
                    vba_code_only=options.vba_code_only,
                    show_deobfuscated_code=options.show_deobfuscated_code,
                    deobfuscate=options.deobfuscate))
        else:  # (should be impossible)
            raise ValueError('unexpected output mode: "{0}"!'
                             .format(options.output_mode))

    except (SubstreamOpenError, UnexpectedDataError) as exc:
        if options.output_mode == 'triage':
            print('%-12s %s - Error opening substream or unexpected '
                  'content' % ('?', filename))
        elif options.output_mode == 'json':
            print_json(file=filename, type='error',
                       error=type(exc).__name__, message=str(exc))
        else:
            log.exception('Error opening substream or unexpected '
                          'content in %s', filename)
        return RETURN_OPEN_ERROR
    except FileOpenError as exc:
        if options.output_mode == 'triage':
            print('%-12s %s - File format not supported' % ('?', filename))
        elif options.output_mode == 'json':
            print_json(file=filename, type='error',
                       error=type(exc).__name__, message=str(exc))
        else:
            log.exception('Failed to open %s -- probably not supported!',
                          filename)
        return RETURN_OPEN_ERROR
    except ProcessingError as exc:
        # ProcessingError wraps the original exception in exc.orig_exc;
        # that is the one reported to the user
        if options.output_mode == 'triage':
            print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc))
        elif options.output_mode == 'json':
            print_json(file=filename, type='error',
                       error=type(exc).__name__,
                       message=str(exc.orig_exc))
        else:
            log.exception('Error processing file %s (%s)!',
                          filename, exc.orig_exc)
        return RETURN_PARSE_ERROR
    except FileIsEncryptedError as exc:
        if options.output_mode == 'triage':
            print('%-12s %s - File is encrypted' % ('!ERROR', filename))
        elif options.output_mode == 'json':
            print_json(file=filename, type='error',
                       error=type(exc).__name__, message=str(exc))
        else:
            log.exception('File %s is encrypted!', filename)
        return RETURN_ENCRYPTED
    finally:
        # Each call creates its own parser and all output is produced above,
        # so the parser can always be released here, on success and on error.
        if vba_parser is not None:
            vba_parser.close()
    return RETURN_OK
  3865 +
  3866 +
3790 def main(cmd_line_args=None): 3867 def main(cmd_line_args=None):
3791 """ 3868 """
3792 Main function, called when olevba is run from the command line 3869 Main function, called when olevba is run from the command line
@@ -3821,35 +3898,44 @@ def main(cmd_line_args=None): @@ -3821,35 +3898,44 @@ def main(cmd_line_args=None):
3821 if options.output_mode == 'triage' and options.show_deobfuscated_code: 3898 if options.output_mode == 'triage' and options.show_deobfuscated_code:
3822 log.info('ignoring option --reveal in triage output mode') 3899 log.info('ignoring option --reveal in triage output mode')
3823 3900
3824 - # Column headers (do not know how many files there will be yet, so if no output_mode  
3825 - # was specified, we will print triage for first file --> need these headers)  
3826 - if options.output_mode in ('triage', 'unspecified'): 3901 + # gather info on all files that must be processed
  3902 + # ignore directory names stored in zip files:
  3903 + all_input_info = tuple((container, filename, data) for
  3904 + container, filename, data in xglob.iter_files(
  3905 + args, recursive=options.recursive,
  3906 + zip_password=options.zip_password,
  3907 + zip_fname=options.zip_fname)
  3908 + if not (container and filename.endswith('/')))
  3909 +
  3910 + # specify output mode if options -t, -d and -j were not specified
  3911 + if options.output_mode == 'unspecified':
  3912 + if len(all_input_info) == 1:
  3913 + options.output_mode = 'detailed'
  3914 + else:
  3915 + options.output_mode = 'triage'
  3916 +
  3917 + # Column headers for triage mode
  3918 + if options.output_mode == 'triage':
3827 print('%-12s %-65s' % ('Flags', 'Filename')) 3919 print('%-12s %-65s' % ('Flags', 'Filename'))
3828 print('%-12s %-65s' % ('-' * 11, '-' * 65)) 3920 print('%-12s %-65s' % ('-' * 11, '-' * 65))
3829 3921
3830 previous_container = None 3922 previous_container = None
3831 count = 0 3923 count = 0
3832 container = filename = data = None 3924 container = filename = data = None
3833 - vba_parser = None  
3834 return_code = RETURN_OK 3925 return_code = RETURN_OK
3835 try: 3926 try:
3836 - for container, filename, data in xglob.iter_files(args, recursive=options.recursive,  
3837 - zip_password=options.zip_password, zip_fname=options.zip_fname):  
3838 - # ignore directory names stored in zip files:  
3839 - if container and filename.endswith('/'):  
3840 - continue  
3841 - 3927 + for container, filename, data in all_input_info:
3842 # handle errors from xglob 3928 # handle errors from xglob
3843 if isinstance(data, Exception): 3929 if isinstance(data, Exception):
3844 if isinstance(data, PathNotFoundException): 3930 if isinstance(data, PathNotFoundException):
3845 - if options.output_mode in ('triage', 'unspecified'): 3931 + if options.output_mode == 'triage':
3846 print('%-12s %s - File not found' % ('?', filename)) 3932 print('%-12s %s - File not found' % ('?', filename))
3847 elif options.output_mode != 'json': 3933 elif options.output_mode != 'json':
3848 log.error('Given path %r does not exist!' % filename) 3934 log.error('Given path %r does not exist!' % filename)
3849 return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \ 3935 return_code = RETURN_FILE_NOT_FOUND if return_code == 0 \
3850 else RETURN_SEVERAL_ERRS 3936 else RETURN_SEVERAL_ERRS
3851 else: 3937 else:
3852 - if options.output_mode in ('triage', 'unspecified'): 3938 + if options.output_mode == 'triage':
3853 print('%-12s %s - Failed to read from zip file %s' % ('?', filename, container)) 3939 print('%-12s %s - Failed to read from zip file %s' % ('?', filename, container))
3854 elif options.output_mode != 'json': 3940 elif options.output_mode != 'json':
3855 log.error('Exception opening/reading %r from zip file %r: %s' 3941 log.error('Exception opening/reading %r from zip file %r: %s'
@@ -3861,102 +3947,30 @@ def main(cmd_line_args=None): @@ -3861,102 +3947,30 @@ def main(cmd_line_args=None):
3861 error=type(data).__name__, message=str(data)) 3947 error=type(data).__name__, message=str(data))
3862 continue 3948 continue
3863 3949
3864 - try:  
3865 - # close the previous file if analyzing several:  
3866 - # (this must be done here to avoid closing the file if there is only 1,  
3867 - # to fix issue #219)  
3868 - if vba_parser is not None:  
3869 - vba_parser.close()  
3870 - # Open the file  
3871 - vba_parser = VBA_Parser_CLI(filename, data=data, container=container,  
3872 - relaxed=options.relaxed)  
3873 -  
3874 - if options.output_mode == 'detailed':  
3875 - # fully detailed output  
3876 - vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,  
3877 - display_code=options.display_code,  
3878 - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,  
3879 - show_deobfuscated_code=options.show_deobfuscated_code,  
3880 - deobfuscate=options.deobfuscate)  
3881 - elif options.output_mode in ('triage', 'unspecified'):  
3882 - # print container name when it changes:  
3883 - if container != previous_container:  
3884 - if container is not None:  
3885 - print('\nFiles in %s:' % container)  
3886 - previous_container = container  
3887 - # summarized output for triage:  
3888 - vba_parser.process_file_triage(show_decoded_strings=options.show_decoded_strings,  
3889 - deobfuscate=options.deobfuscate)  
3890 - elif options.output_mode == 'json':  
3891 - print_json(  
3892 - vba_parser.process_file_json(show_decoded_strings=options.show_decoded_strings,  
3893 - display_code=options.display_code,  
3894 - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,  
3895 - show_deobfuscated_code=options.show_deobfuscated_code,  
3896 - deobfuscate=options.deobfuscate))  
3897 - else: # (should be impossible)  
3898 - raise ValueError('unexpected output mode: "{0}"!'.format(options.output_mode))  
3899 - count += 1  
3900 -  
3901 - except (SubstreamOpenError, UnexpectedDataError) as exc:  
3902 - if options.output_mode in ('triage', 'unspecified'):  
3903 - print('%-12s %s - Error opening substream or uenxpected ' \  
3904 - 'content' % ('?', filename))  
3905 - elif options.output_mode == 'json':  
3906 - print_json(file=filename, type='error',  
3907 - error=type(exc).__name__, message=str(exc))  
3908 - else:  
3909 - log.exception('Error opening substream or unexpected '  
3910 - 'content in %s' % filename)  
3911 - return_code = RETURN_OPEN_ERROR if return_code == 0 \  
3912 - else RETURN_SEVERAL_ERRS  
3913 - except FileOpenError as exc:  
3914 - if options.output_mode in ('triage', 'unspecified'):  
3915 - print('%-12s %s - File format not supported' % ('?', filename))  
3916 - elif options.output_mode == 'json':  
3917 - print_json(file=filename, type='error',  
3918 - error=type(exc).__name__, message=str(exc))  
3919 - else:  
3920 - log.exception('Failed to open %s -- probably not supported!' % filename)  
3921 - return_code = RETURN_OPEN_ERROR if return_code == 0 \  
3922 - else RETURN_SEVERAL_ERRS  
3923 - except ProcessingError as exc:  
3924 - if options.output_mode in ('triage', 'unspecified'):  
3925 - print('%-12s %s - %s' % ('!ERROR', filename, exc.orig_exc))  
3926 - elif options.output_mode == 'json':  
3927 - print_json(file=filename, type='error',  
3928 - error=type(exc).__name__,  
3929 - message=str(exc.orig_exc))  
3930 - else:  
3931 - log.exception('Error processing file %s (%s)!'  
3932 - % (filename, exc.orig_exc))  
3933 - return_code = RETURN_PARSE_ERROR if return_code == 0 \  
3934 - else RETURN_SEVERAL_ERRS  
3935 - except FileIsEncryptedError as exc:  
3936 - if options.output_mode in ('triage', 'unspecified'):  
3937 - print('%-12s %s - File is encrypted' % ('!ERROR', filename))  
3938 - elif options.output_mode == 'json':  
3939 - print_json(file=filename, type='error',  
3940 - error=type(exc).__name__, message=str(exc))  
3941 - else:  
3942 - log.exception('File %s is encrypted!' % (filename))  
3943 - return_code = RETURN_ENCRYPTED if return_code == 0 \  
3944 - else RETURN_SEVERAL_ERRS  
3945 - # Here we do not close the vba_parser, because process_file may need it below. 3950 + if options.output_mode == 'triage':
  3951 + # print container name when it changes:
  3952 + if container != previous_container:
  3953 + if container is not None:
  3954 + print('\nFiles in %s:' % container)
  3955 + previous_container = container
  3956 +
  3957 + # process the file, handling errors and encryption
  3958 + curr_return_code = process_file(filename, data, container, options)
  3959 + count += 1
  3960 +
  3961 + # adjust overall return code
  3962 + if curr_return_code == RETURN_OK:
  3963 + continue # do not modify overall return code
  3964 + if return_code == RETURN_OK:
  3965 + return_code = curr_return_code # first error return code
  3966 + else:
  3967 + return_code = RETURN_SEVERAL_ERRS # several errors
3946 3968
3947 if options.output_mode == 'triage': 3969 if options.output_mode == 'triage':
3948 print('\n(Flags: OpX=OpenXML, XML=Word2003XML, FlX=FlatOPC XML, MHT=MHTML, TXT=Text, M=Macros, ' \ 3970 print('\n(Flags: OpX=OpenXML, XML=Word2003XML, FlX=FlatOPC XML, MHT=MHTML, TXT=Text, M=Macros, ' \
3949 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \ 3971 'A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex strings, ' \
3950 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n') 3972 'B=Base64 strings, D=Dridex strings, V=VBA strings, ?=Unknown)\n')
3951 3973
3952 - if count == 1 and options.output_mode == 'unspecified':  
3953 - # if options -t, -d and -j were not specified and it's a single file, print details:  
3954 - vba_parser.process_file(show_decoded_strings=options.show_decoded_strings,  
3955 - display_code=options.display_code,  
3956 - hide_attributes=options.hide_attributes, vba_code_only=options.vba_code_only,  
3957 - show_deobfuscated_code=options.show_deobfuscated_code,  
3958 - deobfuscate=options.deobfuscate)  
3959 -  
3960 if options.output_mode == 'json': 3974 if options.output_mode == 'json':
3961 # print last json entry (a last one without a comma) and closing ] 3975 # print last json entry (a last one without a comma) and closing ]
3962 print_json(type='MetaInformation', return_code=return_code, 3976 print_json(type='MetaInformation', return_code=return_code,